// Copyright 2011 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Author: jdtang@google.com (Jonathan Tang) // // GUnit char_ref tests. These are quick smoke tests, mostly to identify // crashing bugs so that they can be fixed without having to debug // multi-language tests. As such, they focus on coverage rather than // completeness. For testing the full spec, use char_ref_py_tests, which share // their testdata with the Python html5lib library. #include "char_ref.h" #include #include #include "gtest/gtest.h" #include "test_utils.h" #include "utf8.h" namespace { class CharRefTest : public GumboTest { protected: bool ConsumeCharRef(const char* input) { return ConsumeCharRef(input, ' ', false); } bool ConsumeCharRef( const char* input, int additional_allowed_char, bool is_in_attribute) { text_ = input; utf8iterator_init(&parser_, input, strlen(input), &iter_); bool result = consume_char_ref( &parser_, &iter_, additional_allowed_char, is_in_attribute, &output_); fflush(stdout); return result; } Utf8Iterator iter_; OneOrTwoCodepoints output_; }; TEST_F(CharRefTest, Whitespace) { EXPECT_TRUE(ConsumeCharRef("  ")); EXPECT_EQ(kGumboNoChar, output_.first); EXPECT_EQ(kGumboNoChar, output_.second); } TEST_F(CharRefTest, NumericHex) { EXPECT_TRUE(ConsumeCharRef("ካ")); EXPECT_EQ(0x12ab, output_.first); EXPECT_EQ(kGumboNoChar, output_.second); } TEST_F(CharRefTest, NumericDecimal) { EXPECT_TRUE(ConsumeCharRef("Ӓ")); EXPECT_EQ(1234, output_.first); EXPECT_EQ(kGumboNoChar, output_.second); } TEST_F(CharRefTest, NumericInvalidDigit) { errors_are_expected_ = true; EXPECT_FALSE(ConsumeCharRef("&#google")); EXPECT_EQ(kGumboNoChar, output_.first); EXPECT_EQ(kGumboNoChar, output_.second); EXPECT_EQ('&', utf8iterator_current(&iter_)); } TEST_F(CharRefTest, NumericNoSemicolon) { errors_are_expected_ = true; EXPECT_FALSE(ConsumeCharRef("Ӓgoogle")); EXPECT_EQ(1234, output_.first); EXPECT_EQ(kGumboNoChar, output_.second); EXPECT_EQ('g', utf8iterator_current(&iter_)); } TEST_F(CharRefTest, NumericReplacement) { errors_are_expected_ = true; EXPECT_FALSE(ConsumeCharRef("‚")); // Low quotation mark character. EXPECT_EQ(0x201A, output_.first); EXPECT_EQ(kGumboNoChar, output_.second); } TEST_F(CharRefTest, NumericInvalid) { errors_are_expected_ = true; EXPECT_FALSE(ConsumeCharRef("�")); EXPECT_EQ(0xFFFD, output_.first); EXPECT_EQ(kGumboNoChar, output_.second); } TEST_F(CharRefTest, NumericUtfInvalid) { errors_are_expected_ = true; EXPECT_FALSE(ConsumeCharRef("")); EXPECT_EQ(0x7, output_.first); EXPECT_EQ(kGumboNoChar, output_.second); } TEST_F(CharRefTest, NamedReplacement) { EXPECT_TRUE(ConsumeCharRef("<")); EXPECT_EQ('<', output_.first); EXPECT_EQ(kGumboNoChar, output_.second); } TEST_F(CharRefTest, NamedReplacementNoSemicolon) { errors_are_expected_ = true; EXPECT_FALSE(ConsumeCharRef(">")); EXPECT_EQ('>', output_.first); EXPECT_EQ(kGumboNoChar, output_.second); } TEST_F(CharRefTest, NamedReplacementWithInvalidUtf8) { errors_are_expected_ = true; EXPECT_TRUE(ConsumeCharRef("&\xc3\xa5")); EXPECT_EQ(kGumboNoChar, output_.first); EXPECT_EQ(kGumboNoChar, output_.second); } TEST_F(CharRefTest, NamedReplacementInvalid) { errors_are_expected_ = true; EXPECT_FALSE(ConsumeCharRef("&google;")); EXPECT_EQ(kGumboNoChar, output_.first); EXPECT_EQ(kGumboNoChar, output_.second); EXPECT_EQ('&', utf8iterator_current(&iter_)); } // TEST_F(CharRefTest, NamedReplacementInvalidNoSemicolon) { // EXPECT_FALSE(ConsumeCharRef("&google")); // EXPECT_EQ(kGumboNoChar, output_.first); // EXPECT_EQ(kGumboNoChar, output_.second); // EXPECT_EQ('&', utf8iterator_current(&iter_)); //} TEST_F(CharRefTest, AdditionalAllowedChar) { EXPECT_TRUE(ConsumeCharRef("&\"", '"', false)); EXPECT_EQ(kGumboNoChar, output_.first); EXPECT_EQ(kGumboNoChar, output_.second); EXPECT_EQ('&', utf8iterator_current(&iter_)); } TEST_F(CharRefTest, InAttribute) { EXPECT_TRUE(ConsumeCharRef("¬ed", ' ', true)); EXPECT_EQ(kGumboNoChar, output_.first); EXPECT_EQ(kGumboNoChar, output_.second); EXPECT_EQ('&', utf8iterator_current(&iter_)); } TEST_F(CharRefTest, MultiChars) { EXPECT_TRUE(ConsumeCharRef("⋵̸")); EXPECT_EQ(0x22F5, output_.first); EXPECT_EQ(0x0338, output_.second); } TEST_F(CharRefTest, CharAfter) { EXPECT_TRUE(ConsumeCharRef("<x")); EXPECT_EQ('<', output_.first); EXPECT_EQ(kGumboNoChar, output_.second); EXPECT_EQ('x', utf8iterator_current(&iter_)); } } // namespace