| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172 |
- // Copyright 2011 Google Inc. All Rights Reserved.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- //
- // Author: jdtang@google.com (Jonathan Tang)
- //
- // GUnit char_ref tests. These are quick smoke tests, mostly to identify
- // crashing bugs so that they can be fixed without having to debug
- // multi-language tests. As such, they focus on coverage rather than
- // completeness. For testing the full spec, use char_ref_py_tests, which share
- // their testdata with the Python html5lib library.
- #include "char_ref.h"
- #include <stdio.h>
- #include <string.h>
- #include "gtest/gtest.h"
- #include "test_utils.h"
- #include "utf8.h"
- namespace {
- class CharRefTest : public GumboTest {
- protected:
- bool ConsumeCharRef(const char* input) {
- return ConsumeCharRef(input, ' ', false);
- }
- bool ConsumeCharRef(
- const char* input, int additional_allowed_char, bool is_in_attribute) {
- text_ = input;
- utf8iterator_init(&parser_, input, strlen(input), &iter_);
- bool result = consume_char_ref(
- &parser_, &iter_, additional_allowed_char, is_in_attribute, &output_);
- fflush(stdout);
- return result;
- }
- Utf8Iterator iter_;
- OneOrTwoCodepoints output_;
- };
- TEST_F(CharRefTest, Whitespace) {
- EXPECT_TRUE(ConsumeCharRef(" "));
- EXPECT_EQ(kGumboNoChar, output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- }
- TEST_F(CharRefTest, NumericHex) {
- EXPECT_TRUE(ConsumeCharRef("ካ"));
- EXPECT_EQ(0x12ab, output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- }
- TEST_F(CharRefTest, NumericDecimal) {
- EXPECT_TRUE(ConsumeCharRef("Ӓ"));
- EXPECT_EQ(1234, output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- }
- TEST_F(CharRefTest, NumericInvalidDigit) {
- errors_are_expected_ = true;
- EXPECT_FALSE(ConsumeCharRef("&#google"));
- EXPECT_EQ(kGumboNoChar, output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- EXPECT_EQ('&', utf8iterator_current(&iter_));
- }
- TEST_F(CharRefTest, NumericNoSemicolon) {
- errors_are_expected_ = true;
- EXPECT_FALSE(ConsumeCharRef("Ӓgoogle"));
- EXPECT_EQ(1234, output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- EXPECT_EQ('g', utf8iterator_current(&iter_));
- }
- TEST_F(CharRefTest, NumericReplacement) {
- errors_are_expected_ = true;
- EXPECT_FALSE(ConsumeCharRef("‚"));
- // Low quotation mark character.
- EXPECT_EQ(0x201A, output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- }
- TEST_F(CharRefTest, NumericInvalid) {
- errors_are_expected_ = true;
- EXPECT_FALSE(ConsumeCharRef("�"));
- EXPECT_EQ(0xFFFD, output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- }
- TEST_F(CharRefTest, NumericUtfInvalid) {
- errors_are_expected_ = true;
- EXPECT_FALSE(ConsumeCharRef(""));
- EXPECT_EQ(0x7, output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- }
- TEST_F(CharRefTest, NamedReplacement) {
- EXPECT_TRUE(ConsumeCharRef("<"));
- EXPECT_EQ('<', output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- }
- TEST_F(CharRefTest, NamedReplacementNoSemicolon) {
- errors_are_expected_ = true;
- EXPECT_FALSE(ConsumeCharRef(">"));
- EXPECT_EQ('>', output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- }
- TEST_F(CharRefTest, NamedReplacementWithInvalidUtf8) {
- errors_are_expected_ = true;
- EXPECT_TRUE(ConsumeCharRef("&\xc3\xa5"));
- EXPECT_EQ(kGumboNoChar, output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- }
- TEST_F(CharRefTest, NamedReplacementInvalid) {
- errors_are_expected_ = true;
- EXPECT_FALSE(ConsumeCharRef("&google;"));
- EXPECT_EQ(kGumboNoChar, output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- EXPECT_EQ('&', utf8iterator_current(&iter_));
- }
- // TEST_F(CharRefTest, NamedReplacementInvalidNoSemicolon) {
- // EXPECT_FALSE(ConsumeCharRef("&google"));
- // EXPECT_EQ(kGumboNoChar, output_.first);
- // EXPECT_EQ(kGumboNoChar, output_.second);
- // EXPECT_EQ('&', utf8iterator_current(&iter_));
- //}
- TEST_F(CharRefTest, AdditionalAllowedChar) {
- EXPECT_TRUE(ConsumeCharRef("&\"", '"', false));
- EXPECT_EQ(kGumboNoChar, output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- EXPECT_EQ('&', utf8iterator_current(&iter_));
- }
- TEST_F(CharRefTest, InAttribute) {
- EXPECT_TRUE(ConsumeCharRef("¬ed", ' ', true));
- EXPECT_EQ(kGumboNoChar, output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- EXPECT_EQ('&', utf8iterator_current(&iter_));
- }
- TEST_F(CharRefTest, MultiChars) {
- EXPECT_TRUE(ConsumeCharRef("⋵̸"));
- EXPECT_EQ(0x22F5, output_.first);
- EXPECT_EQ(0x0338, output_.second);
- }
- TEST_F(CharRefTest, CharAfter) {
- EXPECT_TRUE(ConsumeCharRef("<x"));
- EXPECT_EQ('<', output_.first);
- EXPECT_EQ(kGumboNoChar, output_.second);
- EXPECT_EQ('x', utf8iterator_current(&iter_));
- }
- } // namespace
|