// Repository: sfreundel/SharpMuPDF
//
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: jdtang@google.com (Jonathan Tang)
//
// GUnit char_ref tests.  These are quick smoke tests, mostly to identify
// crashing bugs so that they can be fixed without having to debug
// multi-language tests.  As such, they focus on coverage rather than
// completeness.  For testing the full spec, use char_ref_py_tests, which share
// their testdata with the Python html5lib library.

#include "char_ref.h"

#include <stdio.h>
#include <string.h>

#include "gtest/gtest.h"
#include "test_utils.h"
#include "utf8.h"

namespace {

// Fixture for the consume_char_ref smoke tests.  A ConsumeCharRef call points
// iter_ at the supplied input, runs consume_char_ref over it, and leaves the
// decoded codepoints in output_ so the test body can inspect both the result
// and the iterator position.
class CharRefTest : public GumboTest {
 protected:
  // Runs consume_char_ref over `input`, forwarding `additional_allowed_char`
  // and `is_in_attribute` unchanged.  Returns whatever consume_char_ref
  // returns.  text_ and parser_ are not declared in this class; presumably
  // they are inherited from GumboTest.
  bool ConsumeCharRef(
      const char* input, int additional_allowed_char, bool is_in_attribute) {
    text_ = input;
    const size_t input_length = strlen(input);
    utf8iterator_init(&parser_, input, input_length, &iter_);
    const bool consumed = consume_char_ref(
        &parser_, &iter_, additional_allowed_char, is_in_attribute, &output_);
    // Flush stdout before handing control back to the test body.
    fflush(stdout);
    return consumed;
  }

  // Convenience overload: uses ' ' as the additional allowed character and
  // treats the input as not being inside an attribute.
  bool ConsumeCharRef(const char* input) {
    return ConsumeCharRef(input, ' ', false);
  }

  // Iterator over the most recent input passed to ConsumeCharRef.
  Utf8Iterator iter_;
  // Codepoint(s) written by the most recent consume_char_ref call.
  OneOrTwoCodepoints output_;
};

// Input that starts with a space: the call is expected to succeed without
// producing any codepoint.
TEST_F(CharRefTest, Whitespace) {
  const bool consumed = ConsumeCharRef(" &nbsp;");
  EXPECT_TRUE(consumed);
  EXPECT_EQ(kGumboNoChar, output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
}

// A semicolon-terminated hexadecimal reference is expected to yield its
// numeric value as the only codepoint.
TEST_F(CharRefTest, NumericHex) {
  const char* const input = "&#x12ab;";
  EXPECT_TRUE(ConsumeCharRef(input));
  EXPECT_EQ(0x12ab, output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
}

// A semicolon-terminated decimal reference is expected to yield its numeric
// value as the only codepoint.
TEST_F(CharRefTest, NumericDecimal) {
  const char* const input = "&#1234;";
  EXPECT_TRUE(ConsumeCharRef(input));
  EXPECT_EQ(1234, output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
}

// "&#" followed by a non-digit: the call is expected to fail, produce no
// codepoint, and leave the iterator on the leading '&'.
TEST_F(CharRefTest, NumericInvalidDigit) {
  errors_are_expected_ = true;

  const bool consumed = ConsumeCharRef("&#google");
  EXPECT_FALSE(consumed);

  EXPECT_EQ(kGumboNoChar, output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
  EXPECT_EQ('&', utf8iterator_current(&iter_));
}

// A decimal reference without the closing ';': the call is expected to return
// false yet still report the parsed value, with the iterator left on the
// first character after the digits.
TEST_F(CharRefTest, NumericNoSemicolon) {
  errors_are_expected_ = true;

  const bool consumed = ConsumeCharRef("&#1234google");
  EXPECT_FALSE(consumed);

  EXPECT_EQ(1234, output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
  EXPECT_EQ('g', utf8iterator_current(&iter_));
}

// The reference "&#X82" (upper-case X, no semicolon) is expected to return
// false and to be reported as a different codepoint than the one written.
TEST_F(CharRefTest, NumericReplacement) {
  errors_are_expected_ = true;

  const char* const input = "&#X82";
  EXPECT_FALSE(ConsumeCharRef(input));

  // U+201A is the low quotation mark character.
  EXPECT_EQ(0x201A, output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
}

// 0xDA00 lies in the surrogate range; the call is expected to return false
// and report U+FFFD (the replacement character) instead.
TEST_F(CharRefTest, NumericInvalid) {
  errors_are_expected_ = true;

  const char* const input = "&#xDA00";
  EXPECT_FALSE(ConsumeCharRef(input));

  EXPECT_EQ(0xFFFD, output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
}

// "&#x007" (no semicolon): the call is expected to return false while still
// reporting the value 0x7 unchanged.
TEST_F(CharRefTest, NumericUtfInvalid) {
  errors_are_expected_ = true;

  const char* const input = "&#x007";
  EXPECT_FALSE(ConsumeCharRef(input));

  EXPECT_EQ(0x7, output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
}

// A semicolon-terminated named reference is expected to yield the single
// character it names.
TEST_F(CharRefTest, NamedReplacement) {
  const bool consumed = ConsumeCharRef("&lt;");
  EXPECT_TRUE(consumed);
  EXPECT_EQ('<', output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
}

// A named reference without the closing ';': the call is expected to return
// false but still report the named character.
TEST_F(CharRefTest, NamedReplacementNoSemicolon) {
  errors_are_expected_ = true;

  const bool consumed = ConsumeCharRef("&gt");
  EXPECT_FALSE(consumed);

  EXPECT_EQ('>', output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
}

// '&' followed by the two bytes 0xC3 0xA5: the call is expected to succeed
// and produce no codepoint.
// NOTE(review): despite the test name, 0xC3 0xA5 is the well-formed UTF-8
// encoding of U+00E5, so this exercises a non-ASCII character rather than a
// malformed byte sequence.
TEST_F(CharRefTest, NamedReplacementWithInvalidUtf8) {
  errors_are_expected_ = true;

  const char* const input = "&\xc3\xa5";
  EXPECT_TRUE(ConsumeCharRef(input));

  EXPECT_EQ(kGumboNoChar, output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
}

// A semicolon-terminated name that this test treats as unknown: the call is
// expected to fail, produce no codepoint, and leave the iterator on the
// leading '&'.
TEST_F(CharRefTest, NamedReplacementInvalid) {
  errors_are_expected_ = true;

  const bool consumed = ConsumeCharRef("&google;");
  EXPECT_FALSE(consumed);

  EXPECT_EQ(kGumboNoChar, output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
  EXPECT_EQ('&', utf8iterator_current(&iter_));
}

// TEST_F(CharRefTest, NamedReplacementInvalidNoSemicolon) {
//  EXPECT_FALSE(ConsumeCharRef("&google"));
//  EXPECT_EQ(kGumboNoChar, output_.first);
//  EXPECT_EQ(kGumboNoChar, output_.second);
//  EXPECT_EQ('&', utf8iterator_current(&iter_));
//}

// '&' immediately followed by the additional allowed character: the call is
// expected to succeed, produce no codepoint, and leave the iterator on '&'.
TEST_F(CharRefTest, AdditionalAllowedChar) {
  const int additional_allowed = '"';
  const bool in_attribute = false;

  const bool consumed = ConsumeCharRef("&\"", additional_allowed, in_attribute);
  EXPECT_TRUE(consumed);

  EXPECT_EQ(kGumboNoChar, output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
  EXPECT_EQ('&', utf8iterator_current(&iter_));
}

// With is_in_attribute set, "&noted" is expected to be left alone: the call
// succeeds, produces no codepoint, and the iterator stays on '&'.
TEST_F(CharRefTest, InAttribute) {
  const int additional_allowed = ' ';
  const bool in_attribute = true;

  const bool consumed = ConsumeCharRef("&noted", additional_allowed, in_attribute);
  EXPECT_TRUE(consumed);

  EXPECT_EQ(kGumboNoChar, output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
  EXPECT_EQ('&', utf8iterator_current(&iter_));
}

// A named reference that expands to two codepoints: both slots of output_
// are expected to be filled.
TEST_F(CharRefTest, MultiChars) {
  const bool consumed = ConsumeCharRef("&notindot;");
  EXPECT_TRUE(consumed);
  EXPECT_EQ(0x22F5, output_.first);
  EXPECT_EQ(0x0338, output_.second);
}

// A complete named reference followed by more text: the reference is expected
// to be decoded and the iterator left on the character right after the ';'.
TEST_F(CharRefTest, CharAfter) {
  const bool consumed = ConsumeCharRef("&lt;x");
  EXPECT_TRUE(consumed);

  EXPECT_EQ('<', output_.first);
  EXPECT_EQ(kGumboNoChar, output_.second);
  EXPECT_EQ('x', utf8iterator_current(&iter_));
}

}  // namespace