// Unit tests for the tokenizer: a GumboTokenizerTest fixture (SetInput feeds
// text to the tokenizer, Advance lexes and discards a number of tokens) plus
// test cases that pull tokens with gumbo_lex and check their type, position
// and payload.
//
// NOTE(review): the line structure of this file looks collapsed -- many
// statements share one physical line, so every `//` comment also comments out
// the code that follows it on that line. Several SetInput() literals are empty
// or lack the markup their assertions expect (for example the Doctype test
// passes "" yet asserts a doctype named "html"), one `#include` has no header
// name, and `static_cast(` has no target type. Presumably markup-like text
// was stripped by an extraction step -- TODO confirm against a pristine copy
// before relying on this file.
// Copyright 2011 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Author: jdtang@google.com (Jonathan Tang) #include "tokenizer.h" #include #include "gtest/gtest.h" #include "test_utils.h" extern const char* kGumboTagNames[]; namespace { // Tests for tokenizer.c class GumboTokenizerTest : public GumboTest { protected: GumboTokenizerTest() { gumbo_tokenizer_state_init(&parser_, "", 0); } virtual ~GumboTokenizerTest() { gumbo_tokenizer_state_destroy(&parser_); gumbo_token_destroy(&parser_, &token_); } void SetInput(const char* input) { text_ = input; gumbo_tokenizer_state_destroy(&parser_); gumbo_tokenizer_state_init(&parser_, input, strlen(input)); } void Advance(int num_tokens) { for (int i = 0; i < num_tokens; ++i) { EXPECT_TRUE(gumbo_lex(&parser_, &token_)); gumbo_token_destroy(&parser_, &token_); } } GumboToken token_; }; TEST(GumboTagEnumTest, TagEnumIncludesAllTags) { EXPECT_EQ(150, GUMBO_TAG_UNKNOWN); EXPECT_STREQ("", kGumboTagNames[GUMBO_TAG_UNKNOWN]); } TEST_F(GumboTokenizerTest, PartialTag) { SetInput("\n" " "); Advance(4); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); // GumboTokenStartTag* start_tag = &token_.v.start_tag; EXPECT_EQ(GUMBO_TAG_SPAN, start_tag->tag); EXPECT_EQ(2, token_.position.line); EXPECT_EQ(3, token_.position.column); ASSERT_EQ(1, start_tag->attributes.length); GumboAttribute* clas = static_cast(start_tag->attributes.data[0]); EXPECT_STREQ("class", clas->name); EXPECT_EQ("class", 
ToString(clas->original_name)); EXPECT_EQ(2, clas->name_start.line); EXPECT_EQ(9, clas->name_start.column); EXPECT_EQ(14, clas->name_end.column); EXPECT_STREQ("foo", clas->value); EXPECT_EQ("foo", ToString(clas->original_value)); EXPECT_EQ(15, clas->value_start.column); EXPECT_EQ(18, clas->value_end.column); } TEST_F(GumboTokenizerTest, Doctype) { SetInput(""); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); ASSERT_EQ(GUMBO_TOKEN_DOCTYPE, token_.type); EXPECT_EQ(0, token_.position.offset); GumboTokenDocType* doc_type = &token_.v.doc_type; EXPECT_FALSE(doc_type->force_quirks); EXPECT_FALSE(doc_type->has_public_identifier); EXPECT_FALSE(doc_type->has_system_identifier); EXPECT_STREQ("html", doc_type->name); EXPECT_STREQ("", doc_type->public_identifier); EXPECT_STREQ("", doc_type->system_identifier); } TEST_F(GumboTokenizerTest, DoctypePublic) { SetInput( ""); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); ASSERT_EQ(GUMBO_TOKEN_DOCTYPE, token_.type); EXPECT_EQ(0, token_.position.offset); GumboTokenDocType* doc_type = &token_.v.doc_type; EXPECT_FALSE(doc_type->force_quirks); EXPECT_TRUE(doc_type->has_public_identifier); EXPECT_TRUE(doc_type->has_system_identifier); EXPECT_STREQ("html", doc_type->name); EXPECT_STREQ( "-//W3C//DTD XHTML 1.0 Transitional//EN", doc_type->public_identifier); EXPECT_STREQ("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", doc_type->system_identifier); } TEST_F(GumboTokenizerTest, DoctypeSystem) { SetInput(""); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); ASSERT_EQ(GUMBO_TOKEN_DOCTYPE, token_.type); EXPECT_EQ(0, token_.position.offset); GumboTokenDocType* doc_type = &token_.v.doc_type; EXPECT_FALSE(doc_type->force_quirks); EXPECT_FALSE(doc_type->has_public_identifier); EXPECT_TRUE(doc_type->has_system_identifier); EXPECT_STREQ("root_element", doc_type->name); EXPECT_STREQ("DTD_location", doc_type->system_identifier); } TEST_F(GumboTokenizerTest, DoctypeUnterminated) { SetInput("force_quirks); EXPECT_TRUE(doc_type->has_public_identifier); 
// The public identifier is expected to be flagged as present, while the system
// identifier must be flagged absent and compare equal to the empty string.
EXPECT_FALSE(doc_type->has_system_identifier); EXPECT_STREQ("a", doc_type->name); EXPECT_STREQ("", doc_type->system_identifier); } TEST_F(GumboTokenizerTest, RawtextEnd) { SetInput("x ignores <tag>"); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_START_TAG, token_.type); EXPECT_EQ(GUMBO_TAG_TITLE, token_.v.start_tag.tag); gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_RAWTEXT); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('x', token_.v.character); gumbo_token_destroy(&parser_, &token_); Advance(9); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('<', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('t', token_.v.character); gumbo_token_destroy(&parser_, &token_); Advance(3); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_END_TAG, token_.type); EXPECT_EQ(GUMBO_TAG_TITLE, token_.v.end_tag); } TEST_F(GumboTokenizerTest, RCDataEnd) { SetInput("x"); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_START_TAG, token_.type); EXPECT_EQ(GUMBO_TAG_TITLE, token_.v.start_tag.tag); gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_RCDATA); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('x', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_END_TAG, token_.type); EXPECT_EQ(GUMBO_TAG_TITLE, token_.v.end_tag); } TEST_F(GumboTokenizerTest, ScriptEnd) { SetInput(""); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_START_TAG, token_.type); EXPECT_EQ(GUMBO_TAG_SCRIPT, token_.v.start_tag.tag); gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_SCRIPT); gumbo_token_destroy(&parser_, &token_); 
// With the tokenizer switched to GUMBO_LEX_SCRIPT, the next token must be the
// character 'x'; six tokens are then skipped before '<', '/' and '\'' are
// checked one by one.
EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('x', token_.v.character); gumbo_token_destroy(&parser_, &token_); Advance(6); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('<', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('/', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('\'', token_.v.character); gumbo_token_destroy(&parser_, &token_); Advance(1); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_END_TAG, token_.type); EXPECT_EQ(GUMBO_TAG_SCRIPT, token_.v.end_tag); } TEST_F(GumboTokenizerTest, ScriptEscapedEnd) { SetInput("x"); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_START_TAG, token_.type); EXPECT_EQ(GUMBO_TAG_TITLE, token_.v.start_tag.tag); gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_SCRIPT_ESCAPED); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('x', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_END_TAG, token_.type); EXPECT_EQ(GUMBO_TAG_TITLE, token_.v.end_tag); } TEST_F(GumboTokenizerTest, ScriptCommentEscaped) { SetInput( ""); Advance(1); gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_SCRIPT); Advance(15); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('x', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_WHITESPACE, token_.type); EXPECT_EQ(' ', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, 
token_.type); EXPECT_EQ('<', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_WHITESPACE, token_.type); EXPECT_EQ(' ', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('7', token_.v.character); gumbo_token_destroy(&parser_, &token_); Advance(4); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('<', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('/', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('d', token_.v.character); gumbo_token_destroy(&parser_, &token_); Advance(25); } TEST_F(GumboTokenizerTest, ScriptEscapedEmbeddedLessThan) { SetInput(""); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_START_TAG, token_.type); EXPECT_EQ(GUMBO_TAG_SCRIPT, token_.v.start_tag.tag); gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_SCRIPT); gumbo_token_destroy(&parser_, &token_); Advance(14); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('x', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('<', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('7', token_.v.character); gumbo_token_destroy(&parser_, &token_); Advance(8); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_END_TAG, token_.type); EXPECT_EQ(GUMBO_TAG_SCRIPT, token_.v.end_tag); } TEST_F(GumboTokenizerTest, ScriptHasTagEmbedded) { SetInput(""); Advance(1); 
// After discarding the first token, switch to GUMBO_LEX_SCRIPT and skip eleven
// more tokens; the following four must be the characters '<', '/', 'd', 'i'.
gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_SCRIPT); Advance(11); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('<', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('/', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('d', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('i', token_.v.character); } TEST_F(GumboTokenizerTest, ScriptDoubleEscaped) { SetInput( ""); Advance(1); gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_SCRIPT); Advance(34); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('<', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('s', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('C', token_.v.character); gumbo_token_destroy(&parser_, &token_); Advance(20); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('-', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('-', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('>', token_.v.character); } TEST_F(GumboTokenizerTest, CData) { // SetInput uses strlen and so can't handle nulls. 
// The tokenizer is therefore re-initialized by hand, taking the input length
// from sizeof on the literal minus one (the terminating NUL) instead of strlen,
// and the current node is marked as foreign before lexing.
text_ = ""; gumbo_tokenizer_state_destroy(&parser_); gumbo_tokenizer_state_init( &parser_, text_, sizeof("") - 1); gumbo_tokenizer_set_is_current_node_foreign(&parser_, true); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_NULL, token_.type); EXPECT_EQ(0, token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CDATA, token_.type); EXPECT_EQ('f', token_.v.character); } TEST_F(GumboTokenizerTest, StyleHasTagEmbedded) { SetInput(""); Advance(1); gumbo_tokenizer_set_state(&parser_, GUMBO_LEX_RCDATA); Advance(7); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('<', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('h', token_.v.character); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('e', token_.v.character); } TEST_F(GumboTokenizerTest, PreWithNewlines) { SetInput("
\r\na
"); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); ASSERT_EQ(GUMBO_TOKEN_DOCTYPE, token_.type); EXPECT_EQ(0, token_.position.offset); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); ASSERT_EQ(GUMBO_TOKEN_START_TAG, token_.type); EXPECT_EQ("
", ToString(token_.original_text));
  // The start tag that follows the doctype is expected to begin at offset 15.
  EXPECT_EQ(15, token_.position.offset);
}

// Lexes one token and checks that it is a start tag at offset 0, recognized as
// GUMBO_TAG_BR, carrying no attributes and flagged as self-closing.
TEST_F(GumboTokenizerTest, SelfClosingStartTag) {
  // NOTE(review): the literal passed to SetInput below holds only a raw line
  // break, yet the assertions expect a BR start tag; the tag text appears to be
  // missing from this copy -- TODO confirm against a pristine copy.
  SetInput("
"); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); ASSERT_EQ(GUMBO_TOKEN_START_TAG, token_.type); EXPECT_EQ(0, token_.position.offset); EXPECT_EQ("
", ToString(token_.original_text)); GumboTokenStartTag* start_tag = &token_.v.start_tag; EXPECT_EQ(GUMBO_TAG_BR, start_tag->tag); EXPECT_EQ(0, start_tag->attributes.length); EXPECT_TRUE(start_tag->is_self_closing); } TEST_F(GumboTokenizerTest, OpenTagWithAttributes) { SetInput(""); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); ASSERT_EQ(GUMBO_TOKEN_START_TAG, token_.type); GumboTokenStartTag* start_tag = &token_.v.start_tag; EXPECT_EQ(GUMBO_TAG_A, start_tag->tag); EXPECT_FALSE(start_tag->is_self_closing); ASSERT_EQ(2, start_tag->attributes.length); GumboAttribute* href = static_cast(start_tag->attributes.data[0]); EXPECT_STREQ("href", href->name); EXPECT_EQ("href", ToString(href->original_name)); EXPECT_STREQ("/search?q=foo&hl=en", href->value); EXPECT_EQ("'/search?q=foo&hl=en'", ToString(href->original_value)); GumboAttribute* id = static_cast(start_tag->attributes.data[1]); EXPECT_STREQ("id", id->name); EXPECT_EQ("id", ToString(id->original_name)); EXPECT_STREQ("link", id->value); EXPECT_EQ("link", ToString(id->original_value)); } TEST_F(GumboTokenizerTest, BogusComment1) { SetInput("Text"); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); ASSERT_EQ(GUMBO_TOKEN_COMMENT, token_.type); EXPECT_STREQ("?xml is bogus-comment", token_.v.text); gumbo_token_destroy(&parser_, &token_); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); EXPECT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ('T', token_.v.character); errors_are_expected_ = true; } TEST_F(GumboTokenizerTest, BogusComment2) { SetInput(""); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); ASSERT_EQ(GUMBO_TOKEN_START_TAG, token_.type); GumboTokenStartTag* start_tag = &token_.v.start_tag; EXPECT_EQ(GUMBO_TAG_UNKNOWN, start_tag->tag); EXPECT_TRUE(start_tag->is_self_closing); ASSERT_EQ(1, start_tag->attributes.length); GumboAttribute* long_attr = static_cast(start_tag->attributes.data[0]); EXPECT_STREQ("long_attr", long_attr->name); EXPECT_EQ("long_attr", ToString(long_attr->original_name)); EXPECT_STREQ( "SomeCode;\n" " 
calls_a_big_long_function();\n" " return true;", long_attr->value); } TEST_F(GumboTokenizerTest, DoubleAmpersand) { SetInput(""); EXPECT_TRUE(gumbo_lex(&parser_, &token_)); ASSERT_EQ(GUMBO_TOKEN_START_TAG, token_.type); GumboTokenStartTag* start_tag = &token_.v.start_tag; EXPECT_EQ(GUMBO_TAG_SPAN, start_tag->tag); EXPECT_FALSE(start_tag->is_self_closing); ASSERT_EQ(1, start_tag->attributes.length); GumboAttribute* jsif = static_cast(start_tag->attributes.data[0]); EXPECT_STREQ("jsif", jsif->name); EXPECT_EQ("jsif", ToString(jsif->original_name)); EXPECT_STREQ("foo && bar", jsif->value); EXPECT_EQ("\"foo && bar\"", ToString(jsif->original_value)); } TEST_F(GumboTokenizerTest, MatchedTagPair) { SetInput("
a
"); ASSERT_TRUE(gumbo_lex(&parser_, &token_)); ASSERT_EQ(GUMBO_TOKEN_START_TAG, token_.type); EXPECT_EQ(0, token_.position.offset); GumboTokenStartTag* start_tag = &token_.v.start_tag; EXPECT_EQ(GUMBO_TAG_DIV, start_tag->tag); EXPECT_FALSE(start_tag->is_self_closing); ASSERT_EQ(2, start_tag->attributes.length); GumboAttribute* id = static_cast(start_tag->attributes.data[0]); EXPECT_STREQ("id", id->name); EXPECT_EQ("id", ToString(id->original_name)); EXPECT_EQ(1, id->name_start.line); EXPECT_EQ(5, id->name_start.offset); EXPECT_EQ(6, id->name_start.column); EXPECT_EQ(8, id->name_end.column); EXPECT_STREQ("dash<-Dash", id->value); EXPECT_EQ("dash<-Dash", ToString(id->original_value)); EXPECT_EQ(9, id->value_start.column); EXPECT_EQ(19, id->value_end.column); GumboAttribute* data_attr = static_cast(start_tag->attributes.data[1]); EXPECT_STREQ("data-test", data_attr->name); EXPECT_EQ("data-test", ToString(data_attr->original_name)); EXPECT_EQ(20, data_attr->name_start.column); EXPECT_EQ(29, data_attr->name_end.column); EXPECT_STREQ("bar", data_attr->value); EXPECT_EQ("\"bar\"", ToString(data_attr->original_value)); EXPECT_EQ(30, data_attr->value_start.column); EXPECT_EQ(35, data_attr->value_end.column); gumbo_token_destroy(&parser_, &token_); ASSERT_TRUE(gumbo_lex(&parser_, &token_)); ASSERT_EQ(GUMBO_TOKEN_CHARACTER, token_.type); EXPECT_EQ(35, token_.position.offset); EXPECT_EQ('a', token_.v.character); gumbo_token_destroy(&parser_, &token_); ASSERT_TRUE(gumbo_lex(&parser_, &token_)); ASSERT_EQ(GUMBO_TOKEN_END_TAG, token_.type); EXPECT_EQ(GUMBO_TAG_DIV, token_.v.end_tag); errors_are_expected_ = true; } TEST_F(GumboTokenizerTest, BogusEndTag) { // According to the spec, the correct parse of this is an end tag token for // "" (notice the ending bracket) with the attribute "th=th" (ignored // because end tags don't take attributes), with the tokenizer passing through // the self-closing tag state in the process. 
// The assertions that follow expect a single end tag token at offset 0 whose
// tag is GUMBO_TAG_UNKNOWN. errors_are_expected_ is then set; presumably this
// tells the fixture that parse errors are acceptable for this input -- confirm
// in GumboTest, which is not visible here.
SetInput(""); ASSERT_TRUE(gumbo_lex(&parser_, &token_)); ASSERT_EQ(GUMBO_TOKEN_END_TAG, token_.type); EXPECT_EQ(0, token_.position.offset); EXPECT_EQ(GUMBO_TAG_UNKNOWN, token_.v.end_tag); EXPECT_EQ("", ToString(token_.original_text)); errors_are_expected_ = true; } } // namespace