// Copyright 2011 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Author: jdtang@google.com (Jonathan Tang) #include "gumbo.h" #include #include "gtest/gtest.h" #include "test_utils.h" namespace { class GumboParserTest : public ::testing::Test { protected: GumboParserTest() : options_(kGumboDefaultOptions), output_(NULL), root_(NULL) { InitLeakDetection(&options_, &malloc_stats_); } virtual ~GumboParserTest() { if (output_) { gumbo_destroy_output(&options_, output_); } EXPECT_EQ(malloc_stats_.objects_allocated, malloc_stats_.objects_freed); } virtual void Parse(const char* input) { if (output_) { gumbo_destroy_output(&options_, output_); } output_ = gumbo_parse_with_options(&options_, input, strlen(input)); // The naming inconsistency is because these tests were initially written // when gumbo_parse returned the document element instead of an GumboOutput // structure. root_ = output_->document; } virtual void ParseFragment( const char* input, GumboTag context, GumboNamespaceEnum context_ns) { if (output_) { gumbo_destroy_output(&options_, output_); } options_.fragment_context = context; options_.fragment_namespace = context_ns; output_ = gumbo_parse_with_options(&options_, input, strlen(input)); root_ = output_->document; } virtual void Parse(const std::string& input) { // This overload is so we can test/demonstrate that computing offsets from // the .data() member of an STL string works properly. 
if (output_) { gumbo_destroy_output(&options_, output_); } output_ = gumbo_parse_with_options(&options_, input.data(), input.length()); root_ = output_->document; SanityCheckPointers(input.data(), input.length(), output_->root, 1000); } MallocStats malloc_stats_; GumboOptions options_; GumboOutput* output_; GumboNode* root_; }; TEST_F(GumboParserTest, NullDocument) { Parse(""); ASSERT_TRUE(root_); ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type); EXPECT_EQ(GUMBO_INSERTION_BY_PARSER, root_->parse_flags); GumboNode* body; GetAndAssertBody(root_, &body); } TEST_F(GumboParserTest, ParseTwice) { Parse(""); ASSERT_TRUE(root_); ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type); std::string second_input(""); Parse(second_input); ASSERT_TRUE(root_); ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type); GumboNode* body; GetAndAssertBody(root_, &body); } TEST_F(GumboParserTest, OneChar) { std::string input("T"); Parse(input); ASSERT_TRUE(root_); ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type); EXPECT_EQ(GUMBO_INSERTION_BY_PARSER, root_->parse_flags); ASSERT_EQ(1, GetChildCount(root_)); GumboNode* html = GetChild(root_, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type); EXPECT_TRUE(html->parse_flags & GUMBO_INSERTION_BY_PARSER); EXPECT_TRUE(html->parse_flags & GUMBO_INSERTION_IMPLICIT_END_TAG); EXPECT_TRUE(html->parse_flags & GUMBO_INSERTION_IMPLIED); EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag); ASSERT_EQ(2, GetChildCount(html)); GumboNode* head = GetChild(html, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type); EXPECT_EQ(GUMBO_TAG_HEAD, head->v.element.tag); EXPECT_EQ(0, GetChildCount(head)); GumboNode* body = GetChild(html, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type); EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag); ASSERT_EQ(1, GetChildCount(body)); EXPECT_EQ(1, body->v.element.start_pos.line); EXPECT_EQ(1, body->v.element.start_pos.column); EXPECT_EQ(0, body->v.element.start_pos.offset); EXPECT_EQ(1, body->v.element.end_pos.line); EXPECT_EQ(2, body->v.element.end_pos.column); EXPECT_EQ(1, 
body->v.element.end_pos.offset); GumboNode* text = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_TEXT, text->type); EXPECT_STREQ("T", text->v.text.text); EXPECT_EQ(1, text->v.text.start_pos.line); EXPECT_EQ(1, text->v.text.start_pos.column); EXPECT_EQ(0, text->v.text.start_pos.offset); EXPECT_EQ(input.data(), text->v.text.original_text.data); EXPECT_EQ(1, text->v.text.original_text.length); } TEST_F(GumboParserTest, TextOnly) { Parse("Test"); EXPECT_EQ(1, output_->errors.length); // No doctype. ASSERT_EQ(1, GetChildCount(root_)); GumboNode* html = GetChild(root_, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type); EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag); ASSERT_EQ(2, GetChildCount(html)); GumboNode* head = GetChild(html, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type); EXPECT_EQ(GUMBO_TAG_HEAD, head->v.element.tag); EXPECT_EQ(0, GetChildCount(head)); GumboNode* body = GetChild(html, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type); EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag); ASSERT_EQ(1, GetChildCount(body)); GumboNode* text = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_TEXT, text->type); EXPECT_STREQ("Test", text->v.text.text); } TEST_F(GumboParserTest, SelfClosingTagError) { Parse("
"); // TODO(jdtang): I think this is double-counting some error cases, I think we // may ultimately want to de-dup errors that occur on the same token. EXPECT_EQ(8, output_->errors.length); } TEST_F(GumboParserTest, UnexpectedEndBreak) { Parse("
"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(2, GetChildCount(body)); GumboNode* br = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, br->type); EXPECT_EQ(GUMBO_TAG_BR, br->v.element.tag); ASSERT_EQ(0, GetChildCount(br)); GumboNode* div = GetChild(body, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type); EXPECT_EQ(GUMBO_TAG_DIV, div->v.element.tag); ASSERT_EQ(0, GetChildCount(div)); } TEST_F(GumboParserTest, CaseSensitiveAttributes) { Parse("
"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(1, GetChildCount(body)); GumboNode* div = GetChild(body, 0); GumboVector* attributes = &div->v.element.attributes; ASSERT_EQ(1, attributes->length); GumboAttribute* clas = static_cast(attributes->data[0]); EXPECT_EQ(GUMBO_ATTR_NAMESPACE_NONE, clas->attr_namespace); EXPECT_STREQ("class", clas->name); EXPECT_STREQ("CamelCase", clas->value); } TEST_F(GumboParserTest, ExplicitHtmlStructure) { Parse( "\n" "Foo\n" "
Test
"); ASSERT_EQ(1, GetChildCount(root_)); EXPECT_EQ(0, output_->errors.length); ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type); EXPECT_STREQ("html", root_->v.document.name); EXPECT_STREQ("", root_->v.document.public_identifier); EXPECT_STREQ("", root_->v.document.system_identifier); GumboNode* html = GetChild(root_, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type); EXPECT_EQ(GUMBO_INSERTION_NORMAL, html->parse_flags); EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag); EXPECT_EQ(2, html->v.element.start_pos.line); EXPECT_EQ(1, html->v.element.start_pos.column); EXPECT_EQ(16, html->v.element.start_pos.offset); EXPECT_EQ(3, html->v.element.end_pos.line); EXPECT_EQ(39, html->v.element.end_pos.column); EXPECT_EQ(92, html->v.element.end_pos.offset); EXPECT_EQ("", ToString(html->v.element.original_tag)); EXPECT_EQ("", ToString(html->v.element.original_end_tag)); ASSERT_EQ(3, GetChildCount(html)); GumboNode* head = GetChild(html, 0); EXPECT_EQ(GUMBO_INSERTION_NORMAL, head->parse_flags); ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type); EXPECT_EQ(GUMBO_TAG_HEAD, head->v.element.tag); EXPECT_EQ(html, head->parent); EXPECT_EQ(0, head->index_within_parent); EXPECT_EQ(1, GetChildCount(head)); GumboNode* body = GetChild(html, 2); EXPECT_EQ(GUMBO_INSERTION_NORMAL, body->parse_flags); ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type); EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag); EXPECT_EQ(html, body->parent); EXPECT_EQ(3, body->v.element.start_pos.line); EXPECT_EQ(1, body->v.element.start_pos.column); EXPECT_EQ(54, body->v.element.start_pos.offset); EXPECT_EQ(3, body->v.element.end_pos.line); EXPECT_EQ(32, body->v.element.end_pos.column); EXPECT_EQ(85, body->v.element.end_pos.offset); EXPECT_EQ("", ToString(body->v.element.original_tag)); EXPECT_EQ("", ToString(body->v.element.original_end_tag)); EXPECT_EQ(2, body->index_within_parent); ASSERT_EQ(1, GetChildCount(body)); GumboNode* div = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type); EXPECT_EQ(GUMBO_TAG_DIV, div->v.element.tag); 
EXPECT_EQ(body, div->parent); EXPECT_EQ(0, div->index_within_parent); ASSERT_EQ(1, GetChildCount(div)); ASSERT_EQ(1, GetAttributeCount(div)); GumboAttribute* clas = GetAttribute(div, 0); EXPECT_STREQ("class", clas->name); EXPECT_EQ("class", ToString(clas->original_name)); EXPECT_STREQ("bar", clas->value); EXPECT_EQ("bar", ToString(clas->original_value)); GumboNode* text = GetChild(div, 0); ASSERT_EQ(GUMBO_NODE_TEXT, text->type); EXPECT_STREQ("Test", text->v.text.text); } TEST_F(GumboParserTest, Whitespace) { Parse("
    \n
  • Text\n
"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(1, GetChildCount(body)); GumboNode* ul = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, ul->type); EXPECT_EQ(GUMBO_TAG_UL, ul->v.element.tag); ASSERT_EQ(2, GetChildCount(ul)); GumboNode* whitespace = GetChild(ul, 0); ASSERT_EQ(GUMBO_NODE_WHITESPACE, whitespace->type); EXPECT_STREQ("\n ", whitespace->v.text.text); GumboNode* li = GetChild(ul, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, li->type); EXPECT_EQ(GUMBO_TAG_LI, li->v.element.tag); ASSERT_EQ(1, GetChildCount(li)); GumboNode* text = GetChild(li, 0); ASSERT_EQ(GUMBO_NODE_TEXT, text->type); EXPECT_STREQ("Text\n", text->v.text.text); } TEST_F(GumboParserTest, DuplicateAttributes) { std::string text(""); Parse(text); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(1, GetChildCount(body)); GumboNode* input = GetChild(body, 0); EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, input->parse_flags); ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type); EXPECT_EQ(GUMBO_TAG_INPUT, input->v.element.tag); EXPECT_EQ(0, GetChildCount(input)); ASSERT_EQ(2, GetAttributeCount(input)); GumboAttribute* checked = GetAttribute(input, 0); EXPECT_STREQ("checked", checked->name); EXPECT_STREQ("false", checked->value); EXPECT_EQ(1, checked->name_start.line); EXPECT_EQ(8, checked->name_start.column); EXPECT_EQ(15, checked->name_end.column); EXPECT_EQ(16, checked->value_start.column); EXPECT_EQ(23, checked->value_end.column); EXPECT_EQ(7, checked->original_name.data - text.data()); EXPECT_EQ(7, checked->original_name.length); EXPECT_EQ(15, checked->original_value.data - text.data()); EXPECT_EQ(7, checked->original_value.length); GumboAttribute* id = GetAttribute(input, 1); EXPECT_STREQ("id", id->name); EXPECT_STREQ("foo", id->value); // TODO(jdtang): Run some assertions on the parse error that's added. 
} TEST_F(GumboParserTest, LinkTagsInHead) { Parse( "\n" " \n" " Sample title>\n\n" " \n" " \n" " \n" " Foo"); ASSERT_EQ(1, GetChildCount(root_)); GumboNode* html = GetChild(root_, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type); EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, html->parse_flags); EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag); ASSERT_EQ(3, GetChildCount(html)); GumboNode* head = GetChild(html, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type); EXPECT_EQ(GUMBO_INSERTION_NORMAL, head->parse_flags); EXPECT_EQ(GUMBO_TAG_HEAD, head->v.element.tag); EXPECT_EQ(7, GetChildCount(head)); GumboNode* text1 = GetChild(head, 2); ASSERT_EQ(GUMBO_NODE_WHITESPACE, text1->type); EXPECT_STREQ("\n\n ", text1->v.text.text); GumboNode* link1 = GetChild(head, 3); ASSERT_EQ(GUMBO_NODE_ELEMENT, link1->type); EXPECT_EQ(GUMBO_TAG_LINK, link1->v.element.tag); EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, link1->parse_flags); EXPECT_EQ(0, GetChildCount(link1)); GumboNode* text2 = GetChild(head, 4); ASSERT_EQ(GUMBO_NODE_WHITESPACE, text2->type); EXPECT_STREQ("\n ", text2->v.text.text); GumboNode* link2 = GetChild(head, 5); ASSERT_EQ(GUMBO_NODE_ELEMENT, link2->type); EXPECT_EQ(GUMBO_TAG_LINK, link2->v.element.tag); EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, link2->parse_flags); EXPECT_EQ(0, GetChildCount(link2)); GumboNode* text3 = GetChild(head, 6); ASSERT_EQ(GUMBO_NODE_WHITESPACE, text3->type); EXPECT_STREQ("\n ", text3->v.text.text); GumboNode* body = GetChild(html, 2); ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type); EXPECT_EQ(GUMBO_INSERTION_NORMAL, body->parse_flags); EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag); ASSERT_EQ(1, GetChildCount(body)); } TEST_F(GumboParserTest, WhitespaceBeforeHtml) { Parse("\nTest"); ASSERT_EQ(1, GetChildCount(root_)); GumboNode* body = GetChild(GetChild(root_, 0), 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type); EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body)); ASSERT_EQ(1, GetChildCount(body)); GumboNode* text = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_TEXT, 
text->type); EXPECT_STREQ("Test", text->v.text.text); } TEST_F(GumboParserTest, TextAfterHtml) { Parse("Test after doc"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type); EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body)); ASSERT_EQ(1, GetChildCount(body)); GumboNode* text = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_TEXT, text->type); EXPECT_STREQ("Test after doc", text->v.text.text); } TEST_F(GumboParserTest, WhitespaceInHead) { Parse(" Test"); GumboNode* html = GetChild(root_, 0); EXPECT_EQ(GUMBO_NODE_ELEMENT, html->type); EXPECT_EQ(GUMBO_TAG_HTML, GetTag(html)); EXPECT_EQ(2, GetChildCount(html)); GumboNode* head = GetChild(html, 0); EXPECT_EQ(GUMBO_NODE_ELEMENT, head->type); EXPECT_EQ(GUMBO_TAG_HEAD, GetTag(head)); EXPECT_EQ(0, GetChildCount(head)); GumboNode* body = GetChild(html, 1); EXPECT_EQ(GUMBO_NODE_ELEMENT, body->type); EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body)); GumboNode* text = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_TEXT, text->type); EXPECT_STREQ("Test", text->v.text.text); } TEST_F(GumboParserTest, Doctype) { Parse("Test"); GumboDocument* doc = &root_->v.document; EXPECT_EQ(1, doc->children.length); EXPECT_EQ(GUMBO_DOCTYPE_NO_QUIRKS, doc->doc_type_quirks_mode); EXPECT_STREQ("html", doc->name); EXPECT_STREQ("", doc->public_identifier); EXPECT_STREQ("", doc->system_identifier); } TEST_F(GumboParserTest, InvalidDoctype) { Parse("Test"); // Default doc token; the declared one is ignored. 
GumboDocument* doc = &root_->v.document; EXPECT_EQ(1, doc->children.length); EXPECT_EQ(GUMBO_DOCTYPE_QUIRKS, doc->doc_type_quirks_mode); EXPECT_STREQ("", doc->name); EXPECT_STREQ("", doc->public_identifier); EXPECT_STREQ("", doc->system_identifier); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type); EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body)); ASSERT_EQ(1, GetChildCount(body)); GumboNode* text = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_TEXT, text->type); EXPECT_STREQ("Test", text->v.text.text); } TEST_F(GumboParserTest, SingleComment) { Parse(""); GumboNode* comment = GetChild(root_, 0); ASSERT_EQ(GUMBO_NODE_COMMENT, comment->type); EXPECT_STREQ(" comment ", comment->v.text.text); } TEST_F(GumboParserTest, CommentInText) { Parse("Start end"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(3, GetChildCount(body)); GumboNode* start = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_TEXT, start->type); EXPECT_STREQ("Start ", start->v.text.text); GumboNode* comment = GetChild(body, 1); ASSERT_EQ(GUMBO_NODE_COMMENT, comment->type); EXPECT_EQ(body, comment->parent); EXPECT_EQ(1, comment->index_within_parent); EXPECT_STREQ(" comment ", comment->v.text.text); GumboNode* end = GetChild(body, 2); ASSERT_EQ(GUMBO_NODE_TEXT, end->type); EXPECT_STREQ(" end", end->v.text.text); } TEST_F(GumboParserTest, CommentBeforeNode) { Parse("\n

hello world!

"); GumboNode* comment = GetChild(root_, 0); ASSERT_EQ(GUMBO_NODE_COMMENT, comment->type); EXPECT_STREQ("This is a comment", comment->v.text.text); EXPECT_EQ( "", ToString(comment->v.text.original_text)); // Newline is ignored per the rules for "initial" insertion mode. GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(1, GetChildCount(body)); GumboNode* h1 = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, h1->type); EXPECT_EQ(GUMBO_TAG_H1, h1->v.element.tag); } TEST_F(GumboParserTest, CommentInVerbatimMode) { Parse("
Text
"); GumboNode* html = GetChild(root_, 0); EXPECT_EQ(GUMBO_NODE_ELEMENT, html->type); EXPECT_EQ(GUMBO_TAG_HTML, GetTag(html)); EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLIED | GUMBO_INSERTION_IMPLICIT_END_TAG, html->parse_flags); EXPECT_EQ(3, GetChildCount(html)); GumboNode* body = GetChild(html, 1); EXPECT_EQ(GUMBO_NODE_ELEMENT, body->type); EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body)); EXPECT_EQ(GUMBO_INSERTION_NORMAL, body->parse_flags); EXPECT_EQ(3, GetChildCount(body)); GumboNode* comment = GetChild(html, 2); ASSERT_EQ(GUMBO_NODE_COMMENT, comment->type); EXPECT_EQ(GUMBO_INSERTION_NORMAL, comment->parse_flags); EXPECT_STREQ(" comment \n\n", comment->v.text.text); } TEST_F(GumboParserTest, UnknownTag) { Parse("1

2"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(1, GetChildCount(body)); GumboNode* foo = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, foo->type); EXPECT_EQ(GUMBO_TAG_UNKNOWN, GetTag(foo)); EXPECT_EQ("", ToString(foo->v.element.original_tag)); // According to the spec, the misplaced end tag is ignored, and so we return // an empty original_end_tag text. We may want to extend our error-reporting // a bit so that we close off the tag that it *would have closed*, had the // HTML been correct, along with a parse flag that says the end tag was in the // wrong place. EXPECT_EQ("", ToString(foo->v.element.original_end_tag)); } TEST_F(GumboParserTest, UnknownTag2) { Parse("

"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(1, GetChildCount(body)); GumboNode* div = GetChild(body, 0); ASSERT_EQ(1, GetChildCount(div)); GumboNode* sarcasm = GetChild(div, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, sarcasm->type); EXPECT_EQ(GUMBO_TAG_UNKNOWN, GetTag(sarcasm)); EXPECT_EQ("", ToString(sarcasm->v.element.original_tag)); EXPECT_EQ("", ToString(sarcasm->v.element.original_end_tag)); } TEST_F(GumboParserTest, InvalidEndTag) { Parse(""); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(1, GetChildCount(body)); GumboNode* a = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, a->type); EXPECT_EQ(GUMBO_TAG_A, GetTag(a)); ASSERT_EQ(1, GetChildCount(a)); GumboNode* img = GetChild(a, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, img->type); EXPECT_EQ(GUMBO_TAG_IMG, GetTag(img)); ASSERT_EQ(0, GetChildCount(img)); } TEST_F(GumboParserTest, Tables) { Parse( "\n" "
\n" " \n" " \n" " \n" " " "
OneTwo
"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(4, GetChildCount(body)); GumboNode* br = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, br->type); EXPECT_EQ(GUMBO_TAG_BR, GetTag(br)); EXPECT_EQ(body, br->parent); EXPECT_EQ(0, br->index_within_parent); ASSERT_EQ(0, GetChildCount(br)); GumboNode* iframe = GetChild(body, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, iframe->type); EXPECT_EQ(GUMBO_TAG_IFRAME, GetTag(iframe)); ASSERT_EQ(0, GetChildCount(iframe)); GumboNode* table = GetChild(body, 2); ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type); EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table)); EXPECT_EQ(body, table->parent); EXPECT_EQ(2, table->index_within_parent); ASSERT_EQ(2, GetChildCount(table)); GumboNode* table_text = GetChild(table, 0); ASSERT_EQ(GUMBO_NODE_WHITESPACE, table_text->type); EXPECT_STREQ("\n ", table_text->v.text.text); GumboNode* tbody = GetChild(table, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type); EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody)); ASSERT_EQ(2, GetChildCount(tbody)); // Second node is whitespace. GumboNode* tr = GetChild(tbody, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type); EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr)); ASSERT_EQ(5, GetChildCount(tr)); // Including whitespace. 
GumboNode* tr_text = GetChild(tr, 0); ASSERT_EQ(GUMBO_NODE_WHITESPACE, tr_text->type); EXPECT_EQ(tr, tr_text->parent); EXPECT_EQ(0, tr_text->index_within_parent); EXPECT_STREQ("\n ", tr_text->v.text.text); GumboNode* th = GetChild(tr, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, th->type); EXPECT_EQ(GUMBO_TAG_TH, GetTag(th)); EXPECT_EQ(tr, th->parent); EXPECT_EQ(1, th->index_within_parent); ASSERT_EQ(1, GetChildCount(th)); GumboNode* th_text = GetChild(th, 0); ASSERT_EQ(GUMBO_NODE_TEXT, th_text->type); EXPECT_STREQ("One", th_text->v.text.text); GumboNode* td = GetChild(tr, 3); ASSERT_EQ(GUMBO_NODE_ELEMENT, td->type); EXPECT_EQ(GUMBO_TAG_TD, GetTag(td)); ASSERT_EQ(1, GetChildCount(td)); GumboNode* td_text = GetChild(td, 0); ASSERT_EQ(GUMBO_NODE_TEXT, td_text->type); EXPECT_STREQ("Two", td_text->v.text.text); GumboNode* td2_text = GetChild(td, 0); ASSERT_EQ(GUMBO_NODE_TEXT, td2_text->type); EXPECT_STREQ("Two", td2_text->v.text.text); GumboNode* div = GetChild(body, 3); ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type); EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div)); ASSERT_EQ(0, GetChildCount(div)); } TEST_F(GumboParserTest, StartParagraphInTable) { Parse("

foo

"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(2, GetChildCount(body)); GumboNode* paragraph = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, paragraph->type); EXPECT_EQ(GUMBO_TAG_P, GetTag(paragraph)); EXPECT_EQ(body, paragraph->parent); EXPECT_EQ(0, paragraph->index_within_parent); ASSERT_EQ(1, GetChildCount(paragraph)); GumboNode* text = GetChild(paragraph, 0); ASSERT_EQ(GUMBO_NODE_TEXT, text->type); EXPECT_STREQ("foo", text->v.text.text); GumboNode* table = GetChild(body, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type); EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table)); EXPECT_EQ(body, table->parent); EXPECT_EQ(1, table->index_within_parent); ASSERT_EQ(0, GetChildCount(table)); } TEST_F(GumboParserTest, EndParagraphInTable) { Parse("

"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(2, GetChildCount(body)); GumboNode* paragraph = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, paragraph->type); EXPECT_EQ(GUMBO_TAG_P, GetTag(paragraph)); EXPECT_EQ(body, paragraph->parent); EXPECT_EQ(0, paragraph->index_within_parent); ASSERT_EQ(0, GetChildCount(paragraph)); GumboNode* table = GetChild(body, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type); EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table)); EXPECT_EQ(body, table->parent); EXPECT_EQ(1, table->index_within_parent); ASSERT_EQ(0, GetChildCount(table)); } TEST_F(GumboParserTest, UnknownTagInTable) { Parse("bar
"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(2, GetChildCount(body)); GumboNode* foo = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, foo->type); EXPECT_EQ(GUMBO_TAG_UNKNOWN, GetTag(foo)); EXPECT_EQ("", ToString(foo->v.element.original_tag)); EXPECT_EQ(body, foo->parent); EXPECT_EQ(0, foo->index_within_parent); ASSERT_EQ(1, GetChildCount(foo)); GumboNode* bar = GetChild(foo, 0); ASSERT_EQ(GUMBO_NODE_TEXT, bar->type); EXPECT_STREQ("bar", bar->v.text.text); GumboNode* table = GetChild(body, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type); EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table)); EXPECT_EQ(body, table->parent); EXPECT_EQ(1, table->index_within_parent); ASSERT_EQ(0, GetChildCount(table)); } TEST_F(GumboParserTest, UnclosedTableTags) { Parse( "\n" " \n" "
One\n" " Two\n" "
Row2\n" "
Row3\n" "
\n" ""); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(2, GetChildCount(body)); GumboNode* table = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type); EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table)); ASSERT_EQ(2, GetChildCount(table)); GumboNode* table_text = GetChild(table, 0); ASSERT_EQ(GUMBO_NODE_WHITESPACE, table_text->type); EXPECT_STREQ("\n ", table_text->v.text.text); GumboNode* tbody = GetChild(table, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type); EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody)); ASSERT_EQ(3, GetChildCount(tbody)); GumboNode* tr = GetChild(tbody, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type); EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr)); ASSERT_EQ(3, GetChildCount(tr)); GumboNode* tr_text = GetChild(tr, 0); ASSERT_EQ(GUMBO_NODE_WHITESPACE, tr_text->type); EXPECT_STREQ("\n ", tr_text->v.text.text); GumboNode* td1 = GetChild(tr, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, td1->type); EXPECT_EQ(GUMBO_TAG_TD, GetTag(td1)); ASSERT_EQ(1, GetChildCount(td1)); GumboNode* td1_text = GetChild(td1, 0); ASSERT_EQ(GUMBO_NODE_TEXT, td1_text->type); EXPECT_STREQ("One\n ", td1_text->v.text.text); GumboNode* td2 = GetChild(tr, 2); ASSERT_EQ(GUMBO_NODE_ELEMENT, td2->type); EXPECT_EQ(GUMBO_TAG_TD, GetTag(td2)); ASSERT_EQ(1, GetChildCount(td2)); GumboNode* td2_text = GetChild(td2, 0); ASSERT_EQ(GUMBO_NODE_TEXT, td2_text->type); EXPECT_STREQ("Two\n ", td2_text->v.text.text); GumboNode* tr3 = GetChild(tbody, 2); ASSERT_EQ(GUMBO_NODE_ELEMENT, tr3->type); EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr3)); ASSERT_EQ(1, GetChildCount(tr3)); GumboNode* body_text = GetChild(body, 1); ASSERT_EQ(GUMBO_NODE_WHITESPACE, body_text->type); EXPECT_STREQ("\n", body_text->v.text.text); } TEST_F(GumboParserTest, MisnestedTable) { Parse("
"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(2, GetChildCount(body)); GumboNode* div = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type); EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div)); ASSERT_EQ(0, GetChildCount(div)); GumboNode* table = GetChild(body, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type); EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table)); ASSERT_EQ(1, GetChildCount(table)); GumboNode* tbody = GetChild(table, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type); EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody)); ASSERT_EQ(1, GetChildCount(tbody)); GumboNode* tr = GetChild(tbody, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type); EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr)); ASSERT_EQ(1, GetChildCount(tr)); GumboNode* td = GetChild(tr, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, td->type); EXPECT_EQ(GUMBO_TAG_TD, GetTag(td)); ASSERT_EQ(0, GetChildCount(td)); } TEST_F(GumboParserTest, MisnestedTable2) { Parse(" GumboNode* cell3 = GetChild(td1, 1); ASSERT_EQ(GUMBO_NODE_TEXT, cell3->type); EXPECT_STREQ("Cell3", cell3->v.text.text); GumboNode* table2 = GetChild(td1, 2); ASSERT_EQ(GUMBO_NODE_ELEMENT, table2->type); EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table2)); ASSERT_EQ(1, GetChildCount(table2)); GumboNode* tbody2 = GetChild(table2, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody2->type); EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody2)); ASSERT_EQ(2, GetChildCount(tbody2)); GumboNode* tr2 = GetChild(tbody2, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, tr2->type); EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr2)); ASSERT_EQ(1, GetChildCount(tr2)); GumboNode* th = GetChild(tr2, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, th->type); EXPECT_EQ(GUMBO_TAG_TH, GetTag(th)); ASSERT_EQ(1, GetChildCount(th)); GumboNode* cell2 = GetChild(th, 0); ASSERT_EQ(GUMBO_NODE_TEXT, cell2->type); EXPECT_STREQ("Cell2", cell2->v.text.text); GumboNode* tr3 = GetChild(tbody2, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, tr3->type); EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr3)); ASSERT_EQ(0, GetChildCount(tr3)); } TEST_F(GumboParserTest, Select) { Parse("
"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(2, GetChildCount(body)); GumboNode* select = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type); EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select)); ASSERT_EQ(2, GetChildCount(select)); GumboNode* option1 = GetChild(select, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, option1->type); EXPECT_EQ(GUMBO_TAG_OPTION, GetTag(option1)); ASSERT_EQ(1, GetChildCount(option1)); GumboNode* option2 = GetChild(select, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, option2->type); EXPECT_EQ(GUMBO_TAG_OPTION, GetTag(option2)); ASSERT_EQ(1, GetChildCount(option2)); GumboNode* div = GetChild(body, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type); EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div)); ASSERT_EQ(0, GetChildCount(div)); } TEST_F(GumboParserTest, ComplicatedSelect) { Parse( ""); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(2, GetChildCount(body)); GumboNode* select = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type); EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select)); ASSERT_EQ(1, GetChildCount(select)); GumboNode* optgroup = GetChild(select, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, optgroup->type); EXPECT_EQ(GUMBO_TAG_OPTGROUP, GetTag(optgroup)); ASSERT_EQ(1, GetChildCount(optgroup)); GumboNode* option = GetChild(optgroup, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, option->type); EXPECT_EQ(GUMBO_TAG_OPTION, GetTag(option)); ASSERT_EQ(1, GetChildCount(option)); GumboNode* text = GetChild(option, 0); ASSERT_EQ(GUMBO_NODE_TEXT, text->type); EXPECT_STREQ("Option", text->v.text.text); GumboNode* input = GetChild(body, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type); EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input)); ASSERT_EQ(0, GetChildCount(input)); } TEST_F(GumboParserTest, DoubleSelect) { Parse("
"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(2, GetChildCount(body)); GumboNode* select = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type); EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select)); ASSERT_EQ(0, GetChildCount(select)); GumboNode* div = GetChild(body, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type); EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div)); ASSERT_EQ(0, GetChildCount(div)); } TEST_F(GumboParserTest, InputInSelect) { Parse("
"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(3, GetChildCount(body)); GumboNode* select = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type); EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select)); ASSERT_EQ(0, GetChildCount(select)); GumboNode* input = GetChild(body, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type); EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input)); ASSERT_EQ(0, GetChildCount(input)); GumboNode* div = GetChild(body, 2); ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type); EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div)); ASSERT_EQ(0, GetChildCount(div)); } TEST_F(GumboParserTest, SelectInTable) { Parse("
Cell1Cell3
Cell2
"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(1, GetChildCount(body)); GumboNode* table1 = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, table1->type); EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table1)); ASSERT_EQ(1, GetChildCount(table1)); GumboNode* tbody1 = GetChild(table1, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody1->type); EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody1)); ASSERT_EQ(1, GetChildCount(tbody1)); GumboNode* tr1 = GetChild(tbody1, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, tr1->type); EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr1)); ASSERT_EQ(1, GetChildCount(tr1)); GumboNode* td1 = GetChild(tr1, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, td1->type); EXPECT_EQ(GUMBO_TAG_TD, GetTag(td1)); ASSERT_EQ(3, GetChildCount(td1)); GumboNode* cell1 = GetChild(td1, 0); ASSERT_EQ(GUMBO_NODE_TEXT, cell1->type); EXPECT_STREQ("Cell1", cell1->v.text.text); // Foster-parented out of the inner
"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(1, GetChildCount(body)); GumboNode* table = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type); EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table)); ASSERT_EQ(1, GetChildCount(table)); GumboNode* tbody = GetChild(table, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type); EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody)); ASSERT_EQ(1, GetChildCount(tbody)); GumboNode* tr = GetChild(tbody, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type); EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr)); ASSERT_EQ(1, GetChildCount(tr)); GumboNode* td = GetChild(tr, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, td->type); EXPECT_EQ(GUMBO_TAG_TD, GetTag(td)); ASSERT_EQ(1, GetChildCount(td)); GumboNode* select = GetChild(td, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type); EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select)); ASSERT_EQ(1, GetChildCount(select)); GumboNode* option = GetChild(select, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, option->type); EXPECT_EQ(GUMBO_TAG_OPTION, GetTag(option)); ASSERT_EQ(0, GetChildCount(option)); } TEST_F(GumboParserTest, ImplicitColgroup) { Parse("
"); GumboNode* body; GetAndAssertBody(root_, &body); ASSERT_EQ(1, GetChildCount(body)); GumboNode* table = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type); EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table)); ASSERT_EQ(1, GetChildCount(table)); GumboNode* colgroup = GetChild(table, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, colgroup->type); EXPECT_EQ(GUMBO_TAG_COLGROUP, GetTag(colgroup)); ASSERT_EQ(2, GetChildCount(colgroup)); GumboNode* col1 = GetChild(colgroup, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, col1->type); EXPECT_EQ(GUMBO_TAG_COL, GetTag(col1)); ASSERT_EQ(0, GetChildCount(col1)); GumboNode* col2 = GetChild(colgroup, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, col2->type); EXPECT_EQ(GUMBO_TAG_COL, GetTag(col2)); ASSERT_EQ(0, GetChildCount(col2)); } TEST_F(GumboParserTest, Form) { Parse("
After form");
  // Remainder of the Form test (its TEST_F header and the start of the Parse()
  // literal are on the preceding line).
  //
  // Tree verified below:
  //   body > { form > input, "After form" }
  GumboNode* body;
  GetAndAssertBody(root_, &body);
  ASSERT_EQ(2, GetChildCount(body));

  // First child of body: a form with exactly one child.
  GumboNode* form = GetChild(body, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, form->type);
  EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form));
  ASSERT_EQ(1, GetChildCount(form));

  // That child is an input element with no children of its own.
  GumboNode* input = GetChild(form, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
  EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input));
  ASSERT_EQ(0, GetChildCount(input));

  // Second child of body: the text node is a sibling of the form, not a
  // descendant of it.
  GumboNode* text = GetChild(body, 1);
  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  EXPECT_STREQ("After form", text->v.text.text);
}

TEST_F(GumboParserTest, NestedForm) {
  Parse("
After form");
  // Remainder of the NestedForm test (its TEST_F header and the start of the
  // Parse() literal are on the preceding line).
  //
  // Tree verified below:
  //   body > { form > { label, input }, "After form" }
  GumboNode* body;
  GetAndAssertBody(root_, &body);
  ASSERT_EQ(2, GetChildCount(body));

  // Only one form element is expected directly under body, with two children.
  GumboNode* form = GetChild(body, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, form->type);
  EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form));
  ASSERT_EQ(2, GetChildCount(form));

  // The label must have exactly one child; that child is not inspected here.
  GumboNode* label = GetChild(form, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, label->type);
  EXPECT_EQ(GUMBO_TAG_LABEL, GetTag(label));
  ASSERT_EQ(1, GetChildCount(label));

  // The input is a direct child of the form (index 1) and is childless.
  GumboNode* input = GetChild(form, 1);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
  EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input));
  ASSERT_EQ(0, GetChildCount(input));

  // The trailing text is a sibling of the form.
  GumboNode* text = GetChild(body, 1);
  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  EXPECT_STREQ("After form", text->v.text.text);
}

TEST_F(GumboParserTest, MisnestedFormInTable) {
  // Parse of this is somewhat weird. The first
// <form> is opened outside the
  // table, so when </form> checks to see if there's a form in scope, it stops
  // at the <table> boundary and returns null. The form pointer is nulled out
  // anyway, though, which means that the second form (parsed in the table body
  // state) ends up creating an element. It's immediately popped off
  // the stack, but the form element pointer remains set to that node (which is
  // not on the stack of open elements). The final </form> tag triggers the
  // "does not have node in scope" clause and is ignored. (Note that this is
  // different from "has a form element in scope" - the first form is still in
  // scope at that point, but the form pointer does not point to it.) Then the
  // original <form> element is closed implicitly when the table cell is closed.
  Parse( "
type);
  // Tail of the MisnestedFormInTable test; the statements that declare
  // `table1` are not present in this copy of the file.
  //
  // Nesting verified below:
  //   table1 > tbody1 > tr1 > td1 > form1 > table2 > tbody2 > { tr2, form2 }
  EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table1));
  ASSERT_EQ(1, GetChildCount(table1));

  GumboNode* tbody1 = GetChild(table1, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody1->type);
  EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody1));
  ASSERT_EQ(1, GetChildCount(tbody1));

  GumboNode* tr1 = GetChild(tbody1, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, tr1->type);
  EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr1));
  ASSERT_EQ(1, GetChildCount(tr1));

  GumboNode* td1 = GetChild(tr1, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, td1->type);
  EXPECT_EQ(GUMBO_TAG_TD, GetTag(td1));
  ASSERT_EQ(1, GetChildCount(td1));

  // The first form is the cell's only child and wraps the inner table.
  GumboNode* form1 = GetChild(td1, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, form1->type);
  EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form1));
  ASSERT_EQ(1, GetChildCount(form1));

  GumboNode* table2 = GetChild(form1, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, table2->type);
  EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table2));
  ASSERT_EQ(1, GetChildCount(table2));

  // The inner tbody holds two children: a row and the second form.
  GumboNode* tbody2 = GetChild(table2, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody2->type);
  EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody2));
  ASSERT_EQ(2, GetChildCount(tbody2));

  // The row must have one child; that child is not inspected further.
  GumboNode* tr2 = GetChild(tbody2, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, tr2->type);
  EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr2));
  ASSERT_EQ(1, GetChildCount(tr2));

  // The second form is a sibling of the row and is empty.
  GumboNode* form2 = GetChild(tbody2, 1);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, form2->type);
  EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form2));
  ASSERT_EQ(0, GetChildCount(form2));
}

// Checks the tree produced for an isindex input:
//   body > form[action="/action"] > { hr, label > { "Secret Message", input }, hr }
// where the input carries id="form1" and name="isindex".
TEST_F(GumboParserTest, IsIndex) {
  // NOTE(review): the literal passed to Parse() is empty here, yet every
  // assertion below expects a populated <form>. The markup looks to have been
  // stripped from this copy of the file -- confirm against upstream.
  Parse("");
  GumboNode* body;
  GetAndAssertBody(root_, &body);
  ASSERT_EQ(1, GetChildCount(body));

  GumboNode* form = GetChild(body, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, form->type);
  EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form));
  ASSERT_EQ(3, GetChildCount(form));

  // The form's first attribute must be action="/action". The form's attribute
  // count is not asserted before this lookup.
  GumboAttribute* action = GetAttribute(form, 0);
  EXPECT_STREQ("action", action->name);
  EXPECT_STREQ("/action", action->value);

  // Child 0: leading <hr>.
  GumboNode* hr1 = GetChild(form, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, hr1->type);
  EXPECT_EQ(GUMBO_TAG_HR, GetTag(hr1));
  ASSERT_EQ(0, GetChildCount(hr1));

  // Child 1: <label> holding the prompt text followed by the input.
  GumboNode* label = GetChild(form, 1);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, label->type);
  EXPECT_EQ(GUMBO_TAG_LABEL, GetTag(label));
  ASSERT_EQ(2, GetChildCount(label));

  GumboNode* text = GetChild(label, 0);
  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  EXPECT_STREQ("Secret Message", text->v.text.text);

  // The input has exactly two attributes, in this order: id, then name.
  GumboNode* input = GetChild(label, 1);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
  EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input));
  ASSERT_EQ(0, GetChildCount(input));
  ASSERT_EQ(2, GetAttributeCount(input));

  GumboAttribute* id = GetAttribute(input, 0);
  EXPECT_STREQ("id", id->name);
  EXPECT_STREQ("form1", id->value);

  GumboAttribute* name = GetAttribute(input, 1);
  EXPECT_STREQ("name", name->name);
  EXPECT_STREQ("isindex", name->value);

  // Child 2: trailing <hr>.
  GumboNode* hr2 = GetChild(form, 2);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, hr2->type);
  EXPECT_EQ(GUMBO_TAG_HR, GetTag(hr2));
  ASSERT_EQ(0, GetChildCount(hr2));
}

// Checks that the generated input ends up with exactly one attribute,
// name="isindex", inside the same form > label > input structure as above.
TEST_F(GumboParserTest, IsIndexDuplicateAttribute) {
  // NOTE(review): empty literal, same concern as in IsIndex -- the assertions
  // below cannot hold for empty input; confirm the intended markup upstream.
  Parse("");
  GumboNode* body;
  GetAndAssertBody(root_, &body);
  ASSERT_EQ(1, GetChildCount(body));

  GumboNode* form = GetChild(body, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, form->type);
  EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form));
  ASSERT_EQ(3, GetChildCount(form));

  GumboNode* label = GetChild(form, 1);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, label->type);
  EXPECT_EQ(GUMBO_TAG_LABEL, GetTag(label));
  ASSERT_EQ(2, GetChildCount(label));

  GumboNode* input = GetChild(label, 1);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
  EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input));
  ASSERT_EQ(0, GetChildCount(input));
  // Exactly one attribute is expected on the input: name="isindex".
  ASSERT_EQ(1, GetAttributeCount(input));

  GumboAttribute* name = GetAttribute(input, 0);
  EXPECT_STREQ("name", name->name);
  EXPECT_STREQ("isindex", name->value);
}

TEST_F(GumboParserTest, NestedRawtextTags) {
  Parse( "
" "" "
" "
0");
  // Tail of a test whose TEST_F header is not present in this copy of the
  // file.
  //
  // Tree verified below (namespace in brackets):
  //   body > math[MathML] > th[MathML] > mi[MathML] > { table[HTML],
  //                                                     div[HTML] > "0" }
  GumboNode* body;
  GetAndAssertBody(root_, &body);
  ASSERT_EQ(1, GetChildCount(body));

  GumboNode* math = GetChild(body, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, math->type);
  EXPECT_EQ(GUMBO_TAG_MATH, math->v.element.tag);
  EXPECT_EQ(GUMBO_NAMESPACE_MATHML, math->v.element.tag_namespace);
  ASSERT_EQ(1, GetChildCount(math));

  // Here the th is expected to be in the MathML namespace.
  GumboNode* th = GetChild(math, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, th->type);
  EXPECT_EQ(GUMBO_TAG_TH, th->v.element.tag);
  EXPECT_EQ(GUMBO_NAMESPACE_MATHML, th->v.element.tag_namespace);
  ASSERT_EQ(1, GetChildCount(th));

  GumboNode* mi = GetChild(th, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, mi->type);
  EXPECT_EQ(GUMBO_TAG_MI, mi->v.element.tag);
  EXPECT_EQ(GUMBO_NAMESPACE_MATHML, mi->v.element.tag_namespace);
  ASSERT_EQ(2, GetChildCount(mi));

  // Both children of the mi are expected back in the HTML namespace: an empty
  // table, then a div.
  GumboNode* table = GetChild(mi, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
  EXPECT_EQ(GUMBO_TAG_TABLE, table->v.element.tag);
  EXPECT_EQ(GUMBO_NAMESPACE_HTML, table->v.element.tag_namespace);
  ASSERT_EQ(0, GetChildCount(table));

  GumboNode* div = GetChild(mi, 1);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
  EXPECT_EQ(GUMBO_TAG_DIV, div->v.element.tag);
  EXPECT_EQ(GUMBO_NAMESPACE_HTML, div->v.element.tag_namespace);
  ASSERT_EQ(1, GetChildCount(div));

  // The text "0" is expected inside the div rather than inside the table.
  GumboNode* text = GetChild(div, 0);
  ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  EXPECT_STREQ("0", text->v.text.text);
}

// Checks the tree (namespace in brackets):
//   body > table[HTML] > tbody[HTML] > tr[HTML] > th[HTML] > math[MathML]
//        > td[MathML]
// with the td required to be childless.
TEST_F(GumboParserTest, TdInMathml) {
  // NOTE(review): the literal passed to Parse() is empty here, yet the
  // assertions below expect a populated table. The markup looks to have been
  // stripped from this copy of the file -- confirm against upstream.
  Parse("");
  GumboNode* body;
  GetAndAssertBody(root_, &body);
  ASSERT_EQ(1, GetChildCount(body));

  GumboNode* table = GetChild(body, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
  EXPECT_EQ(GUMBO_TAG_TABLE, table->v.element.tag);
  EXPECT_EQ(GUMBO_NAMESPACE_HTML, table->v.element.tag_namespace);
  ASSERT_EQ(1, GetChildCount(table));

  GumboNode* tbody = GetChild(table, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type);
  EXPECT_EQ(GUMBO_TAG_TBODY, tbody->v.element.tag);
  EXPECT_EQ(GUMBO_NAMESPACE_HTML, tbody->v.element.tag_namespace);
  ASSERT_EQ(1, GetChildCount(tbody));

  GumboNode* tr = GetChild(tbody, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type);
  EXPECT_EQ(GUMBO_TAG_TR, tr->v.element.tag);
  EXPECT_EQ(GUMBO_NAMESPACE_HTML, tr->v.element.tag_namespace);
  ASSERT_EQ(1, GetChildCount(tr));

  // The header cell itself stays in the HTML namespace.
  GumboNode* th = GetChild(tr, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, th->type);
  EXPECT_EQ(GUMBO_TAG_TH, th->v.element.tag);
  EXPECT_EQ(GUMBO_NAMESPACE_HTML, th->v.element.tag_namespace);
  ASSERT_EQ(1, GetChildCount(th));

  GumboNode* math = GetChild(th, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, math->type);
  EXPECT_EQ(GUMBO_TAG_MATH, math->v.element.tag);
  EXPECT_EQ(GUMBO_NAMESPACE_MATHML, math->v.element.tag_namespace);
  ASSERT_EQ(1, GetChildCount(math));

  // The td nested under math is expected to carry the MathML namespace.
  GumboNode* td = GetChild(math, 0);
  ASSERT_EQ(GUMBO_NODE_ELEMENT, td->type);
  EXPECT_EQ(GUMBO_TAG_TD, td->v.element.tag);
  EXPECT_EQ(GUMBO_NAMESPACE_MATHML, td->v.element.tag_namespace);
  ASSERT_EQ(0, GetChildCount(td));
}

TEST_F(GumboParserTest, SelectInForeignContent) {
  Parse("