parser.cc 63 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957
  1. // Copyright 2011 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // Author: jdtang@google.com (Jonathan Tang)
  16. #include "gumbo.h"
  17. #include <string>
  18. #include "gtest/gtest.h"
  19. #include "test_utils.h"
  20. namespace {
  21. class GumboParserTest : public ::testing::Test {
  22. protected:
  23. GumboParserTest()
  24. : options_(kGumboDefaultOptions), output_(NULL), root_(NULL) {
  25. InitLeakDetection(&options_, &malloc_stats_);
  26. }
  27. virtual ~GumboParserTest() {
  28. if (output_) {
  29. gumbo_destroy_output(&options_, output_);
  30. }
  31. EXPECT_EQ(malloc_stats_.objects_allocated, malloc_stats_.objects_freed);
  32. }
  33. virtual void Parse(const char* input) {
  34. if (output_) {
  35. gumbo_destroy_output(&options_, output_);
  36. }
  37. output_ = gumbo_parse_with_options(&options_, input, strlen(input));
  38. // The naming inconsistency is because these tests were initially written
  39. // when gumbo_parse returned the document element instead of an GumboOutput
  40. // structure.
  41. root_ = output_->document;
  42. }
  43. virtual void ParseFragment(
  44. const char* input, GumboTag context, GumboNamespaceEnum context_ns) {
  45. if (output_) {
  46. gumbo_destroy_output(&options_, output_);
  47. }
  48. options_.fragment_context = context;
  49. options_.fragment_namespace = context_ns;
  50. output_ = gumbo_parse_with_options(&options_, input, strlen(input));
  51. root_ = output_->document;
  52. }
  53. virtual void Parse(const std::string& input) {
  54. // This overload is so we can test/demonstrate that computing offsets from
  55. // the .data() member of an STL string works properly.
  56. if (output_) {
  57. gumbo_destroy_output(&options_, output_);
  58. }
  59. output_ = gumbo_parse_with_options(&options_, input.data(), input.length());
  60. root_ = output_->document;
  61. SanityCheckPointers(input.data(), input.length(), output_->root, 1000);
  62. }
  63. MallocStats malloc_stats_;
  64. GumboOptions options_;
  65. GumboOutput* output_;
  66. GumboNode* root_;
  67. };
  68. TEST_F(GumboParserTest, NullDocument) {
  69. Parse("");
  70. ASSERT_TRUE(root_);
  71. ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type);
  72. EXPECT_EQ(GUMBO_INSERTION_BY_PARSER, root_->parse_flags);
  73. GumboNode* body;
  74. GetAndAssertBody(root_, &body);
  75. }
  76. TEST_F(GumboParserTest, ParseTwice) {
  77. Parse("");
  78. ASSERT_TRUE(root_);
  79. ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type);
  80. std::string second_input("");
  81. Parse(second_input);
  82. ASSERT_TRUE(root_);
  83. ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type);
  84. GumboNode* body;
  85. GetAndAssertBody(root_, &body);
  86. }
  87. TEST_F(GumboParserTest, OneChar) {
  88. std::string input("T");
  89. Parse(input);
  90. ASSERT_TRUE(root_);
  91. ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type);
  92. EXPECT_EQ(GUMBO_INSERTION_BY_PARSER, root_->parse_flags);
  93. ASSERT_EQ(1, GetChildCount(root_));
  94. GumboNode* html = GetChild(root_, 0);
  95. ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
  96. EXPECT_TRUE(html->parse_flags & GUMBO_INSERTION_BY_PARSER);
  97. EXPECT_TRUE(html->parse_flags & GUMBO_INSERTION_IMPLICIT_END_TAG);
  98. EXPECT_TRUE(html->parse_flags & GUMBO_INSERTION_IMPLIED);
  99. EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
  100. ASSERT_EQ(2, GetChildCount(html));
  101. GumboNode* head = GetChild(html, 0);
  102. ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type);
  103. EXPECT_EQ(GUMBO_TAG_HEAD, head->v.element.tag);
  104. EXPECT_EQ(0, GetChildCount(head));
  105. GumboNode* body = GetChild(html, 1);
  106. ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
  107. EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag);
  108. ASSERT_EQ(1, GetChildCount(body));
  109. EXPECT_EQ(1, body->v.element.start_pos.line);
  110. EXPECT_EQ(1, body->v.element.start_pos.column);
  111. EXPECT_EQ(0, body->v.element.start_pos.offset);
  112. EXPECT_EQ(1, body->v.element.end_pos.line);
  113. EXPECT_EQ(2, body->v.element.end_pos.column);
  114. EXPECT_EQ(1, body->v.element.end_pos.offset);
  115. GumboNode* text = GetChild(body, 0);
  116. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  117. EXPECT_STREQ("T", text->v.text.text);
  118. EXPECT_EQ(1, text->v.text.start_pos.line);
  119. EXPECT_EQ(1, text->v.text.start_pos.column);
  120. EXPECT_EQ(0, text->v.text.start_pos.offset);
  121. EXPECT_EQ(input.data(), text->v.text.original_text.data);
  122. EXPECT_EQ(1, text->v.text.original_text.length);
  123. }
  124. TEST_F(GumboParserTest, TextOnly) {
  125. Parse("Test");
  126. EXPECT_EQ(1, output_->errors.length); // No doctype.
  127. ASSERT_EQ(1, GetChildCount(root_));
  128. GumboNode* html = GetChild(root_, 0);
  129. ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
  130. EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
  131. ASSERT_EQ(2, GetChildCount(html));
  132. GumboNode* head = GetChild(html, 0);
  133. ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type);
  134. EXPECT_EQ(GUMBO_TAG_HEAD, head->v.element.tag);
  135. EXPECT_EQ(0, GetChildCount(head));
  136. GumboNode* body = GetChild(html, 1);
  137. ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
  138. EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag);
  139. ASSERT_EQ(1, GetChildCount(body));
  140. GumboNode* text = GetChild(body, 0);
  141. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  142. EXPECT_STREQ("Test", text->v.text.text);
  143. }
  144. TEST_F(GumboParserTest, SelfClosingTagError) {
  145. Parse("<div/>");
  146. // TODO(jdtang): I think this is double-counting some error cases, I think we
  147. // may ultimately want to de-dup errors that occur on the same token.
  148. EXPECT_EQ(8, output_->errors.length);
  149. }
  150. TEST_F(GumboParserTest, UnexpectedEndBreak) {
  151. Parse("</br><div></div>");
  152. GumboNode* body;
  153. GetAndAssertBody(root_, &body);
  154. ASSERT_EQ(2, GetChildCount(body));
  155. GumboNode* br = GetChild(body, 0);
  156. ASSERT_EQ(GUMBO_NODE_ELEMENT, br->type);
  157. EXPECT_EQ(GUMBO_TAG_BR, br->v.element.tag);
  158. ASSERT_EQ(0, GetChildCount(br));
  159. GumboNode* div = GetChild(body, 1);
  160. ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
  161. EXPECT_EQ(GUMBO_TAG_DIV, div->v.element.tag);
  162. ASSERT_EQ(0, GetChildCount(div));
  163. }
  164. TEST_F(GumboParserTest, CaseSensitiveAttributes) {
  165. Parse("<div class=CamelCase>");
  166. GumboNode* body;
  167. GetAndAssertBody(root_, &body);
  168. ASSERT_EQ(1, GetChildCount(body));
  169. GumboNode* div = GetChild(body, 0);
  170. GumboVector* attributes = &div->v.element.attributes;
  171. ASSERT_EQ(1, attributes->length);
  172. GumboAttribute* clas = static_cast<GumboAttribute*>(attributes->data[0]);
  173. EXPECT_EQ(GUMBO_ATTR_NAMESPACE_NONE, clas->attr_namespace);
  174. EXPECT_STREQ("class", clas->name);
  175. EXPECT_STREQ("CamelCase", clas->value);
  176. }
  177. TEST_F(GumboParserTest, ExplicitHtmlStructure) {
  178. Parse(
  179. "<!doctype html>\n<html>"
  180. "<head><title>Foo</title></head>\n"
  181. "<body><div class=bar>Test</div></body></html>");
  182. ASSERT_EQ(1, GetChildCount(root_));
  183. EXPECT_EQ(0, output_->errors.length);
  184. ASSERT_EQ(GUMBO_NODE_DOCUMENT, root_->type);
  185. EXPECT_STREQ("html", root_->v.document.name);
  186. EXPECT_STREQ("", root_->v.document.public_identifier);
  187. EXPECT_STREQ("", root_->v.document.system_identifier);
  188. GumboNode* html = GetChild(root_, 0);
  189. ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
  190. EXPECT_EQ(GUMBO_INSERTION_NORMAL, html->parse_flags);
  191. EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
  192. EXPECT_EQ(2, html->v.element.start_pos.line);
  193. EXPECT_EQ(1, html->v.element.start_pos.column);
  194. EXPECT_EQ(16, html->v.element.start_pos.offset);
  195. EXPECT_EQ(3, html->v.element.end_pos.line);
  196. EXPECT_EQ(39, html->v.element.end_pos.column);
  197. EXPECT_EQ(92, html->v.element.end_pos.offset);
  198. EXPECT_EQ("<html>", ToString(html->v.element.original_tag));
  199. EXPECT_EQ("</html>", ToString(html->v.element.original_end_tag));
  200. ASSERT_EQ(3, GetChildCount(html));
  201. GumboNode* head = GetChild(html, 0);
  202. EXPECT_EQ(GUMBO_INSERTION_NORMAL, head->parse_flags);
  203. ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type);
  204. EXPECT_EQ(GUMBO_TAG_HEAD, head->v.element.tag);
  205. EXPECT_EQ(html, head->parent);
  206. EXPECT_EQ(0, head->index_within_parent);
  207. EXPECT_EQ(1, GetChildCount(head));
  208. GumboNode* body = GetChild(html, 2);
  209. EXPECT_EQ(GUMBO_INSERTION_NORMAL, body->parse_flags);
  210. ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
  211. EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag);
  212. EXPECT_EQ(html, body->parent);
  213. EXPECT_EQ(3, body->v.element.start_pos.line);
  214. EXPECT_EQ(1, body->v.element.start_pos.column);
  215. EXPECT_EQ(54, body->v.element.start_pos.offset);
  216. EXPECT_EQ(3, body->v.element.end_pos.line);
  217. EXPECT_EQ(32, body->v.element.end_pos.column);
  218. EXPECT_EQ(85, body->v.element.end_pos.offset);
  219. EXPECT_EQ("<body>", ToString(body->v.element.original_tag));
  220. EXPECT_EQ("</body>", ToString(body->v.element.original_end_tag));
  221. EXPECT_EQ(2, body->index_within_parent);
  222. ASSERT_EQ(1, GetChildCount(body));
  223. GumboNode* div = GetChild(body, 0);
  224. ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
  225. EXPECT_EQ(GUMBO_TAG_DIV, div->v.element.tag);
  226. EXPECT_EQ(body, div->parent);
  227. EXPECT_EQ(0, div->index_within_parent);
  228. ASSERT_EQ(1, GetChildCount(div));
  229. ASSERT_EQ(1, GetAttributeCount(div));
  230. GumboAttribute* clas = GetAttribute(div, 0);
  231. EXPECT_STREQ("class", clas->name);
  232. EXPECT_EQ("class", ToString(clas->original_name));
  233. EXPECT_STREQ("bar", clas->value);
  234. EXPECT_EQ("bar", ToString(clas->original_value));
  235. GumboNode* text = GetChild(div, 0);
  236. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  237. EXPECT_STREQ("Test", text->v.text.text);
  238. }
  239. TEST_F(GumboParserTest, Whitespace) {
  240. Parse("<ul>\n <li>Text\n</ul>");
  241. GumboNode* body;
  242. GetAndAssertBody(root_, &body);
  243. ASSERT_EQ(1, GetChildCount(body));
  244. GumboNode* ul = GetChild(body, 0);
  245. ASSERT_EQ(GUMBO_NODE_ELEMENT, ul->type);
  246. EXPECT_EQ(GUMBO_TAG_UL, ul->v.element.tag);
  247. ASSERT_EQ(2, GetChildCount(ul));
  248. GumboNode* whitespace = GetChild(ul, 0);
  249. ASSERT_EQ(GUMBO_NODE_WHITESPACE, whitespace->type);
  250. EXPECT_STREQ("\n ", whitespace->v.text.text);
  251. GumboNode* li = GetChild(ul, 1);
  252. ASSERT_EQ(GUMBO_NODE_ELEMENT, li->type);
  253. EXPECT_EQ(GUMBO_TAG_LI, li->v.element.tag);
  254. ASSERT_EQ(1, GetChildCount(li));
  255. GumboNode* text = GetChild(li, 0);
  256. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  257. EXPECT_STREQ("Text\n", text->v.text.text);
  258. }
  259. TEST_F(GumboParserTest, DuplicateAttributes) {
  260. std::string text("<input checked=\"false\" checked=true id=foo id='bar'>");
  261. Parse(text);
  262. GumboNode* body;
  263. GetAndAssertBody(root_, &body);
  264. ASSERT_EQ(1, GetChildCount(body));
  265. GumboNode* input = GetChild(body, 0);
  266. EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, input->parse_flags);
  267. ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
  268. EXPECT_EQ(GUMBO_TAG_INPUT, input->v.element.tag);
  269. EXPECT_EQ(0, GetChildCount(input));
  270. ASSERT_EQ(2, GetAttributeCount(input));
  271. GumboAttribute* checked = GetAttribute(input, 0);
  272. EXPECT_STREQ("checked", checked->name);
  273. EXPECT_STREQ("false", checked->value);
  274. EXPECT_EQ(1, checked->name_start.line);
  275. EXPECT_EQ(8, checked->name_start.column);
  276. EXPECT_EQ(15, checked->name_end.column);
  277. EXPECT_EQ(16, checked->value_start.column);
  278. EXPECT_EQ(23, checked->value_end.column);
  279. EXPECT_EQ(7, checked->original_name.data - text.data());
  280. EXPECT_EQ(7, checked->original_name.length);
  281. EXPECT_EQ(15, checked->original_value.data - text.data());
  282. EXPECT_EQ(7, checked->original_value.length);
  283. GumboAttribute* id = GetAttribute(input, 1);
  284. EXPECT_STREQ("id", id->name);
  285. EXPECT_STREQ("foo", id->value);
  286. // TODO(jdtang): Run some assertions on the parse error that's added.
  287. }
  288. TEST_F(GumboParserTest, LinkTagsInHead) {
  289. Parse(
  290. "<html>\n"
  291. " <head>\n"
  292. " <title>Sample title></title>\n\n"
  293. " <link rel=stylesheet>\n"
  294. " <link rel=author>\n"
  295. " </head>\n"
  296. " <body>Foo</body>");
  297. ASSERT_EQ(1, GetChildCount(root_));
  298. GumboNode* html = GetChild(root_, 0);
  299. ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
  300. EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, html->parse_flags);
  301. EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
  302. ASSERT_EQ(3, GetChildCount(html));
  303. GumboNode* head = GetChild(html, 0);
  304. ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type);
  305. EXPECT_EQ(GUMBO_INSERTION_NORMAL, head->parse_flags);
  306. EXPECT_EQ(GUMBO_TAG_HEAD, head->v.element.tag);
  307. EXPECT_EQ(7, GetChildCount(head));
  308. GumboNode* text1 = GetChild(head, 2);
  309. ASSERT_EQ(GUMBO_NODE_WHITESPACE, text1->type);
  310. EXPECT_STREQ("\n\n ", text1->v.text.text);
  311. GumboNode* link1 = GetChild(head, 3);
  312. ASSERT_EQ(GUMBO_NODE_ELEMENT, link1->type);
  313. EXPECT_EQ(GUMBO_TAG_LINK, link1->v.element.tag);
  314. EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, link1->parse_flags);
  315. EXPECT_EQ(0, GetChildCount(link1));
  316. GumboNode* text2 = GetChild(head, 4);
  317. ASSERT_EQ(GUMBO_NODE_WHITESPACE, text2->type);
  318. EXPECT_STREQ("\n ", text2->v.text.text);
  319. GumboNode* link2 = GetChild(head, 5);
  320. ASSERT_EQ(GUMBO_NODE_ELEMENT, link2->type);
  321. EXPECT_EQ(GUMBO_TAG_LINK, link2->v.element.tag);
  322. EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, link2->parse_flags);
  323. EXPECT_EQ(0, GetChildCount(link2));
  324. GumboNode* text3 = GetChild(head, 6);
  325. ASSERT_EQ(GUMBO_NODE_WHITESPACE, text3->type);
  326. EXPECT_STREQ("\n ", text3->v.text.text);
  327. GumboNode* body = GetChild(html, 2);
  328. ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
  329. EXPECT_EQ(GUMBO_INSERTION_NORMAL, body->parse_flags);
  330. EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag);
  331. ASSERT_EQ(1, GetChildCount(body));
  332. }
  333. TEST_F(GumboParserTest, WhitespaceBeforeHtml) {
  334. Parse("<!doctype html>\n<html>Test</html>");
  335. ASSERT_EQ(1, GetChildCount(root_));
  336. GumboNode* body = GetChild(GetChild(root_, 0), 1);
  337. ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
  338. EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body));
  339. ASSERT_EQ(1, GetChildCount(body));
  340. GumboNode* text = GetChild(body, 0);
  341. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  342. EXPECT_STREQ("Test", text->v.text.text);
  343. }
  344. TEST_F(GumboParserTest, TextAfterHtml) {
  345. Parse("<html>Test</html> after doc");
  346. GumboNode* body;
  347. GetAndAssertBody(root_, &body);
  348. ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
  349. EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body));
  350. ASSERT_EQ(1, GetChildCount(body));
  351. GumboNode* text = GetChild(body, 0);
  352. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  353. EXPECT_STREQ("Test after doc", text->v.text.text);
  354. }
  355. TEST_F(GumboParserTest, WhitespaceInHead) {
  356. Parse("<html> Test</html>");
  357. GumboNode* html = GetChild(root_, 0);
  358. EXPECT_EQ(GUMBO_NODE_ELEMENT, html->type);
  359. EXPECT_EQ(GUMBO_TAG_HTML, GetTag(html));
  360. EXPECT_EQ(2, GetChildCount(html));
  361. GumboNode* head = GetChild(html, 0);
  362. EXPECT_EQ(GUMBO_NODE_ELEMENT, head->type);
  363. EXPECT_EQ(GUMBO_TAG_HEAD, GetTag(head));
  364. EXPECT_EQ(0, GetChildCount(head));
  365. GumboNode* body = GetChild(html, 1);
  366. EXPECT_EQ(GUMBO_NODE_ELEMENT, body->type);
  367. EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body));
  368. GumboNode* text = GetChild(body, 0);
  369. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  370. EXPECT_STREQ("Test", text->v.text.text);
  371. }
  372. TEST_F(GumboParserTest, Doctype) {
  373. Parse("<!doctype html>Test");
  374. GumboDocument* doc = &root_->v.document;
  375. EXPECT_EQ(1, doc->children.length);
  376. EXPECT_EQ(GUMBO_DOCTYPE_NO_QUIRKS, doc->doc_type_quirks_mode);
  377. EXPECT_STREQ("html", doc->name);
  378. EXPECT_STREQ("", doc->public_identifier);
  379. EXPECT_STREQ("", doc->system_identifier);
  380. }
  381. TEST_F(GumboParserTest, InvalidDoctype) {
  382. Parse("Test<!doctype root_element SYSTEM \"DTD_location\">");
  383. // Default doc token; the declared one is ignored.
  384. GumboDocument* doc = &root_->v.document;
  385. EXPECT_EQ(1, doc->children.length);
  386. EXPECT_EQ(GUMBO_DOCTYPE_QUIRKS, doc->doc_type_quirks_mode);
  387. EXPECT_STREQ("", doc->name);
  388. EXPECT_STREQ("", doc->public_identifier);
  389. EXPECT_STREQ("", doc->system_identifier);
  390. GumboNode* body;
  391. GetAndAssertBody(root_, &body);
  392. ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
  393. EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body));
  394. ASSERT_EQ(1, GetChildCount(body));
  395. GumboNode* text = GetChild(body, 0);
  396. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  397. EXPECT_STREQ("Test", text->v.text.text);
  398. }
  399. TEST_F(GumboParserTest, SingleComment) {
  400. Parse("<!-- comment -->");
  401. GumboNode* comment = GetChild(root_, 0);
  402. ASSERT_EQ(GUMBO_NODE_COMMENT, comment->type);
  403. EXPECT_STREQ(" comment ", comment->v.text.text);
  404. }
  405. TEST_F(GumboParserTest, CommentInText) {
  406. Parse("Start <!-- comment --> end");
  407. GumboNode* body;
  408. GetAndAssertBody(root_, &body);
  409. ASSERT_EQ(3, GetChildCount(body));
  410. GumboNode* start = GetChild(body, 0);
  411. ASSERT_EQ(GUMBO_NODE_TEXT, start->type);
  412. EXPECT_STREQ("Start ", start->v.text.text);
  413. GumboNode* comment = GetChild(body, 1);
  414. ASSERT_EQ(GUMBO_NODE_COMMENT, comment->type);
  415. EXPECT_EQ(body, comment->parent);
  416. EXPECT_EQ(1, comment->index_within_parent);
  417. EXPECT_STREQ(" comment ", comment->v.text.text);
  418. GumboNode* end = GetChild(body, 2);
  419. ASSERT_EQ(GUMBO_NODE_TEXT, end->type);
  420. EXPECT_STREQ(" end", end->v.text.text);
  421. }
  422. TEST_F(GumboParserTest, CommentBeforeNode) {
  423. Parse("<!--This is a comment-->\n<h1>hello world!</h1>");
  424. GumboNode* comment = GetChild(root_, 0);
  425. ASSERT_EQ(GUMBO_NODE_COMMENT, comment->type);
  426. EXPECT_STREQ("This is a comment", comment->v.text.text);
  427. EXPECT_EQ(
  428. "<!--This is a comment-->", ToString(comment->v.text.original_text));
  429. // Newline is ignored per the rules for "initial" insertion mode.
  430. GumboNode* body;
  431. GetAndAssertBody(root_, &body);
  432. ASSERT_EQ(1, GetChildCount(body));
  433. GumboNode* h1 = GetChild(body, 0);
  434. ASSERT_EQ(GUMBO_NODE_ELEMENT, h1->type);
  435. EXPECT_EQ(GUMBO_TAG_H1, h1->v.element.tag);
  436. }
  437. TEST_F(GumboParserTest, CommentInVerbatimMode) {
  438. Parse("<body> <div id='onegoogle'>Text</div> </body><!-- comment \n\n-->");
  439. GumboNode* html = GetChild(root_, 0);
  440. EXPECT_EQ(GUMBO_NODE_ELEMENT, html->type);
  441. EXPECT_EQ(GUMBO_TAG_HTML, GetTag(html));
  442. EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLIED |
  443. GUMBO_INSERTION_IMPLICIT_END_TAG,
  444. html->parse_flags);
  445. EXPECT_EQ(3, GetChildCount(html));
  446. GumboNode* body = GetChild(html, 1);
  447. EXPECT_EQ(GUMBO_NODE_ELEMENT, body->type);
  448. EXPECT_EQ(GUMBO_TAG_BODY, GetTag(body));
  449. EXPECT_EQ(GUMBO_INSERTION_NORMAL, body->parse_flags);
  450. EXPECT_EQ(3, GetChildCount(body));
  451. GumboNode* comment = GetChild(html, 2);
  452. ASSERT_EQ(GUMBO_NODE_COMMENT, comment->type);
  453. EXPECT_EQ(GUMBO_INSERTION_NORMAL, comment->parse_flags);
  454. EXPECT_STREQ(" comment \n\n", comment->v.text.text);
  455. }
  456. TEST_F(GumboParserTest, UnknownTag) {
  457. Parse("<foo>1<p>2</FOO>");
  458. GumboNode* body;
  459. GetAndAssertBody(root_, &body);
  460. ASSERT_EQ(1, GetChildCount(body));
  461. GumboNode* foo = GetChild(body, 0);
  462. ASSERT_EQ(GUMBO_NODE_ELEMENT, foo->type);
  463. EXPECT_EQ(GUMBO_TAG_UNKNOWN, GetTag(foo));
  464. EXPECT_EQ("<foo>", ToString(foo->v.element.original_tag));
  465. // According to the spec, the misplaced end tag is ignored, and so we return
  466. // an empty original_end_tag text. We may want to extend our error-reporting
  467. // a bit so that we close off the tag that it *would have closed*, had the
  468. // HTML been correct, along with a parse flag that says the end tag was in the
  469. // wrong place.
  470. EXPECT_EQ("", ToString(foo->v.element.original_end_tag));
  471. }
  472. TEST_F(GumboParserTest, UnknownTag2) {
  473. Parse("<div><sarcasm><div></div></sarcasm></div>");
  474. GumboNode* body;
  475. GetAndAssertBody(root_, &body);
  476. ASSERT_EQ(1, GetChildCount(body));
  477. GumboNode* div = GetChild(body, 0);
  478. ASSERT_EQ(1, GetChildCount(div));
  479. GumboNode* sarcasm = GetChild(div, 0);
  480. ASSERT_EQ(GUMBO_NODE_ELEMENT, sarcasm->type);
  481. EXPECT_EQ(GUMBO_TAG_UNKNOWN, GetTag(sarcasm));
  482. EXPECT_EQ("<sarcasm>", ToString(sarcasm->v.element.original_tag));
  483. EXPECT_EQ("</sarcasm>", ToString(sarcasm->v.element.original_end_tag));
  484. }
  485. TEST_F(GumboParserTest, InvalidEndTag) {
  486. Parse("<a><img src=foo.jpg></img></a>");
  487. GumboNode* body;
  488. GetAndAssertBody(root_, &body);
  489. ASSERT_EQ(1, GetChildCount(body));
  490. GumboNode* a = GetChild(body, 0);
  491. ASSERT_EQ(GUMBO_NODE_ELEMENT, a->type);
  492. EXPECT_EQ(GUMBO_TAG_A, GetTag(a));
  493. ASSERT_EQ(1, GetChildCount(a));
  494. GumboNode* img = GetChild(a, 0);
  495. ASSERT_EQ(GUMBO_NODE_ELEMENT, img->type);
  496. EXPECT_EQ(GUMBO_TAG_IMG, GetTag(img));
  497. ASSERT_EQ(0, GetChildCount(img));
  498. }
  499. TEST_F(GumboParserTest, Tables) {
  500. Parse(
  501. "<html><table>\n"
  502. " <tr><br /></invalid-tag>\n"
  503. " <th>One</th>\n"
  504. " <td>Two</td>\n"
  505. " </tr>\n"
  506. " <iframe></iframe>"
  507. "</table><tr></tr><div></div></html>");
  508. GumboNode* body;
  509. GetAndAssertBody(root_, &body);
  510. ASSERT_EQ(4, GetChildCount(body));
  511. GumboNode* br = GetChild(body, 0);
  512. ASSERT_EQ(GUMBO_NODE_ELEMENT, br->type);
  513. EXPECT_EQ(GUMBO_TAG_BR, GetTag(br));
  514. EXPECT_EQ(body, br->parent);
  515. EXPECT_EQ(0, br->index_within_parent);
  516. ASSERT_EQ(0, GetChildCount(br));
  517. GumboNode* iframe = GetChild(body, 1);
  518. ASSERT_EQ(GUMBO_NODE_ELEMENT, iframe->type);
  519. EXPECT_EQ(GUMBO_TAG_IFRAME, GetTag(iframe));
  520. ASSERT_EQ(0, GetChildCount(iframe));
  521. GumboNode* table = GetChild(body, 2);
  522. ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
  523. EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
  524. EXPECT_EQ(body, table->parent);
  525. EXPECT_EQ(2, table->index_within_parent);
  526. ASSERT_EQ(2, GetChildCount(table));
  527. GumboNode* table_text = GetChild(table, 0);
  528. ASSERT_EQ(GUMBO_NODE_WHITESPACE, table_text->type);
  529. EXPECT_STREQ("\n ", table_text->v.text.text);
  530. GumboNode* tbody = GetChild(table, 1);
  531. ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type);
  532. EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody));
  533. ASSERT_EQ(2, GetChildCount(tbody));
  534. // Second node is whitespace.
  535. GumboNode* tr = GetChild(tbody, 0);
  536. ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type);
  537. EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr));
  538. ASSERT_EQ(5, GetChildCount(tr)); // Including whitespace.
  539. GumboNode* tr_text = GetChild(tr, 0);
  540. ASSERT_EQ(GUMBO_NODE_WHITESPACE, tr_text->type);
  541. EXPECT_EQ(tr, tr_text->parent);
  542. EXPECT_EQ(0, tr_text->index_within_parent);
  543. EXPECT_STREQ("\n ", tr_text->v.text.text);
  544. GumboNode* th = GetChild(tr, 1);
  545. ASSERT_EQ(GUMBO_NODE_ELEMENT, th->type);
  546. EXPECT_EQ(GUMBO_TAG_TH, GetTag(th));
  547. EXPECT_EQ(tr, th->parent);
  548. EXPECT_EQ(1, th->index_within_parent);
  549. ASSERT_EQ(1, GetChildCount(th));
  550. GumboNode* th_text = GetChild(th, 0);
  551. ASSERT_EQ(GUMBO_NODE_TEXT, th_text->type);
  552. EXPECT_STREQ("One", th_text->v.text.text);
  553. GumboNode* td = GetChild(tr, 3);
  554. ASSERT_EQ(GUMBO_NODE_ELEMENT, td->type);
  555. EXPECT_EQ(GUMBO_TAG_TD, GetTag(td));
  556. ASSERT_EQ(1, GetChildCount(td));
  557. GumboNode* td_text = GetChild(td, 0);
  558. ASSERT_EQ(GUMBO_NODE_TEXT, td_text->type);
  559. EXPECT_STREQ("Two", td_text->v.text.text);
  560. GumboNode* td2_text = GetChild(td, 0);
  561. ASSERT_EQ(GUMBO_NODE_TEXT, td2_text->type);
  562. EXPECT_STREQ("Two", td2_text->v.text.text);
  563. GumboNode* div = GetChild(body, 3);
  564. ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
  565. EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div));
  566. ASSERT_EQ(0, GetChildCount(div));
  567. }
  568. TEST_F(GumboParserTest, StartParagraphInTable) {
  569. Parse("<table><P></tr></td>foo</table>");
  570. GumboNode* body;
  571. GetAndAssertBody(root_, &body);
  572. ASSERT_EQ(2, GetChildCount(body));
  573. GumboNode* paragraph = GetChild(body, 0);
  574. ASSERT_EQ(GUMBO_NODE_ELEMENT, paragraph->type);
  575. EXPECT_EQ(GUMBO_TAG_P, GetTag(paragraph));
  576. EXPECT_EQ(body, paragraph->parent);
  577. EXPECT_EQ(0, paragraph->index_within_parent);
  578. ASSERT_EQ(1, GetChildCount(paragraph));
  579. GumboNode* text = GetChild(paragraph, 0);
  580. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  581. EXPECT_STREQ("foo", text->v.text.text);
  582. GumboNode* table = GetChild(body, 1);
  583. ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
  584. EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
  585. EXPECT_EQ(body, table->parent);
  586. EXPECT_EQ(1, table->index_within_parent);
  587. ASSERT_EQ(0, GetChildCount(table));
  588. }
  589. TEST_F(GumboParserTest, EndParagraphInTable) {
  590. Parse("<table></p></table>");
  591. GumboNode* body;
  592. GetAndAssertBody(root_, &body);
  593. ASSERT_EQ(2, GetChildCount(body));
  594. GumboNode* paragraph = GetChild(body, 0);
  595. ASSERT_EQ(GUMBO_NODE_ELEMENT, paragraph->type);
  596. EXPECT_EQ(GUMBO_TAG_P, GetTag(paragraph));
  597. EXPECT_EQ(body, paragraph->parent);
  598. EXPECT_EQ(0, paragraph->index_within_parent);
  599. ASSERT_EQ(0, GetChildCount(paragraph));
  600. GumboNode* table = GetChild(body, 1);
  601. ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
  602. EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
  603. EXPECT_EQ(body, table->parent);
  604. EXPECT_EQ(1, table->index_within_parent);
  605. ASSERT_EQ(0, GetChildCount(table));
  606. }
  607. TEST_F(GumboParserTest, UnknownTagInTable) {
  608. Parse("<table><foo>bar</table>");
  609. GumboNode* body;
  610. GetAndAssertBody(root_, &body);
  611. ASSERT_EQ(2, GetChildCount(body));
  612. GumboNode* foo = GetChild(body, 0);
  613. ASSERT_EQ(GUMBO_NODE_ELEMENT, foo->type);
  614. EXPECT_EQ(GUMBO_TAG_UNKNOWN, GetTag(foo));
  615. EXPECT_EQ("<foo>", ToString(foo->v.element.original_tag));
  616. EXPECT_EQ(body, foo->parent);
  617. EXPECT_EQ(0, foo->index_within_parent);
  618. ASSERT_EQ(1, GetChildCount(foo));
  619. GumboNode* bar = GetChild(foo, 0);
  620. ASSERT_EQ(GUMBO_NODE_TEXT, bar->type);
  621. EXPECT_STREQ("bar", bar->v.text.text);
  622. GumboNode* table = GetChild(body, 1);
  623. ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
  624. EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
  625. EXPECT_EQ(body, table->parent);
  626. EXPECT_EQ(1, table->index_within_parent);
  627. ASSERT_EQ(0, GetChildCount(table));
  628. }
  629. TEST_F(GumboParserTest, UnclosedTableTags) {
  630. Parse(
  631. "<html><table>\n"
  632. " <tr>\n"
  633. " <td>One\n"
  634. " <td>Two\n"
  635. " <tr><td>Row2\n"
  636. " <tr><td>Row3\n"
  637. "</table>\n"
  638. "</html>");
  639. GumboNode* body;
  640. GetAndAssertBody(root_, &body);
  641. ASSERT_EQ(2, GetChildCount(body));
  642. GumboNode* table = GetChild(body, 0);
  643. ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
  644. EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
  645. ASSERT_EQ(2, GetChildCount(table));
  646. GumboNode* table_text = GetChild(table, 0);
  647. ASSERT_EQ(GUMBO_NODE_WHITESPACE, table_text->type);
  648. EXPECT_STREQ("\n ", table_text->v.text.text);
  649. GumboNode* tbody = GetChild(table, 1);
  650. ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type);
  651. EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody));
  652. ASSERT_EQ(3, GetChildCount(tbody));
  653. GumboNode* tr = GetChild(tbody, 0);
  654. ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type);
  655. EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr));
  656. ASSERT_EQ(3, GetChildCount(tr));
  657. GumboNode* tr_text = GetChild(tr, 0);
  658. ASSERT_EQ(GUMBO_NODE_WHITESPACE, tr_text->type);
  659. EXPECT_STREQ("\n ", tr_text->v.text.text);
  660. GumboNode* td1 = GetChild(tr, 1);
  661. ASSERT_EQ(GUMBO_NODE_ELEMENT, td1->type);
  662. EXPECT_EQ(GUMBO_TAG_TD, GetTag(td1));
  663. ASSERT_EQ(1, GetChildCount(td1));
  664. GumboNode* td1_text = GetChild(td1, 0);
  665. ASSERT_EQ(GUMBO_NODE_TEXT, td1_text->type);
  666. EXPECT_STREQ("One\n ", td1_text->v.text.text);
  667. GumboNode* td2 = GetChild(tr, 2);
  668. ASSERT_EQ(GUMBO_NODE_ELEMENT, td2->type);
  669. EXPECT_EQ(GUMBO_TAG_TD, GetTag(td2));
  670. ASSERT_EQ(1, GetChildCount(td2));
  671. GumboNode* td2_text = GetChild(td2, 0);
  672. ASSERT_EQ(GUMBO_NODE_TEXT, td2_text->type);
  673. EXPECT_STREQ("Two\n ", td2_text->v.text.text);
  674. GumboNode* tr3 = GetChild(tbody, 2);
  675. ASSERT_EQ(GUMBO_NODE_ELEMENT, tr3->type);
  676. EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr3));
  677. ASSERT_EQ(1, GetChildCount(tr3));
  678. GumboNode* body_text = GetChild(body, 1);
  679. ASSERT_EQ(GUMBO_NODE_WHITESPACE, body_text->type);
  680. EXPECT_STREQ("\n", body_text->v.text.text);
  681. }
  682. TEST_F(GumboParserTest, MisnestedTable) {
  683. Parse("<table><tr><div><td></div></table>");
  684. GumboNode* body;
  685. GetAndAssertBody(root_, &body);
  686. ASSERT_EQ(2, GetChildCount(body));
  687. GumboNode* div = GetChild(body, 0);
  688. ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
  689. EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div));
  690. ASSERT_EQ(0, GetChildCount(div));
  691. GumboNode* table = GetChild(body, 1);
  692. ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
  693. EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
  694. ASSERT_EQ(1, GetChildCount(table));
  695. GumboNode* tbody = GetChild(table, 0);
  696. ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type);
  697. EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody));
  698. ASSERT_EQ(1, GetChildCount(tbody));
  699. GumboNode* tr = GetChild(tbody, 0);
  700. ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type);
  701. EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr));
  702. ASSERT_EQ(1, GetChildCount(tr));
  703. GumboNode* td = GetChild(tr, 0);
  704. ASSERT_EQ(GUMBO_NODE_ELEMENT, td->type);
  705. EXPECT_EQ(GUMBO_TAG_TD, GetTag(td));
  706. ASSERT_EQ(0, GetChildCount(td));
  707. }
  708. TEST_F(GumboParserTest, MisnestedTable2) {
  709. Parse("<table><td>Cell1<table><th>Cell2<tr>Cell3</table>");
  710. GumboNode* body;
  711. GetAndAssertBody(root_, &body);
  712. ASSERT_EQ(1, GetChildCount(body));
  713. GumboNode* table1 = GetChild(body, 0);
  714. ASSERT_EQ(GUMBO_NODE_ELEMENT, table1->type);
  715. EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table1));
  716. ASSERT_EQ(1, GetChildCount(table1));
  717. GumboNode* tbody1 = GetChild(table1, 0);
  718. ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody1->type);
  719. EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody1));
  720. ASSERT_EQ(1, GetChildCount(tbody1));
  721. GumboNode* tr1 = GetChild(tbody1, 0);
  722. ASSERT_EQ(GUMBO_NODE_ELEMENT, tr1->type);
  723. EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr1));
  724. ASSERT_EQ(1, GetChildCount(tr1));
  725. GumboNode* td1 = GetChild(tr1, 0);
  726. ASSERT_EQ(GUMBO_NODE_ELEMENT, td1->type);
  727. EXPECT_EQ(GUMBO_TAG_TD, GetTag(td1));
  728. ASSERT_EQ(3, GetChildCount(td1));
  729. GumboNode* cell1 = GetChild(td1, 0);
  730. ASSERT_EQ(GUMBO_NODE_TEXT, cell1->type);
  731. EXPECT_STREQ("Cell1", cell1->v.text.text);
  732. // Foster-parented out of the inner <tr>
  733. GumboNode* cell3 = GetChild(td1, 1);
  734. ASSERT_EQ(GUMBO_NODE_TEXT, cell3->type);
  735. EXPECT_STREQ("Cell3", cell3->v.text.text);
  736. GumboNode* table2 = GetChild(td1, 2);
  737. ASSERT_EQ(GUMBO_NODE_ELEMENT, table2->type);
  738. EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table2));
  739. ASSERT_EQ(1, GetChildCount(table2));
  740. GumboNode* tbody2 = GetChild(table2, 0);
  741. ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody2->type);
  742. EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody2));
  743. ASSERT_EQ(2, GetChildCount(tbody2));
  744. GumboNode* tr2 = GetChild(tbody2, 0);
  745. ASSERT_EQ(GUMBO_NODE_ELEMENT, tr2->type);
  746. EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr2));
  747. ASSERT_EQ(1, GetChildCount(tr2));
  748. GumboNode* th = GetChild(tr2, 0);
  749. ASSERT_EQ(GUMBO_NODE_ELEMENT, th->type);
  750. EXPECT_EQ(GUMBO_TAG_TH, GetTag(th));
  751. ASSERT_EQ(1, GetChildCount(th));
  752. GumboNode* cell2 = GetChild(th, 0);
  753. ASSERT_EQ(GUMBO_NODE_TEXT, cell2->type);
  754. EXPECT_STREQ("Cell2", cell2->v.text.text);
  755. GumboNode* tr3 = GetChild(tbody2, 1);
  756. ASSERT_EQ(GUMBO_NODE_ELEMENT, tr3->type);
  757. EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr3));
  758. ASSERT_EQ(0, GetChildCount(tr3));
  759. }
  760. TEST_F(GumboParserTest, Select) {
  761. Parse("<select><option>One<option>Two</select><div></div>");
  762. GumboNode* body;
  763. GetAndAssertBody(root_, &body);
  764. ASSERT_EQ(2, GetChildCount(body));
  765. GumboNode* select = GetChild(body, 0);
  766. ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type);
  767. EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select));
  768. ASSERT_EQ(2, GetChildCount(select));
  769. GumboNode* option1 = GetChild(select, 0);
  770. ASSERT_EQ(GUMBO_NODE_ELEMENT, option1->type);
  771. EXPECT_EQ(GUMBO_TAG_OPTION, GetTag(option1));
  772. ASSERT_EQ(1, GetChildCount(option1));
  773. GumboNode* option2 = GetChild(select, 1);
  774. ASSERT_EQ(GUMBO_NODE_ELEMENT, option2->type);
  775. EXPECT_EQ(GUMBO_TAG_OPTION, GetTag(option2));
  776. ASSERT_EQ(1, GetChildCount(option2));
  777. GumboNode* div = GetChild(body, 1);
  778. ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
  779. EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div));
  780. ASSERT_EQ(0, GetChildCount(div));
  781. }
  782. TEST_F(GumboParserTest, ComplicatedSelect) {
  783. Parse(
  784. "<select><div class=foo></div><optgroup><option>Option"
  785. "</option><input></optgroup></select>");
  786. GumboNode* body;
  787. GetAndAssertBody(root_, &body);
  788. ASSERT_EQ(2, GetChildCount(body));
  789. GumboNode* select = GetChild(body, 0);
  790. ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type);
  791. EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select));
  792. ASSERT_EQ(1, GetChildCount(select));
  793. GumboNode* optgroup = GetChild(select, 0);
  794. ASSERT_EQ(GUMBO_NODE_ELEMENT, optgroup->type);
  795. EXPECT_EQ(GUMBO_TAG_OPTGROUP, GetTag(optgroup));
  796. ASSERT_EQ(1, GetChildCount(optgroup));
  797. GumboNode* option = GetChild(optgroup, 0);
  798. ASSERT_EQ(GUMBO_NODE_ELEMENT, option->type);
  799. EXPECT_EQ(GUMBO_TAG_OPTION, GetTag(option));
  800. ASSERT_EQ(1, GetChildCount(option));
  801. GumboNode* text = GetChild(option, 0);
  802. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  803. EXPECT_STREQ("Option", text->v.text.text);
  804. GumboNode* input = GetChild(body, 1);
  805. ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
  806. EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input));
  807. ASSERT_EQ(0, GetChildCount(input));
  808. }
  809. TEST_F(GumboParserTest, DoubleSelect) {
  810. Parse("<select><select><div></div>");
  811. GumboNode* body;
  812. GetAndAssertBody(root_, &body);
  813. ASSERT_EQ(2, GetChildCount(body));
  814. GumboNode* select = GetChild(body, 0);
  815. ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type);
  816. EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select));
  817. ASSERT_EQ(0, GetChildCount(select));
  818. GumboNode* div = GetChild(body, 1);
  819. ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
  820. EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div));
  821. ASSERT_EQ(0, GetChildCount(div));
  822. }
  823. TEST_F(GumboParserTest, InputInSelect) {
  824. Parse("<select><input /><div></div>");
  825. GumboNode* body;
  826. GetAndAssertBody(root_, &body);
  827. ASSERT_EQ(3, GetChildCount(body));
  828. GumboNode* select = GetChild(body, 0);
  829. ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type);
  830. EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select));
  831. ASSERT_EQ(0, GetChildCount(select));
  832. GumboNode* input = GetChild(body, 1);
  833. ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
  834. EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input));
  835. ASSERT_EQ(0, GetChildCount(input));
  836. GumboNode* div = GetChild(body, 2);
  837. ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
  838. EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div));
  839. ASSERT_EQ(0, GetChildCount(div));
  840. }
  841. TEST_F(GumboParserTest, SelectInTable) {
  842. Parse("<table><td><select><option value=1></table>");
  843. GumboNode* body;
  844. GetAndAssertBody(root_, &body);
  845. ASSERT_EQ(1, GetChildCount(body));
  846. GumboNode* table = GetChild(body, 0);
  847. ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
  848. EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
  849. ASSERT_EQ(1, GetChildCount(table));
  850. GumboNode* tbody = GetChild(table, 0);
  851. ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type);
  852. EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody));
  853. ASSERT_EQ(1, GetChildCount(tbody));
  854. GumboNode* tr = GetChild(tbody, 0);
  855. ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type);
  856. EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr));
  857. ASSERT_EQ(1, GetChildCount(tr));
  858. GumboNode* td = GetChild(tr, 0);
  859. ASSERT_EQ(GUMBO_NODE_ELEMENT, td->type);
  860. EXPECT_EQ(GUMBO_TAG_TD, GetTag(td));
  861. ASSERT_EQ(1, GetChildCount(td));
  862. GumboNode* select = GetChild(td, 0);
  863. ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type);
  864. EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select));
  865. ASSERT_EQ(1, GetChildCount(select));
  866. GumboNode* option = GetChild(select, 0);
  867. ASSERT_EQ(GUMBO_NODE_ELEMENT, option->type);
  868. EXPECT_EQ(GUMBO_TAG_OPTION, GetTag(option));
  869. ASSERT_EQ(0, GetChildCount(option));
  870. }
  871. TEST_F(GumboParserTest, ImplicitColgroup) {
  872. Parse("<table><col /><col /></table>");
  873. GumboNode* body;
  874. GetAndAssertBody(root_, &body);
  875. ASSERT_EQ(1, GetChildCount(body));
  876. GumboNode* table = GetChild(body, 0);
  877. ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
  878. EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table));
  879. ASSERT_EQ(1, GetChildCount(table));
  880. GumboNode* colgroup = GetChild(table, 0);
  881. ASSERT_EQ(GUMBO_NODE_ELEMENT, colgroup->type);
  882. EXPECT_EQ(GUMBO_TAG_COLGROUP, GetTag(colgroup));
  883. ASSERT_EQ(2, GetChildCount(colgroup));
  884. GumboNode* col1 = GetChild(colgroup, 0);
  885. ASSERT_EQ(GUMBO_NODE_ELEMENT, col1->type);
  886. EXPECT_EQ(GUMBO_TAG_COL, GetTag(col1));
  887. ASSERT_EQ(0, GetChildCount(col1));
  888. GumboNode* col2 = GetChild(colgroup, 0);
  889. ASSERT_EQ(GUMBO_NODE_ELEMENT, col2->type);
  890. EXPECT_EQ(GUMBO_TAG_COL, GetTag(col2));
  891. ASSERT_EQ(0, GetChildCount(col2));
  892. }
  893. TEST_F(GumboParserTest, Form) {
  894. Parse("<form><input type=hidden /><isindex /></form>After form");
  895. GumboNode* body;
  896. GetAndAssertBody(root_, &body);
  897. ASSERT_EQ(2, GetChildCount(body));
  898. GumboNode* form = GetChild(body, 0);
  899. ASSERT_EQ(GUMBO_NODE_ELEMENT, form->type);
  900. EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form));
  901. ASSERT_EQ(1, GetChildCount(form));
  902. GumboNode* input = GetChild(form, 0);
  903. ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
  904. EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input));
  905. ASSERT_EQ(0, GetChildCount(input));
  906. GumboNode* text = GetChild(body, 1);
  907. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  908. EXPECT_STREQ("After form", text->v.text.text);
  909. }
  910. TEST_F(GumboParserTest, NestedForm) {
  911. Parse("<form><label>Label</label><form><input id=input2></form>After form");
  912. GumboNode* body;
  913. GetAndAssertBody(root_, &body);
  914. ASSERT_EQ(2, GetChildCount(body));
  915. GumboNode* form = GetChild(body, 0);
  916. ASSERT_EQ(GUMBO_NODE_ELEMENT, form->type);
  917. EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form));
  918. ASSERT_EQ(2, GetChildCount(form));
  919. GumboNode* label = GetChild(form, 0);
  920. ASSERT_EQ(GUMBO_NODE_ELEMENT, label->type);
  921. EXPECT_EQ(GUMBO_TAG_LABEL, GetTag(label));
  922. ASSERT_EQ(1, GetChildCount(label));
  923. GumboNode* input = GetChild(form, 1);
  924. ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
  925. EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input));
  926. ASSERT_EQ(0, GetChildCount(input));
  927. GumboNode* text = GetChild(body, 1);
  928. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  929. EXPECT_STREQ("After form", text->v.text.text);
  930. }
  931. TEST_F(GumboParserTest, MisnestedFormInTable) {
  932. // Parse of this is somewhat weird. The first <form> is opened outside the
  933. // table, so when </form> checks to see if there's a form in scope, it stops
  934. // at the <table> boundary and returns null. The form pointer is nulled out
  935. // anyway, though, which means that the second form (parsed in the table body
  936. // state) ends up creating an element. It's immediately popped off
  937. // the stack, but the form element pointer remains set to that node (which is
  938. // not on the stack of open elements). The final </form> tag triggers the
  939. // "does not have node in scope" clause and is ignored. (Note that this is
  940. // different from "has a form element in scope" - the first form is still in
  941. // scope at that point, but the form pointer does not point to it.) Then the
  942. // original <form> element is closed implicitly when the table cell is closed.
  943. Parse(
  944. "<table><tr><td>"
  945. "<form><table><tr><td></td></tr></form>"
  946. "<form></tr></table></form>"
  947. "</td></tr></table");
  948. GumboNode* body;
  949. GetAndAssertBody(root_, &body);
  950. ASSERT_EQ(1, GetChildCount(body));
  951. GumboNode* table1 = GetChild(body, 0);
  952. ASSERT_EQ(GUMBO_NODE_ELEMENT, table1->type);
  953. EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table1));
  954. ASSERT_EQ(1, GetChildCount(table1));
  955. GumboNode* tbody1 = GetChild(table1, 0);
  956. ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody1->type);
  957. EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody1));
  958. ASSERT_EQ(1, GetChildCount(tbody1));
  959. GumboNode* tr1 = GetChild(tbody1, 0);
  960. ASSERT_EQ(GUMBO_NODE_ELEMENT, tr1->type);
  961. EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr1));
  962. ASSERT_EQ(1, GetChildCount(tr1));
  963. GumboNode* td1 = GetChild(tr1, 0);
  964. ASSERT_EQ(GUMBO_NODE_ELEMENT, td1->type);
  965. EXPECT_EQ(GUMBO_TAG_TD, GetTag(td1));
  966. ASSERT_EQ(1, GetChildCount(td1));
  967. GumboNode* form1 = GetChild(td1, 0);
  968. ASSERT_EQ(GUMBO_NODE_ELEMENT, form1->type);
  969. EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form1));
  970. ASSERT_EQ(1, GetChildCount(form1));
  971. GumboNode* table2 = GetChild(form1, 0);
  972. ASSERT_EQ(GUMBO_NODE_ELEMENT, table2->type);
  973. EXPECT_EQ(GUMBO_TAG_TABLE, GetTag(table2));
  974. ASSERT_EQ(1, GetChildCount(table2));
  975. GumboNode* tbody2 = GetChild(table2, 0);
  976. ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody2->type);
  977. EXPECT_EQ(GUMBO_TAG_TBODY, GetTag(tbody2));
  978. ASSERT_EQ(2, GetChildCount(tbody2));
  979. GumboNode* tr2 = GetChild(tbody2, 0);
  980. ASSERT_EQ(GUMBO_NODE_ELEMENT, tr2->type);
  981. EXPECT_EQ(GUMBO_TAG_TR, GetTag(tr2));
  982. ASSERT_EQ(1, GetChildCount(tr2));
  983. GumboNode* form2 = GetChild(tbody2, 1);
  984. ASSERT_EQ(GUMBO_NODE_ELEMENT, form2->type);
  985. EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form2));
  986. ASSERT_EQ(0, GetChildCount(form2));
  987. }
  988. TEST_F(GumboParserTest, IsIndex) {
  989. Parse("<isindex id=form1 action='/action' prompt='Secret Message'>");
  990. GumboNode* body;
  991. GetAndAssertBody(root_, &body);
  992. ASSERT_EQ(1, GetChildCount(body));
  993. GumboNode* form = GetChild(body, 0);
  994. ASSERT_EQ(GUMBO_NODE_ELEMENT, form->type);
  995. EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form));
  996. ASSERT_EQ(3, GetChildCount(form));
  997. GumboAttribute* action = GetAttribute(form, 0);
  998. EXPECT_STREQ("action", action->name);
  999. EXPECT_STREQ("/action", action->value);
  1000. GumboNode* hr1 = GetChild(form, 0);
  1001. ASSERT_EQ(GUMBO_NODE_ELEMENT, hr1->type);
  1002. EXPECT_EQ(GUMBO_TAG_HR, GetTag(hr1));
  1003. ASSERT_EQ(0, GetChildCount(hr1));
  1004. GumboNode* label = GetChild(form, 1);
  1005. ASSERT_EQ(GUMBO_NODE_ELEMENT, label->type);
  1006. EXPECT_EQ(GUMBO_TAG_LABEL, GetTag(label));
  1007. ASSERT_EQ(2, GetChildCount(label));
  1008. GumboNode* text = GetChild(label, 0);
  1009. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  1010. EXPECT_STREQ("Secret Message", text->v.text.text);
  1011. GumboNode* input = GetChild(label, 1);
  1012. ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
  1013. EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input));
  1014. ASSERT_EQ(0, GetChildCount(input));
  1015. ASSERT_EQ(2, GetAttributeCount(input));
  1016. GumboAttribute* id = GetAttribute(input, 0);
  1017. EXPECT_STREQ("id", id->name);
  1018. EXPECT_STREQ("form1", id->value);
  1019. GumboAttribute* name = GetAttribute(input, 1);
  1020. EXPECT_STREQ("name", name->name);
  1021. EXPECT_STREQ("isindex", name->value);
  1022. GumboNode* hr2 = GetChild(form, 2);
  1023. ASSERT_EQ(GUMBO_NODE_ELEMENT, hr2->type);
  1024. EXPECT_EQ(GUMBO_TAG_HR, GetTag(hr2));
  1025. ASSERT_EQ(0, GetChildCount(hr2));
  1026. }
  1027. TEST_F(GumboParserTest, IsIndexDuplicateAttribute) {
  1028. Parse("<isindex name=foo>");
  1029. GumboNode* body;
  1030. GetAndAssertBody(root_, &body);
  1031. ASSERT_EQ(1, GetChildCount(body));
  1032. GumboNode* form = GetChild(body, 0);
  1033. ASSERT_EQ(GUMBO_NODE_ELEMENT, form->type);
  1034. EXPECT_EQ(GUMBO_TAG_FORM, GetTag(form));
  1035. ASSERT_EQ(3, GetChildCount(form));
  1036. GumboNode* label = GetChild(form, 1);
  1037. ASSERT_EQ(GUMBO_NODE_ELEMENT, label->type);
  1038. EXPECT_EQ(GUMBO_TAG_LABEL, GetTag(label));
  1039. ASSERT_EQ(2, GetChildCount(label));
  1040. GumboNode* input = GetChild(label, 1);
  1041. ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type);
  1042. EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input));
  1043. ASSERT_EQ(0, GetChildCount(input));
  1044. ASSERT_EQ(1, GetAttributeCount(input));
  1045. GumboAttribute* name = GetAttribute(input, 0);
  1046. EXPECT_STREQ("name", name->name);
  1047. EXPECT_STREQ("isindex", name->value);
  1048. }
  1049. TEST_F(GumboParserTest, NestedRawtextTags) {
  1050. Parse(
  1051. "<noscript><noscript jstag=false>"
  1052. "<style>div{text-align:center}</style></noscript>");
  1053. GumboNode* html = GetChild(root_, 0);
  1054. ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
  1055. EXPECT_EQ(GUMBO_TAG_HTML, GetTag(html));
  1056. EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLICIT_END_TAG |
  1057. GUMBO_INSERTION_IMPLIED,
  1058. html->parse_flags);
  1059. ASSERT_EQ(2, GetChildCount(html));
  1060. GumboNode* head = GetChild(html, 0);
  1061. ASSERT_EQ(GUMBO_NODE_ELEMENT, head->type);
  1062. EXPECT_EQ(GUMBO_TAG_HEAD, GetTag(head));
  1063. EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLICIT_END_TAG |
  1064. GUMBO_INSERTION_IMPLIED,
  1065. head->parse_flags);
  1066. ASSERT_EQ(1, GetChildCount(head));
  1067. GumboNode* noscript = GetChild(head, 0);
  1068. ASSERT_EQ(GUMBO_NODE_ELEMENT, noscript->type);
  1069. EXPECT_EQ(GUMBO_TAG_NOSCRIPT, GetTag(noscript));
  1070. ASSERT_EQ(1, GetChildCount(noscript));
  1071. GumboNode* style = GetChild(noscript, 0);
  1072. ASSERT_EQ(GUMBO_NODE_ELEMENT, style->type);
  1073. EXPECT_EQ(GUMBO_TAG_STYLE, GetTag(style));
  1074. ASSERT_EQ(1, GetChildCount(style));
  1075. GumboNode* text = GetChild(style, 0);
  1076. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  1077. EXPECT_STREQ("div{text-align:center}", text->v.text.text);
  1078. }
  1079. TEST_F(GumboParserTest, RawtextInBody) {
  1080. Parse("<body><noembed jsif=false></noembed>");
  1081. GumboNode* body;
  1082. GetAndAssertBody(root_, &body);
  1083. ASSERT_EQ(1, GetChildCount(body));
  1084. GumboNode* noembed = GetChild(body, 0);
  1085. ASSERT_EQ(GUMBO_NODE_ELEMENT, noembed->type);
  1086. EXPECT_EQ(GUMBO_TAG_NOEMBED, GetTag(noembed));
  1087. EXPECT_EQ(1, GetAttributeCount(noembed));
  1088. }
  1089. TEST_F(GumboParserTest, MetaBeforeHead) {
  1090. Parse(
  1091. "<html><meta http-equiv='content-type' "
  1092. "content='text/html; charset=UTF-8' /><head></head>");
  1093. GumboNode* body;
  1094. GetAndAssertBody(root_, &body);
  1095. // Testing for a memory leak here, but
  1096. // TODO(jdtang): Flesh out structural asserts.
  1097. }
  1098. TEST_F(GumboParserTest, NoahsArkClause) {
  1099. Parse(
  1100. "<p><font size=4><font color=red><font size=4><font size=4>"
  1101. "<font size=4><font size=4><font size=4><font color=red><p>X");
  1102. GumboNode* body;
  1103. GetAndAssertBody(root_, &body);
  1104. ASSERT_EQ(2, GetChildCount(body));
  1105. GumboNode* p1 = GetChild(body, 0);
  1106. ASSERT_EQ(GUMBO_NODE_ELEMENT, p1->type);
  1107. EXPECT_EQ(GUMBO_TAG_P, p1->v.element.tag);
  1108. ASSERT_EQ(1, GetChildCount(p1));
  1109. GumboNode* size1 = GetChild(p1, 0);
  1110. GumboNode* red1 = GetChild(size1, 0);
  1111. ASSERT_EQ(GUMBO_NODE_ELEMENT, red1->type);
  1112. EXPECT_EQ(GUMBO_TAG_FONT, red1->v.element.tag);
  1113. ASSERT_EQ(1, GetAttributeCount(red1));
  1114. GumboAttribute* red1_attr = GetAttribute(red1, 0);
  1115. EXPECT_STREQ("color", red1_attr->name);
  1116. EXPECT_STREQ("red", red1_attr->value);
  1117. ASSERT_EQ(1, GetChildCount(red1));
  1118. GumboNode* p2 = GetChild(body, 1);
  1119. ASSERT_EQ(GUMBO_NODE_ELEMENT, p2->type);
  1120. EXPECT_EQ(GUMBO_TAG_P, p2->v.element.tag);
  1121. ASSERT_EQ(1, GetChildCount(p2));
  1122. GumboNode* red2 = GetChild(p2, 0);
  1123. ASSERT_EQ(GUMBO_NODE_ELEMENT, red2->type);
  1124. EXPECT_EQ(GUMBO_TAG_FONT, red2->v.element.tag);
  1125. ASSERT_EQ(1, GetAttributeCount(red2));
  1126. GumboAttribute* red2_attr = GetAttribute(red2, 0);
  1127. EXPECT_STREQ("color", red2_attr->name);
  1128. EXPECT_STREQ("red", red2_attr->value);
  1129. ASSERT_EQ(1, GetChildCount(red2));
  1130. }
  1131. TEST_F(GumboParserTest, AdoptionAgency1) {
  1132. // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#misnested-tags:-b-i-/b-/i
  1133. Parse("<p>1<b>2<i>3</b>4</i>5</p>");
  1134. ASSERT_EQ(1, GetChildCount(root_));
  1135. GumboNode* html = GetChild(root_, 0);
  1136. ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
  1137. EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLICIT_END_TAG |
  1138. GUMBO_INSERTION_IMPLIED,
  1139. html->parse_flags);
  1140. EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
  1141. ASSERT_EQ(2, GetChildCount(html));
  1142. GumboNode* body = GetChild(html, 1);
  1143. ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
  1144. EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLICIT_END_TAG |
  1145. GUMBO_INSERTION_IMPLIED,
  1146. body->parse_flags);
  1147. EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag);
  1148. ASSERT_EQ(1, GetChildCount(body));
  1149. GumboNode* p = GetChild(body, 0);
  1150. ASSERT_EQ(GUMBO_NODE_ELEMENT, p->type);
  1151. EXPECT_EQ(GUMBO_INSERTION_NORMAL, p->parse_flags);
  1152. EXPECT_EQ(GUMBO_TAG_P, p->v.element.tag);
  1153. ASSERT_EQ(4, GetChildCount(p));
  1154. GumboNode* text1 = GetChild(p, 0);
  1155. ASSERT_EQ(GUMBO_NODE_TEXT, text1->type);
  1156. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text1->parse_flags);
  1157. EXPECT_STREQ("1", text1->v.text.text);
  1158. GumboNode* b = GetChild(p, 1);
  1159. ASSERT_EQ(GUMBO_NODE_ELEMENT, b->type);
  1160. EXPECT_EQ(GUMBO_INSERTION_NORMAL, b->parse_flags);
  1161. EXPECT_EQ(GUMBO_TAG_B, b->v.element.tag);
  1162. ASSERT_EQ(2, GetChildCount(b));
  1163. GumboNode* text2 = GetChild(b, 0);
  1164. ASSERT_EQ(GUMBO_NODE_TEXT, text2->type);
  1165. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text2->parse_flags);
  1166. EXPECT_STREQ("2", text2->v.text.text);
  1167. GumboNode* i = GetChild(b, 1);
  1168. ASSERT_EQ(GUMBO_NODE_ELEMENT, i->type);
  1169. EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, i->parse_flags);
  1170. EXPECT_EQ(GUMBO_TAG_I, i->v.element.tag);
  1171. ASSERT_EQ(1, GetChildCount(i));
  1172. GumboNode* text3 = GetChild(i, 0);
  1173. ASSERT_EQ(GUMBO_NODE_TEXT, text3->type);
  1174. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text2->parse_flags);
  1175. EXPECT_STREQ("3", text3->v.text.text);
  1176. GumboNode* i2 = GetChild(p, 2);
  1177. ASSERT_EQ(GUMBO_NODE_ELEMENT, i2->type);
  1178. EXPECT_EQ(GUMBO_INSERTION_BY_PARSER |
  1179. GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT,
  1180. i2->parse_flags);
  1181. EXPECT_EQ(GUMBO_TAG_I, i2->v.element.tag);
  1182. ASSERT_EQ(1, GetChildCount(i2));
  1183. GumboNode* text4 = GetChild(i2, 0);
  1184. ASSERT_EQ(GUMBO_NODE_TEXT, text4->type);
  1185. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text2->parse_flags);
  1186. EXPECT_STREQ("4", text4->v.text.text);
  1187. GumboNode* text5 = GetChild(p, 3);
  1188. ASSERT_EQ(GUMBO_NODE_TEXT, text5->type);
  1189. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text2->parse_flags);
  1190. EXPECT_STREQ("5", text5->v.text.text);
  1191. }
  1192. TEST_F(GumboParserTest, AdoptionAgency2) {
  1193. // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#misnested-tags:-b-p-/b-/p
  1194. Parse("<b>1<p>2</b>3</p>");
  1195. ASSERT_EQ(1, GetChildCount(root_));
  1196. GumboNode* html = GetChild(root_, 0);
  1197. ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
  1198. EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLICIT_END_TAG |
  1199. GUMBO_INSERTION_IMPLIED,
  1200. html->parse_flags);
  1201. EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
  1202. ASSERT_EQ(2, GetChildCount(html));
  1203. GumboNode* body = GetChild(html, 1);
  1204. ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
  1205. EXPECT_EQ(GUMBO_INSERTION_BY_PARSER | GUMBO_INSERTION_IMPLICIT_END_TAG |
  1206. GUMBO_INSERTION_IMPLIED,
  1207. body->parse_flags);
  1208. EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag);
  1209. ASSERT_EQ(2, GetChildCount(body));
  1210. GumboNode* b = GetChild(body, 0);
  1211. ASSERT_EQ(GUMBO_NODE_ELEMENT, b->type);
  1212. EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, b->parse_flags);
  1213. EXPECT_EQ(GUMBO_TAG_B, b->v.element.tag);
  1214. ASSERT_EQ(1, GetChildCount(b));
  1215. GumboNode* text1 = GetChild(b, 0);
  1216. ASSERT_EQ(GUMBO_NODE_TEXT, text1->type);
  1217. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text1->parse_flags);
  1218. EXPECT_STREQ("1", text1->v.text.text);
  1219. GumboNode* p = GetChild(body, 1);
  1220. ASSERT_EQ(GUMBO_NODE_ELEMENT, p->type);
  1221. EXPECT_EQ(GUMBO_INSERTION_ADOPTION_AGENCY_MOVED, p->parse_flags);
  1222. EXPECT_EQ(GUMBO_TAG_P, p->v.element.tag);
  1223. ASSERT_EQ(2, GetChildCount(p));
  1224. GumboNode* b2 = GetChild(p, 0);
  1225. ASSERT_EQ(GUMBO_NODE_ELEMENT, b2->type);
  1226. EXPECT_EQ(GUMBO_INSERTION_ADOPTION_AGENCY_CLONED | GUMBO_INSERTION_BY_PARSER,
  1227. b2->parse_flags);
  1228. EXPECT_EQ(GUMBO_TAG_B, b2->v.element.tag);
  1229. ASSERT_EQ(1, GetChildCount(b2));
  1230. GumboNode* text2 = GetChild(b2, 0);
  1231. ASSERT_EQ(GUMBO_NODE_TEXT, text2->type);
  1232. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text2->parse_flags);
  1233. EXPECT_STREQ("2", text2->v.text.text);
  1234. GumboNode* text3 = GetChild(p, 1);
  1235. ASSERT_EQ(GUMBO_NODE_TEXT, text3->type);
  1236. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text2->parse_flags);
  1237. EXPECT_STREQ("3", text3->v.text.text);
  1238. }
  1239. TEST_F(GumboParserTest, AdoptionAgency3) {
  1240. Parse("<div><a><b><u><i><code><div></a>");
  1241. }
  1242. TEST_F(GumboParserTest, ImplicitlyCloseLists) {
  1243. Parse(
  1244. "<ul>\n"
  1245. " <li>First\n"
  1246. " <li>Second\n"
  1247. "</ul>");
  1248. GumboNode* body;
  1249. GetAndAssertBody(root_, &body);
  1250. ASSERT_EQ(1, GetChildCount(body));
  1251. GumboNode* ul = GetChild(body, 0);
  1252. ASSERT_EQ(GUMBO_NODE_ELEMENT, ul->type);
  1253. EXPECT_EQ(GUMBO_TAG_UL, GetTag(ul));
  1254. ASSERT_EQ(3, GetChildCount(ul));
  1255. GumboNode* text = GetChild(ul, 0);
  1256. ASSERT_EQ(GUMBO_NODE_WHITESPACE, text->type);
  1257. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text->parse_flags);
  1258. EXPECT_STREQ("\n ", text->v.text.text);
  1259. GumboNode* li1 = GetChild(ul, 1);
  1260. ASSERT_EQ(GUMBO_NODE_ELEMENT, li1->type);
  1261. EXPECT_EQ(GUMBO_TAG_LI, GetTag(li1));
  1262. ASSERT_EQ(1, GetChildCount(li1));
  1263. GumboNode* li2 = GetChild(ul, 2);
  1264. ASSERT_EQ(GUMBO_NODE_ELEMENT, li2->type);
  1265. EXPECT_EQ(GUMBO_TAG_LI, GetTag(li2));
  1266. ASSERT_EQ(1, GetChildCount(li2));
  1267. }
  1268. TEST_F(GumboParserTest, CData) {
  1269. Parse("<svg><![CDATA[this is text]]></svg>");
  1270. GumboNode* body;
  1271. GetAndAssertBody(root_, &body);
  1272. ASSERT_EQ(1, GetChildCount(body));
  1273. GumboNode* svg = GetChild(body, 0);
  1274. ASSERT_EQ(1, GetChildCount(svg));
  1275. GumboNode* cdata = GetChild(svg, 0);
  1276. ASSERT_EQ(GUMBO_NODE_CDATA, cdata->type);
  1277. EXPECT_STREQ("this is text", cdata->v.text.text);
  1278. }
  1279. TEST_F(GumboParserTest, CDataUnsafe) {
  1280. // Can't use Parse() because of the strlen
  1281. output_ =
  1282. gumbo_parse_with_options(&options_, "<svg><![CDATA[\0filler\0text\0]]>",
  1283. sizeof("<svg><![CDATA[\0filler\0text\0]]>") - 1);
  1284. root_ = output_->document;
  1285. GumboNode* body;
  1286. GetAndAssertBody(root_, &body);
  1287. ASSERT_EQ(1, GetChildCount(body));
  1288. GumboNode* svg = GetChild(body, 0);
  1289. ASSERT_EQ(1, GetChildCount(svg));
  1290. GumboNode* cdata = GetChild(svg, 0);
  1291. ASSERT_EQ(GUMBO_NODE_CDATA, cdata->type);
  1292. // \xEF\xBF\xBD = unicode replacement char
  1293. EXPECT_STREQ(
  1294. "\xEF\xBF\xBD"
  1295. "filler\xEF\xBF\xBD"
  1296. "text\xEF\xBF\xBD",
  1297. cdata->v.text.text);
  1298. }
  1299. TEST_F(GumboParserTest, CDataInBody) {
  1300. Parse("<div><![CDATA[this is text]]></div>");
  1301. GumboNode* body;
  1302. GetAndAssertBody(root_, &body);
  1303. ASSERT_EQ(1, GetChildCount(body));
  1304. GumboNode* div = GetChild(body, 0);
  1305. ASSERT_EQ(1, GetChildCount(div));
  1306. GumboNode* cdata = GetChild(div, 0);
  1307. ASSERT_EQ(GUMBO_NODE_COMMENT, cdata->type);
  1308. EXPECT_STREQ("[CDATA[this is text]]", cdata->v.text.text);
  1309. }
  1310. TEST_F(GumboParserTest, FormattingTagsInHeading) {
  1311. Parse("<h2>This is <b>old</h2>text");
  1312. GumboNode* body;
  1313. GetAndAssertBody(root_, &body);
  1314. ASSERT_EQ(2, GetChildCount(body));
  1315. GumboNode* h2 = GetChild(body, 0);
  1316. ASSERT_EQ(GUMBO_NODE_ELEMENT, h2->type);
  1317. EXPECT_EQ(GUMBO_TAG_H2, GetTag(h2));
  1318. ASSERT_EQ(2, GetChildCount(h2));
  1319. GumboNode* text1 = GetChild(h2, 0);
  1320. ASSERT_EQ(GUMBO_NODE_TEXT, text1->type);
  1321. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text1->parse_flags);
  1322. EXPECT_STREQ("This is ", text1->v.text.text);
  1323. GumboNode* b = GetChild(h2, 1);
  1324. ASSERT_EQ(GUMBO_NODE_ELEMENT, b->type);
  1325. EXPECT_EQ(GUMBO_TAG_B, GetTag(b));
  1326. EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG, b->parse_flags);
  1327. ASSERT_EQ(1, GetChildCount(b));
  1328. GumboNode* text2 = GetChild(b, 0);
  1329. ASSERT_EQ(GUMBO_NODE_TEXT, text2->type);
  1330. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text2->parse_flags);
  1331. EXPECT_STREQ("old", text2->v.text.text);
  1332. GumboNode* b2 = GetChild(body, 1);
  1333. ASSERT_EQ(GUMBO_NODE_ELEMENT, b2->type);
  1334. EXPECT_EQ(GUMBO_TAG_B, GetTag(b2));
  1335. EXPECT_EQ(GUMBO_INSERTION_IMPLICIT_END_TAG | GUMBO_INSERTION_BY_PARSER |
  1336. GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT,
  1337. b2->parse_flags);
  1338. ASSERT_EQ(1, GetChildCount(b2));
  1339. GumboNode* text3 = GetChild(b2, 0);
  1340. ASSERT_EQ(GUMBO_NODE_TEXT, text3->type);
  1341. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text3->parse_flags);
  1342. EXPECT_STREQ("text", text3->v.text.text);
  1343. }
  1344. TEST_F(GumboParserTest, ExtraReconstruction) {
  1345. Parse("<span><b></span></p>");
  1346. GumboNode* body;
  1347. GetAndAssertBody(root_, &body);
  1348. ASSERT_EQ(2, GetChildCount(body));
  1349. EXPECT_EQ(GUMBO_TAG_SPAN, GetTag(GetChild(body, 0)));
  1350. EXPECT_EQ(GUMBO_TAG_P, GetTag(GetChild(body, 1)));
  1351. }
  1352. TEST_F(GumboParserTest, LinkifiedHeading) {
  1353. Parse("<li><h3><a href=#foo>Text</a></h3><div>Summary</div>");
  1354. GumboNode* body;
  1355. GetAndAssertBody(root_, &body);
  1356. ASSERT_EQ(1, GetChildCount(body));
  1357. GumboNode* li = GetChild(body, 0);
  1358. ASSERT_EQ(GUMBO_NODE_ELEMENT, li->type);
  1359. EXPECT_EQ(GUMBO_TAG_LI, GetTag(li));
  1360. ASSERT_EQ(2, GetChildCount(li));
  1361. GumboNode* h3 = GetChild(li, 0);
  1362. ASSERT_EQ(GUMBO_NODE_ELEMENT, h3->type);
  1363. EXPECT_EQ(GUMBO_TAG_H3, GetTag(h3));
  1364. ASSERT_EQ(1, GetChildCount(h3));
  1365. GumboNode* anchor = GetChild(h3, 0);
  1366. ASSERT_EQ(GUMBO_NODE_ELEMENT, anchor->type);
  1367. EXPECT_EQ(GUMBO_TAG_A, GetTag(anchor));
  1368. ASSERT_EQ(1, GetChildCount(anchor));
  1369. GumboNode* div = GetChild(li, 1);
  1370. ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
  1371. EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div));
  1372. ASSERT_EQ(1, GetChildCount(div));
  1373. }
  1374. TEST_F(GumboParserTest, MisnestedHeading) {
  1375. Parse(
  1376. "<h1>"
  1377. " <section>"
  1378. " <h2>"
  1379. " <dl><dt>List"
  1380. " </h1>"
  1381. " </section>"
  1382. " Heading1"
  1383. "<h3>Heading3</h4>"
  1384. "After</h3> text");
  1385. // The parse of this is pretty weird: according to the spec, it should be:
  1386. // <html>
  1387. // <head></head>
  1388. // <body>
  1389. // <h1>
  1390. // <section>
  1391. // <h2><dl><dt>List</dt></dl></h2>
  1392. // </section>
  1393. // Heading1
  1394. // </h1>
  1395. // <h3>Heading3</h3>
  1396. // After text
  1397. // </body>
  1398. // </html>
  1399. // Explanation:
  1400. // <html>, <head>, and <body> tags are implied. The opening <h1> and <section
  1401. // tags function as expected. Because the current node is <section>, the <h2>
  1402. // does *not* close the existing <h1>, and then we enter a definition list.
  1403. // The closing </h1>, even though it's misnested, causes the <dt> to be closed
  1404. // implicitly, then also closes the <dl> and <h2> as a parse error. <h1> is
  1405. // still open, and so "Heading1" goes into it. Because the current node is a
  1406. // heading tag, <h3> closes it (as a parse error) and reopens a new <h3> node,
  1407. // which is closed by the </h4> tag. The remaining text goes straight into
  1408. // the <body>; since no heading is open, the </h3> tag is ignored and the
  1409. // second run is condensed into the first.
  1410. // TODO(jdtang): Make sure that parse_flags are set appropriately for this.
  1411. GumboNode* body;
  1412. GetAndAssertBody(root_, &body);
  1413. ASSERT_EQ(3, GetChildCount(body));
  1414. GumboNode* h1 = GetChild(body, 0);
  1415. ASSERT_EQ(GUMBO_NODE_ELEMENT, h1->type);
  1416. EXPECT_EQ(GUMBO_TAG_H1, GetTag(h1));
  1417. ASSERT_EQ(3, GetChildCount(h1));
  1418. // Child 1 is whitespace, as it is for many of these nodes.
  1419. GumboNode* section = GetChild(h1, 1);
  1420. ASSERT_EQ(GUMBO_NODE_ELEMENT, section->type);
  1421. EXPECT_EQ(GUMBO_TAG_SECTION, GetTag(section));
  1422. ASSERT_EQ(3, GetChildCount(section));
  1423. GumboNode* h2 = GetChild(section, 1);
  1424. ASSERT_EQ(GUMBO_NODE_ELEMENT, h2->type);
  1425. EXPECT_EQ(GUMBO_TAG_H2, GetTag(h2));
  1426. ASSERT_EQ(2, GetChildCount(h2));
  1427. GumboNode* dl = GetChild(h2, 1);
  1428. ASSERT_EQ(GUMBO_NODE_ELEMENT, dl->type);
  1429. EXPECT_EQ(GUMBO_TAG_DL, GetTag(dl));
  1430. ASSERT_EQ(1, GetChildCount(dl));
  1431. GumboNode* dt = GetChild(dl, 0);
  1432. ASSERT_EQ(GUMBO_NODE_ELEMENT, dt->type);
  1433. EXPECT_EQ(GUMBO_TAG_DT, GetTag(dt));
  1434. ASSERT_EQ(1, GetChildCount(dt));
  1435. GumboNode* text1 = GetChild(dt, 0);
  1436. ASSERT_EQ(GUMBO_NODE_TEXT, text1->type);
  1437. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text1->parse_flags);
  1438. EXPECT_STREQ("List ", text1->v.text.text);
  1439. GumboNode* text2 = GetChild(h1, 2);
  1440. ASSERT_EQ(GUMBO_NODE_TEXT, text2->type);
  1441. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text2->parse_flags);
  1442. EXPECT_STREQ(" Heading1", text2->v.text.text);
  1443. GumboNode* h3 = GetChild(body, 1);
  1444. ASSERT_EQ(GUMBO_NODE_ELEMENT, h3->type);
  1445. EXPECT_EQ(GUMBO_TAG_H3, GetTag(h3));
  1446. EXPECT_EQ(1, GetChildCount(h3));
  1447. GumboNode* text3 = GetChild(h3, 0);
  1448. ASSERT_EQ(GUMBO_NODE_TEXT, text3->type);
  1449. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text3->parse_flags);
  1450. EXPECT_STREQ("Heading3", text3->v.text.text);
  1451. GumboNode* text4 = GetChild(body, 2);
  1452. ASSERT_EQ(GUMBO_NODE_TEXT, text4->type);
  1453. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text4->parse_flags);
  1454. EXPECT_STREQ("After text", text4->v.text.text);
  1455. }
  1456. TEST_F(GumboParserTest, DoubleBody) {
  1457. Parse("<body class=first><body class=second id=merged>Text</body></body>");
  1458. GumboNode* body;
  1459. GetAndAssertBody(root_, &body);
  1460. ASSERT_EQ(1, GetChildCount(body));
  1461. ASSERT_EQ(2, GetAttributeCount(body));
  1462. GumboAttribute* clas = GetAttribute(body, 0);
  1463. EXPECT_STREQ("class", clas->name);
  1464. EXPECT_STREQ("first", clas->value);
  1465. GumboAttribute* id = GetAttribute(body, 1);
  1466. EXPECT_STREQ("id", id->name);
  1467. EXPECT_STREQ("merged", id->value);
  1468. GumboNode* text = GetChild(body, 0);
  1469. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  1470. EXPECT_EQ(GUMBO_INSERTION_NORMAL, text->parse_flags);
  1471. EXPECT_STREQ("Text", text->v.text.text);
  1472. }
  1473. TEST_F(GumboParserTest, ThInMathMl) {
  1474. Parse("<math><th><mI><table></table><tr></table><div><tr>0");
  1475. GumboNode* body;
  1476. GetAndAssertBody(root_, &body);
  1477. ASSERT_EQ(1, GetChildCount(body));
  1478. GumboNode* math = GetChild(body, 0);
  1479. ASSERT_EQ(GUMBO_NODE_ELEMENT, math->type);
  1480. EXPECT_EQ(GUMBO_TAG_MATH, math->v.element.tag);
  1481. EXPECT_EQ(GUMBO_NAMESPACE_MATHML, math->v.element.tag_namespace);
  1482. ASSERT_EQ(1, GetChildCount(math));
  1483. GumboNode* th = GetChild(math, 0);
  1484. ASSERT_EQ(GUMBO_NODE_ELEMENT, th->type);
  1485. EXPECT_EQ(GUMBO_TAG_TH, th->v.element.tag);
  1486. EXPECT_EQ(GUMBO_NAMESPACE_MATHML, th->v.element.tag_namespace);
  1487. ASSERT_EQ(1, GetChildCount(th));
  1488. GumboNode* mi = GetChild(th, 0);
  1489. ASSERT_EQ(GUMBO_NODE_ELEMENT, mi->type);
  1490. EXPECT_EQ(GUMBO_TAG_MI, mi->v.element.tag);
  1491. EXPECT_EQ(GUMBO_NAMESPACE_MATHML, mi->v.element.tag_namespace);
  1492. ASSERT_EQ(2, GetChildCount(mi));
  1493. GumboNode* table = GetChild(mi, 0);
  1494. ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
  1495. EXPECT_EQ(GUMBO_TAG_TABLE, table->v.element.tag);
  1496. EXPECT_EQ(GUMBO_NAMESPACE_HTML, table->v.element.tag_namespace);
  1497. ASSERT_EQ(0, GetChildCount(table));
  1498. GumboNode* div = GetChild(mi, 1);
  1499. ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
  1500. EXPECT_EQ(GUMBO_TAG_DIV, div->v.element.tag);
  1501. EXPECT_EQ(GUMBO_NAMESPACE_HTML, div->v.element.tag_namespace);
  1502. ASSERT_EQ(1, GetChildCount(div));
  1503. GumboNode* text = GetChild(div, 0);
  1504. ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
  1505. EXPECT_STREQ("0", text->v.text.text);
  1506. }
  1507. TEST_F(GumboParserTest, TdInMathml) {
  1508. Parse("<table><th><math><td></tr>");
  1509. GumboNode* body;
  1510. GetAndAssertBody(root_, &body);
  1511. ASSERT_EQ(1, GetChildCount(body));
  1512. GumboNode* table = GetChild(body, 0);
  1513. ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
  1514. EXPECT_EQ(GUMBO_TAG_TABLE, table->v.element.tag);
  1515. EXPECT_EQ(GUMBO_NAMESPACE_HTML, table->v.element.tag_namespace);
  1516. ASSERT_EQ(1, GetChildCount(table));
  1517. GumboNode* tbody = GetChild(table, 0);
  1518. ASSERT_EQ(GUMBO_NODE_ELEMENT, tbody->type);
  1519. EXPECT_EQ(GUMBO_TAG_TBODY, tbody->v.element.tag);
  1520. EXPECT_EQ(GUMBO_NAMESPACE_HTML, tbody->v.element.tag_namespace);
  1521. ASSERT_EQ(1, GetChildCount(tbody));
  1522. GumboNode* tr = GetChild(tbody, 0);
  1523. ASSERT_EQ(GUMBO_NODE_ELEMENT, tr->type);
  1524. EXPECT_EQ(GUMBO_TAG_TR, tr->v.element.tag);
  1525. EXPECT_EQ(GUMBO_NAMESPACE_HTML, tr->v.element.tag_namespace);
  1526. ASSERT_EQ(1, GetChildCount(tr));
  1527. GumboNode* th = GetChild(tr, 0);
  1528. ASSERT_EQ(GUMBO_NODE_ELEMENT, th->type);
  1529. EXPECT_EQ(GUMBO_TAG_TH, th->v.element.tag);
  1530. EXPECT_EQ(GUMBO_NAMESPACE_HTML, th->v.element.tag_namespace);
  1531. ASSERT_EQ(1, GetChildCount(th));
  1532. GumboNode* math = GetChild(th, 0);
  1533. ASSERT_EQ(GUMBO_NODE_ELEMENT, math->type);
  1534. EXPECT_EQ(GUMBO_TAG_MATH, math->v.element.tag);
  1535. EXPECT_EQ(GUMBO_NAMESPACE_MATHML, math->v.element.tag_namespace);
  1536. ASSERT_EQ(1, GetChildCount(math));
  1537. GumboNode* td = GetChild(math, 0);
  1538. ASSERT_EQ(GUMBO_NODE_ELEMENT, td->type);
  1539. EXPECT_EQ(GUMBO_TAG_TD, td->v.element.tag);
  1540. EXPECT_EQ(GUMBO_NAMESPACE_MATHML, td->v.element.tag_namespace);
  1541. ASSERT_EQ(0, GetChildCount(td));
  1542. }
  1543. TEST_F(GumboParserTest, SelectInForeignContent) {
  1544. Parse("<svg><select><foreignobject><select><select><select>");
  1545. }
  1546. TEST_F(GumboParserTest, TemplateInForeignContent) {
  1547. Parse("<template><svg><template>");
  1548. GumboNode* body;
  1549. GetAndAssertBody(root_, &body);
  1550. EXPECT_EQ(0, GetChildCount(body));
  1551. GumboNode* html = GetChild(root_, 0);
  1552. ASSERT_EQ(2, GetChildCount(html));
  1553. GumboNode* head = GetChild(html, 0);
  1554. ASSERT_EQ(1, GetChildCount(head));
  1555. GumboNode* template_node = GetChild(head, 0);
  1556. ASSERT_EQ(GUMBO_NODE_TEMPLATE, template_node->type);
  1557. EXPECT_EQ(GUMBO_TAG_TEMPLATE, template_node->v.element.tag);
  1558. ASSERT_EQ(1, GetChildCount(template_node));
  1559. GumboNode* svg_node = GetChild(template_node, 0);
  1560. ASSERT_EQ(GUMBO_NODE_ELEMENT, svg_node->type);
  1561. EXPECT_EQ(GUMBO_TAG_SVG, svg_node->v.element.tag);
  1562. EXPECT_EQ(GUMBO_NAMESPACE_SVG, svg_node->v.element.tag_namespace);
  1563. ASSERT_EQ(1, GetChildCount(svg_node));
  1564. GumboNode* svg_template = GetChild(svg_node, 0);
  1565. ASSERT_EQ(GUMBO_NODE_ELEMENT, svg_template->type);
  1566. EXPECT_EQ(GUMBO_TAG_TEMPLATE, svg_template->v.element.tag);
  1567. EXPECT_EQ(GUMBO_NAMESPACE_SVG, svg_template->v.element.tag_namespace);
  1568. EXPECT_EQ(0, GetChildCount(svg_template));
  1569. }
  1570. TEST_F(GumboParserTest, TemplateNull) {
  1571. output_ = gumbo_parse_with_options(
  1572. &options_, "<template>\0", sizeof("<template>\0") - 1);
  1573. root_ = output_->document;
  1574. GumboNode* body;
  1575. GetAndAssertBody(root_, &body);
  1576. EXPECT_EQ(0, GetChildCount(body));
  1577. GumboNode* html = GetChild(root_, 0);
  1578. ASSERT_EQ(2, GetChildCount(html));
  1579. GumboNode* head = GetChild(html, 0);
  1580. ASSERT_EQ(1, GetChildCount(head));
  1581. GumboNode* template_node = GetChild(head, 0);
  1582. ASSERT_EQ(GUMBO_NODE_TEMPLATE, template_node->type);
  1583. EXPECT_EQ(GUMBO_TAG_TEMPLATE, template_node->v.element.tag);
  1584. ASSERT_EQ(0, GetChildCount(template_node));
  1585. }
  1586. TEST_F(GumboParserTest, FragmentWithNamespace) {
  1587. ParseFragment("<div></div>", GUMBO_TAG_TITLE, GUMBO_NAMESPACE_SVG);
  1588. EXPECT_EQ(1, GetChildCount(root_));
  1589. GumboNode* html = GetChild(root_, 0);
  1590. ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
  1591. EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
  1592. EXPECT_EQ(1, GetChildCount(html));
  1593. GumboNode* div = GetChild(html, 0);
  1594. ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type);
  1595. EXPECT_EQ(GUMBO_TAG_DIV, div->v.element.tag);
  1596. EXPECT_EQ(0, GetChildCount(div));
  1597. }
  1598. } // namespace