# gumboc_test.py (4.7 KB)
  1. # Copyright 2012 Google Inc. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. #
  15. """Tests for Gumbo CTypes bindings."""
  16. __author__ = 'jdtang@google.com (Jonathan Tang)'
  17. import StringIO
  18. import unittest
  19. import gumboc
  20. class CtypesTest(unittest.TestCase):
  21. def testWordParse(self):
  22. with gumboc.parse('Test') as output:
  23. doctype_node = output.contents.document.contents
  24. self.assertEquals(gumboc.NodeType.DOCUMENT, doctype_node.type)
  25. document = doctype_node.v.document
  26. self.assertEquals('', document.name)
  27. self.assertEquals('', document.public_identifier)
  28. self.assertEquals('', document.system_identifier)
  29. root = output.contents.root.contents
  30. self.assertEquals(gumboc.NodeType.ELEMENT, root.type)
  31. self.assertEquals(gumboc.Tag.HTML, root.tag)
  32. self.assertEquals(gumboc.Namespace.HTML, root.tag_namespace)
  33. self.assertEquals(2, len(root.children))
  34. head = root.children[0]
  35. self.assertEquals(gumboc.NodeType.ELEMENT, head.type)
  36. self.assertEquals(gumboc.Tag.HEAD, head.tag)
  37. self.assertEquals('head', head.tag_name)
  38. self.assertEquals(gumboc.Namespace.HTML, head.tag_namespace)
  39. self.assertEquals(0, len(head.original_tag))
  40. self.assertEquals('', str(head.original_end_tag))
  41. self.assertEquals(0, head.children.length)
  42. body = root.children[1]
  43. self.assertNotEquals(body, doctype_node)
  44. self.assertEquals(gumboc.NodeType.ELEMENT, body.type)
  45. self.assertEquals(gumboc.Tag.BODY, body.tag)
  46. self.assertEquals('body', body.tag_name)
  47. self.assertEquals(1, len(body.children))
  48. text_node = body.children[0]
  49. self.assertEquals(gumboc.NodeType.TEXT, text_node.type)
  50. self.assertEquals('Test', text_node.text)
  51. def testBufferThatGoesAway(self):
  52. for i in range(10):
  53. source = StringIO.StringIO('<foo bar=quux>1<p>2</foo>')
  54. parse_tree = gumboc.parse(source.read())
  55. source.close()
  56. with parse_tree as output:
  57. root = output.contents.root.contents
  58. body = root.children[1]
  59. foo = body.children[0]
  60. self.assertEquals(gumboc.NodeType.ELEMENT, foo.type)
  61. self.assertEquals(gumboc.Tag.UNKNOWN, foo.tag)
  62. self.assertEquals('<foo bar=quux>', str(foo.original_tag))
  63. self.assertEquals('', str(foo.original_end_tag))
  64. self.assertEquals('foo', foo.tag_name.decode('utf-8'))
  65. self.assertEquals('bar', foo.attributes[0].name)
  66. self.assertEquals('quux', foo.attributes[0].value)
  67. def testUnknownTag(self):
  68. with gumboc.parse('<foo bar=quux>1<p>2</foo>') as output:
  69. root = output.contents.root.contents
  70. body = root.children[1]
  71. foo = body.children[0]
  72. self.assertEquals(gumboc.NodeType.ELEMENT, foo.type)
  73. self.assertEquals(gumboc.Tag.UNKNOWN, foo.tag)
  74. self.assertEquals('<foo bar=quux>', str(foo.original_tag))
  75. self.assertEquals('', str(foo.original_end_tag))
  76. self.assertEquals('foo', foo.tag_name.decode('utf-8'))
  77. self.assertEquals('bar', foo.attributes[0].name)
  78. self.assertEquals('quux', foo.attributes[0].value)
  79. def testSarcasm(self):
  80. with gumboc.parse('<div><sarcasm><div></div></sarcasm></div>') as output:
  81. root = output.contents.root.contents
  82. body = root.children[1]
  83. div = body.children[0]
  84. sarcasm = div.children[0]
  85. self.assertEquals(gumboc.NodeType.ELEMENT, sarcasm.type)
  86. self.assertEquals(gumboc.Tag.UNKNOWN, sarcasm.tag)
  87. self.assertEquals('<sarcasm>', str(sarcasm.original_tag))
  88. self.assertEquals('</sarcasm>', str(sarcasm.original_end_tag))
  89. self.assertEquals('sarcasm', sarcasm.tag_name.decode('utf-8'))
  90. def testEnums(self):
  91. self.assertEquals(gumboc.Tag.A, gumboc.Tag.A)
  92. self.assertEquals(hash(gumboc.Tag.A.value), hash(gumboc.Tag.A))
  93. def testFragment(self):
  94. with gumboc.parse(
  95. '<div></div>',
  96. fragment_context=gumboc.Tag.TITLE,
  97. fragment_namespace=gumboc.Namespace.SVG) as output:
  98. root = output.contents.root.contents
  99. self.assertEquals(1, len(root.children))
  100. div = root.children[0]
  101. self.assertEquals(gumboc.NodeType.ELEMENT, div.type)
  102. self.assertEquals(gumboc.Tag.DIV, div.tag)
  103. self.assertEquals(gumboc.Namespace.HTML, div.tag_namespace)
  104. if __name__ == '__main__':
  105. unittest.main()