thtmlparser.nim 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. discard """
  2. targets: "c js"
  3. output: '''
  4. true
  5. https://example.com/test?format=jpg&name=orig##
  6. https://example.com/test?format=jpg&name=orig##text
  7. https://example.com/test?format=jpg##text
  8. '''
  9. """
  10. import htmlparser
  11. import xmltree
  12. import strutils
  13. from streams import newStringStream
  14. import std/assertions
  15. block t2813:
  16. const
  17. html = """
  18. <html>
  19. <head>
  20. <title>Test</title>
  21. </head>
  22. <body>
  23. <table>
  24. <thead>
  25. <tr><td>A</td></tr>
  26. <tr><td>B</td></tr>
  27. </thead>
  28. <tbody>
  29. <tr><td></td>A<td></td></tr>
  30. <tr><td></td>B<td></td></tr>
  31. <tr><td></td>C<td></td></tr>
  32. </tbody>
  33. <tfoot>
  34. <tr><td>A</td></tr>
  35. </tfoot>
  36. </table>
  37. </body>
  38. </html>
  39. """
  40. var errors: seq[string] = @[]
  41. let tree = parseHtml(newStringStream(html), "test.html", errors)
  42. doAssert errors.len == 0 # Errors: </thead> expected,...
  43. var len = tree.findAll("tr").len # len = 6
  44. var rows: seq[XmlNode] = @[]
  45. for n in tree.findAll("table"):
  46. n.findAll("tr", rows) # len = 2
  47. break
  48. doAssert tree.findAll("tr").len == rows.len
  49. block t2814:
  50. ## builds the two cases below and test that
  51. ## ``//[dd,li]`` has "<p>that</p>" as children
  52. ##
  53. ## <dl>
  54. ## <dt>this</dt>
  55. ## <dd>
  56. ## <p>that</p>
  57. ## </dd>
  58. ## </dl>
  59. ##
  60. ## <ul>
  61. ## <li>
  62. ## <p>that</p>
  63. ## </li>
  64. ## </ul>
  65. for ltype in [["dl","dd"], ["ul","li"]]:
  66. let desc_item = if ltype[0]=="dl": "<dt>this</dt>" else: ""
  67. let item = "$1<$2><p>that</p></$2>" % [desc_item, ltype[1]]
  68. let list = """ <$1>
  69. $2
  70. </$1> """ % [ltype[0], item]
  71. var errors : seq[string] = @[]
  72. let parseH = parseHtml(newStringStream(list),"statichtml", errors =errors)
  73. if $parseH.findAll(ltype[1])[0].child("p") != "<p>that</p>":
  74. echo "case " & ltype[0] & " failed !"
  75. quit(2)
  76. echo "true"
  77. block t6154:
  78. let foo = """
  79. <!DOCTYPE html>
  80. <html>
  81. <head>
  82. <title> foobar </title>
  83. </head>
  84. <body>
  85. <p class=foo id=bar></p>
  86. <p something=&#9;foo&#9;bar&#178;></p>
  87. <p something= &#9;foo&#9;bar&#178; foo =bloo></p>
  88. <p class="foo2" id="bar2"></p>
  89. <p wrong= ></p>
  90. <p data-foo data-bar="correct!" enabled ></p>
  91. <p quux whatever></p>
  92. </body>
  93. </html>
  94. """
  95. var errors: seq[string] = @[]
  96. let html = parseHtml(newStringStream(foo), "statichtml", errors=errors)
  97. doAssert "statichtml(11, 18) Error: attribute value expected" in errors
  98. let ps = html.findAll("p")
  99. doAssert ps.len == 7
  100. doAssert ps[0].attrsLen == 2
  101. doAssert ps[0].attr("class") == "foo"
  102. doAssert ps[0].attr("id") == "bar"
  103. doAssert ps[0].len == 0
  104. doAssert ps[1].attrsLen == 1
  105. doAssert ps[1].attr("something") == "\tfoo\tbar²"
  106. doAssert ps[1].len == 0
  107. doAssert ps[2].attrsLen == 2
  108. doAssert ps[2].attr("something") == "\tfoo\tbar²"
  109. doAssert ps[2].attr("foo") == "bloo"
  110. doAssert ps[2].len == 0
  111. doAssert ps[3].attrsLen == 2
  112. doAssert ps[3].attr("class") == "foo2"
  113. doAssert ps[3].attr("id") == "bar2"
  114. doAssert ps[3].len == 0
  115. doAssert ps[4].attrsLen == 1
  116. doAssert ps[4].attr("wrong") == ""
  117. doAssert ps[5].attrsLen == 3
  118. doAssert ps[5].attr("data-foo") == ""
  119. doAssert ps[5].attr("data-bar") == "correct!"
  120. doAssert ps[5].attr("enabled") == ""
  121. doAssert ps[5].len == 0
  122. doAssert ps[6].attrsLen == 2
  123. doAssert ps[6].attr("quux") == ""
  124. doAssert ps[6].attr("whatever") == ""
  125. doAssert ps[6].len == 0
  126. # bug #11713, #1034
  127. var content = """
  128. # with &
  129. <img src="https://example.com/test?format=jpg&name=orig" alt="">
  130. <img src="https://example.com/test?format=jpg&name=orig" alt="text">
  131. # without &
  132. <img src="https://example.com/test?format=jpg" alt="text">
  133. """
  134. var
  135. stream = newStringStream(content)
  136. body = parseHtml(stream)
  137. for y in body.findAll("img"):
  138. echo y.attr("src"), "##", y.attr("alt")