DOMTreeBuilderTest.php 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711
  1. <?php
  2. /**
  3. * @file
  4. * Test the Tree Builder.
  5. */
  6. namespace Masterminds\HTML5\Tests\Parser;
  7. use Masterminds\HTML5\Parser\Scanner;
  8. use Masterminds\HTML5\Parser\Tokenizer;
  9. use Masterminds\HTML5\Parser\DOMTreeBuilder;
  10. /**
  11. * These tests are functional, not necessarily unit tests.
  12. */
  13. class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
  14. {
  15. protected $errors = array();
  16. /**
  17. * Convenience function for parsing.
  18. */
  19. protected function parse($string, array $options = array())
  20. {
  21. $treeBuilder = new DOMTreeBuilder(false, $options);
  22. $scanner = new Scanner($string);
  23. $parser = new Tokenizer($scanner, $treeBuilder);
  24. $parser->parse();
  25. $this->errors = $treeBuilder->getErrors();
  26. return $treeBuilder->document();
  27. }
  28. /**
  29. * Utility function for parsing a fragment of HTML5.
  30. */
  31. protected function parseFragment($string)
  32. {
  33. $treeBuilder = new DOMTreeBuilder(true);
  34. $scanner = new Scanner($string);
  35. $parser = new Tokenizer($scanner, $treeBuilder);
  36. $parser->parse();
  37. $this->errors = $treeBuilder->getErrors();
  38. return $treeBuilder->fragment();
  39. }
  40. public function testDocument()
  41. {
  42. $html = '<!DOCTYPE html><html></html>';
  43. $doc = $this->parse($html);
  44. $this->assertEquals('UTF-8', $doc->encoding);
  45. $this->assertInstanceOf('\DOMDocument', $doc);
  46. $this->assertEquals('html', $doc->documentElement->tagName);
  47. $this->assertEquals('http://www.w3.org/1999/xhtml', $doc->documentElement->namespaceURI);
  48. }
  49. public function testBareAmpersand()
  50. {
  51. $html = "<!doctype html>
  52. <html>
  53. <body>
  54. <img src='a&b' />
  55. <img src='a&=' />
  56. <img src='a&=c' />
  57. <img src='a&=9' />
  58. </body>
  59. </html>";
  60. $doc = $this->parse($html);
  61. $this->assertEmpty($this->errors);
  62. $this->assertXmlStringEqualsXmlString('
  63. <!DOCTYPE html>
  64. <html xmlns="http://www.w3.org/1999/xhtml"><body>
  65. <img src="a&amp;b"/>
  66. <img src="a&amp;="/>
  67. <img src="a&amp;=c"/>
  68. <img src="a&amp;=9"/>
  69. </body>
  70. </html>', $doc->saveXML());
  71. }
  72. public function testBareAmpersandNotAllowedInAttributes()
  73. {
  74. $html = "<!doctype html>
  75. <html>
  76. <body>
  77. <img src='a&' />
  78. <img src='a&+' />
  79. </body>
  80. </html>";
  81. $doc = $this->parse($html);
  82. $this->assertCount(2, $this->errors);
  83. $this->assertXmlStringEqualsXmlString('
  84. <!DOCTYPE html>
  85. <html xmlns="http://www.w3.org/1999/xhtml"><body>
  86. <img src="a&amp;"/>
  87. <img src="a&amp;+"/>
  88. </body>
  89. </html>', $doc->saveXML());
  90. }
  91. public function testBareAmpersandNotAllowedInBody()
  92. {
  93. $html = '<!doctype html>
  94. <html>
  95. <body>
  96. a&b
  97. a&=
  98. a&=c
  99. a&=9
  100. a&+
  101. a& -- valid
  102. </body>
  103. </html>';
  104. $doc = $this->parse($html);
  105. $this->assertCount(5, $this->errors);
  106. $this->assertXmlStringEqualsXmlString('
  107. <!DOCTYPE html>
  108. <html xmlns="http://www.w3.org/1999/xhtml"><body>
  109. a&amp;b
  110. a&amp;=
  111. a&amp;=c
  112. a&amp;=9
  113. a&amp;+
  114. a&amp; -- valid
  115. </body>
  116. </html>', $doc->saveXML());
  117. }
  118. public function testStrangeCapitalization()
  119. {
  120. $html = '<!doctype html>
  121. <html>
  122. <head>
  123. <Title>Hello, world!</TitlE>
  124. </head>
  125. <body>TheBody<script>foo</script></body>
  126. </html>';
  127. $doc = $this->parse($html);
  128. $this->assertInstanceOf('\DOMDocument', $doc);
  129. $this->assertEquals('html', $doc->documentElement->tagName);
  130. $xpath = new \DOMXPath($doc);
  131. $xpath->registerNamespace('x', 'http://www.w3.org/1999/xhtml');
  132. $this->assertEquals('Hello, world!', $xpath->query('//x:title')->item(0)->nodeValue);
  133. $this->assertEquals('foo', $xpath->query('//x:script')->item(0)->nodeValue);
  134. }
  135. public function testDocumentWithDisabledNamespaces()
  136. {
  137. $html = '<!DOCTYPE html><html></html>';
  138. $doc = $this->parse($html, array('disable_html_ns' => true));
  139. $this->assertInstanceOf('\DOMDocument', $doc);
  140. $this->assertEquals('html', $doc->documentElement->tagName);
  141. $this->assertNull($doc->documentElement->namespaceURI);
  142. }
  143. public function testDocumentWithATargetDocument()
  144. {
  145. $targetDom = new \DOMDocument();
  146. $html = '<!DOCTYPE html><html></html>';
  147. $doc = $this->parse($html, array('target_document' => $targetDom));
  148. $this->assertInstanceOf('\DOMDocument', $doc);
  149. $this->assertSame($doc, $targetDom);
  150. $this->assertEquals('html', $doc->documentElement->tagName);
  151. }
  152. public function testDocumentFakeAttrAbsence()
  153. {
  154. $html = '<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><body>foo</body></html>';
  155. $doc = $this->parse($html, array('xmlNamespaces' => true));
  156. $xp = new \DOMXPath($doc);
  157. $this->assertEquals(0, $xp->query('//@html5-php-fake-id-attribute')->length);
  158. }
  159. public function testFragment()
  160. {
  161. $html = '<div>test</div><span>test2</span>';
  162. $doc = $this->parseFragment($html);
  163. $this->assertInstanceOf('\DOMDocumentFragment', $doc);
  164. $this->assertTrue($doc->hasChildNodes());
  165. $this->assertEquals('div', $doc->childNodes->item(0)->tagName);
  166. $this->assertEquals('test', $doc->childNodes->item(0)->textContent);
  167. $this->assertEquals('span', $doc->childNodes->item(1)->tagName);
  168. $this->assertEquals('test2', $doc->childNodes->item(1)->textContent);
  169. }
  170. public function testElements()
  171. {
  172. $html = '<!DOCTYPE html><html><head><title></title></head><body></body></html>';
  173. $doc = $this->parse($html);
  174. $root = $doc->documentElement;
  175. $this->assertEquals('html', $root->tagName);
  176. $this->assertEquals('html', $root->localName);
  177. $this->assertEquals('html', $root->nodeName);
  178. $this->assertEquals(2, $root->childNodes->length);
  179. $kids = $root->childNodes;
  180. $this->assertEquals('head', $kids->item(0)->tagName);
  181. $this->assertEquals('body', $kids->item(1)->tagName);
  182. $head = $kids->item(0);
  183. $this->assertEquals(1, $head->childNodes->length);
  184. $this->assertEquals('title', $head->childNodes->item(0)->tagName);
  185. }
  186. public function testImplicitNamespaces()
  187. {
  188. $dom = $this->parse('<!DOCTYPE html><html><body><a xlink:href="bar">foo</a></body></html>');
  189. $a = $dom->getElementsByTagName('a')->item(0);
  190. $attr = $a->getAttributeNode('xlink:href');
  191. $this->assertEquals('http://www.w3.org/1999/xlink', $attr->namespaceURI);
  192. $dom = $this->parse('<!DOCTYPE html><html><body><a xml:base="bar">foo</a></body></html>');
  193. $a = $dom->getElementsByTagName('a')->item(0);
  194. $attr = $a->getAttributeNode('xml:base');
  195. $this->assertEquals('http://www.w3.org/XML/1998/namespace', $attr->namespaceURI);
  196. }
  197. public function testCustomImplicitNamespaces()
  198. {
  199. $dom = $this->parse('<!DOCTYPE html><html><body><a t:href="bar">foo</a></body></html>', array(
  200. 'implicitNamespaces' => array(
  201. 't' => 'http://www.example.com',
  202. ),
  203. ));
  204. $a = $dom->getElementsByTagName('a')->item(0);
  205. $attr = $a->getAttributeNode('t:href');
  206. $this->assertEquals('http://www.example.com', $attr->namespaceURI);
  207. $dom = $this->parse('<!DOCTYPE html><html><body><t:a>foo</t:a></body></html>', array(
  208. 'implicitNamespaces' => array(
  209. 't' => 'http://www.example.com',
  210. ),
  211. ));
  212. $list = $dom->getElementsByTagNameNS('http://www.example.com', 'a');
  213. $this->assertEquals(1, $list->length);
  214. }
  215. public function testXmlNamespaces()
  216. {
  217. $dom = $this->parse(
  218. '<!DOCTYPE html><html>
  219. <t:body xmlns:t="http://www.example.com">
  220. <a t:href="bar">foo</a>
  221. </body>
  222. <div>foo</div>
  223. </html>', array(
  224. 'xmlNamespaces' => true,
  225. ));
  226. $a = $dom->getElementsByTagName('a')->item(0);
  227. $attr = $a->getAttributeNode('t:href');
  228. $this->assertEquals('http://www.example.com', $attr->namespaceURI);
  229. $list = $dom->getElementsByTagNameNS('http://www.example.com', 'body');
  230. $this->assertEquals(1, $list->length);
  231. }
  232. public function testXmlNamespaceNesting()
  233. {
  234. $dom = $this->parse(
  235. '<!DOCTYPE html><html>
  236. <body xmlns:x="http://www.prefixed.com" id="body">
  237. <a id="bar1" xmlns="http://www.prefixed.com/bar1">
  238. <b id="bar4" xmlns="http://www.prefixed.com/bar4"><x:prefixed id="prefixed"/></b>
  239. </a>
  240. <svg id="svg"></svg>
  241. <c id="bar2" xmlns="http://www.prefixed.com/bar2"></c>
  242. <div id="div"></div>
  243. <d id="bar3"></d>
  244. <xn:d xmlns:xn="http://www.prefixed.com/xn" xmlns="http://www.prefixed.com/bar5_x" id="bar5"><x id="bar5_x"/></xn:d>
  245. </body>
  246. </html>', array(
  247. 'xmlNamespaces' => true,
  248. ));
  249. $this->assertEmpty($this->errors);
  250. $div = $dom->getElementById('div');
  251. $this->assertEquals('http://www.w3.org/1999/xhtml', $div->namespaceURI);
  252. $body = $dom->getElementById('body');
  253. $this->assertEquals('http://www.w3.org/1999/xhtml', $body->namespaceURI);
  254. $bar1 = $dom->getElementById('bar1');
  255. $this->assertEquals('http://www.prefixed.com/bar1', $bar1->namespaceURI);
  256. $bar2 = $dom->getElementById('bar2');
  257. $this->assertEquals('http://www.prefixed.com/bar2', $bar2->namespaceURI);
  258. $bar3 = $dom->getElementById('bar3');
  259. $this->assertEquals('http://www.w3.org/1999/xhtml', $bar3->namespaceURI);
  260. $bar4 = $dom->getElementById('bar4');
  261. $this->assertEquals('http://www.prefixed.com/bar4', $bar4->namespaceURI);
  262. $svg = $dom->getElementById('svg');
  263. $this->assertEquals('http://www.w3.org/2000/svg', $svg->namespaceURI);
  264. $prefixed = $dom->getElementById('prefixed');
  265. $this->assertEquals('http://www.prefixed.com', $prefixed->namespaceURI);
  266. $prefixed = $dom->getElementById('bar5');
  267. $this->assertEquals('http://www.prefixed.com/xn', $prefixed->namespaceURI);
  268. $prefixed = $dom->getElementById('bar5_x');
  269. $this->assertEquals('http://www.prefixed.com/bar5_x', $prefixed->namespaceURI);
  270. }
  271. public function testMoveNonInlineElements()
  272. {
  273. $doc = $this->parse('<p>line1<br/><hr/>line2</p>');
  274. $this->assertEquals('<html xmlns="http://www.w3.org/1999/xhtml"><p>line1<br/></p><hr/>line2</html>', $doc->saveXML($doc->documentElement), 'Move non-inline elements outside of inline containers.');
  275. $doc = $this->parse('<p>line1<div>line2</div></p>');
  276. $this->assertEquals('<html xmlns="http://www.w3.org/1999/xhtml"><p>line1</p><div>line2</div></html>', $doc->saveXML($doc->documentElement), 'Move non-inline elements outside of inline containers.');
  277. }
  278. public function testAttributes()
  279. {
  280. $html = "<!DOCTYPE html>
  281. <html>
  282. <head><title></title></head>
  283. <body id='a' class='b c'></body>
  284. </html>";
  285. $doc = $this->parse($html);
  286. $root = $doc->documentElement;
  287. $body = $root->GetElementsByTagName('body')->item(0);
  288. $this->assertEquals('body', $body->tagName);
  289. $this->assertTrue($body->hasAttributes());
  290. $this->assertEquals('a', $body->getAttribute('id'));
  291. $this->assertEquals('b c', $body->getAttribute('class'));
  292. $body2 = $doc->getElementById('a');
  293. $this->assertEquals('body', $body2->tagName);
  294. $this->assertEquals('a', $body2->getAttribute('id'));
  295. }
  296. public function testSVGAttributes()
  297. {
  298. $html = "<!DOCTYPE html>
  299. <html><body>
  300. <svg width='150' viewbox='2'>
  301. <rect textlength='2'/>
  302. <animatecolor>foo</animatecolor>
  303. </svg>
  304. </body></html>";
  305. $doc = $this->parse($html);
  306. $root = $doc->documentElement;
  307. $svg = $root->getElementsByTagName('svg')->item(0);
  308. $this->assertTrue($svg->hasAttribute('viewBox'));
  309. $rect = $root->getElementsByTagName('rect')->item(0);
  310. $this->assertTrue($rect->hasAttribute('textLength'));
  311. $ac = $root->getElementsByTagName('animateColor');
  312. $this->assertEquals(1, $ac->length);
  313. }
  314. public function testMathMLAttribute()
  315. {
  316. $html = '<!doctype html>
  317. <html lang="en">
  318. <body>
  319. <math>
  320. <mi>x</mi>
  321. <csymbol definitionurl="http://www.example.com/mathops/multiops.html#plusminus">
  322. <mo>&PlusMinus;</mo>
  323. </csymbol>
  324. <mi>y</mi>
  325. </math>
  326. </body>
  327. </html>';
  328. $doc = $this->parse($html);
  329. $root = $doc->documentElement;
  330. $csymbol = $root->getElementsByTagName('csymbol')->item(0);
  331. $this->assertTrue($csymbol->hasAttribute('definitionURL'));
  332. }
  333. public function testMissingHtmlTag()
  334. {
  335. $html = '<!DOCTYPE html><title>test</title>';
  336. $doc = $this->parse($html);
  337. $this->assertEquals('html', $doc->documentElement->tagName);
  338. $this->assertEquals('title', $doc->documentElement->childNodes->item(0)->tagName);
  339. }
  340. public function testComment()
  341. {
  342. $html = '<html><!--Hello World.--></html>';
  343. $doc = $this->parse($html);
  344. $comment = $doc->documentElement->childNodes->item(0);
  345. $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
  346. $this->assertEquals('Hello World.', $comment->data);
  347. $html = '<!--Hello World.--><html></html>';
  348. $doc = $this->parse($html);
  349. $comment = $doc->childNodes->item(1);
  350. $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
  351. $this->assertEquals('Hello World.', $comment->data);
  352. $comment = $doc->childNodes->item(2);
  353. $this->assertEquals(XML_ELEMENT_NODE, $comment->nodeType);
  354. $this->assertEquals('html', $comment->tagName);
  355. }
  356. public function testCDATA()
  357. {
  358. $html = '<!DOCTYPE html><html><math><![CDATA[test]]></math></html>';
  359. $doc = $this->parse($html);
  360. $wrapper = $doc->getElementsByTagName('math')->item(0);
  361. $this->assertEquals(1, $wrapper->childNodes->length);
  362. $cdata = $wrapper->childNodes->item(0);
  363. $this->assertEquals(XML_CDATA_SECTION_NODE, $cdata->nodeType);
  364. $this->assertEquals('test', $cdata->data);
  365. }
  366. public function testText()
  367. {
  368. $html = '<!DOCTYPE html><html><head></head><body><math>test</math></body></html>';
  369. $doc = $this->parse($html);
  370. $wrapper = $doc->getElementsByTagName('math')->item(0);
  371. $this->assertEquals(1, $wrapper->childNodes->length);
  372. $data = $wrapper->childNodes->item(0);
  373. $this->assertEquals(XML_TEXT_NODE, $data->nodeType);
  374. $this->assertEquals('test', $data->data);
  375. // The DomTreeBuilder has special handling for text when in before head mode.
  376. $html = '<!DOCTYPE html><html>
  377. Foo<head></head><body></body></html>';
  378. $doc = $this->parse($html);
  379. $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $this->errors[0]);
  380. $headElement = $doc->documentElement->firstChild;
  381. $this->assertEquals('head', $headElement->tagName);
  382. }
  383. public function testParseErrors()
  384. {
  385. $html = '<!DOCTYPE html><html><math><![CDATA[test';
  386. $doc = $this->parse($html);
  387. // We're JUST testing that we can access errors. Actual testing of
  388. // error messages happen in the Tokenizer's tests.
  389. $this->assertGreaterThan(0, count($this->errors));
  390. $this->assertTrue(is_string($this->errors[0]));
  391. }
  392. public function testProcessingInstruction()
  393. {
  394. // Test the simple case, which is where PIs are inserted into the DOM.
  395. $doc = $this->parse('<!DOCTYPE html><html><?foo bar?>');
  396. $this->assertEquals(1, $doc->documentElement->childNodes->length);
  397. $pi = $doc->documentElement->firstChild;
  398. $this->assertInstanceOf('\DOMProcessingInstruction', $pi);
  399. $this->assertEquals('foo', $pi->nodeName);
  400. $this->assertEquals('bar', $pi->data);
  401. // Leading xml PIs should be ignored.
  402. $doc = $this->parse('<?xml version="1.0"?><!DOCTYPE html><html><head></head></html>');
  403. $this->assertEquals(2, $doc->childNodes->length);
  404. $this->assertInstanceOf('\DOMDocumentType', $doc->childNodes->item(0));
  405. $this->assertInstanceOf('\DOMElement', $doc->childNodes->item(1));
  406. }
  407. public function testAutocloseP()
  408. {
  409. $html = '<!DOCTYPE html><html><body><p><figure></body></html>';
  410. $doc = $this->parse($html);
  411. $p = $doc->getElementsByTagName('p')->item(0);
  412. $this->assertEquals(0, $p->childNodes->length);
  413. $this->assertEquals('figure', $p->nextSibling->tagName);
  414. }
  415. public function testAutocloseLI()
  416. {
  417. $html = '<!doctype html>
  418. <html lang="en">
  419. <body>
  420. <ul><li>Foo<li>Bar<li>Baz</ul>
  421. </body>
  422. </html>';
  423. $doc = $this->parse($html);
  424. $length = $doc->getElementsByTagName('ul')->item(0)->childNodes->length;
  425. $this->assertEquals(3, $length);
  426. }
  427. public function testMathML()
  428. {
  429. $html = '<!doctype html>
  430. <html lang="en">
  431. <body>
  432. <math xmlns="http://www.w3.org/1998/Math/MathML">
  433. <mi>x</mi>
  434. <csymbol definitionurl="http://www.example.com/mathops/multiops.html#plusminus">
  435. <mo>&PlusMinus;</mo>
  436. </csymbol>
  437. <mi>y</mi>
  438. </math>
  439. </body>
  440. </html>';
  441. $doc = $this->parse($html);
  442. $math = $doc->getElementsByTagName('math')->item(0);
  443. $this->assertEquals('math', $math->tagName);
  444. $this->assertEquals('math', $math->nodeName);
  445. $this->assertEquals('math', $math->localName);
  446. $this->assertEquals('http://www.w3.org/1998/Math/MathML', $math->namespaceURI);
  447. }
  448. public function testSVG()
  449. {
  450. $html = '<!doctype html>
  451. <html lang="en">
  452. <body>
  453. <svg width="150" height="100" viewBox="0 0 3 2" xmlns="http://www.w3.org/2000/svg">
  454. <rect width="1" height="2" x="2" fill="#d2232c" />
  455. <text font-family="Verdana" font-size="32">
  456. <textpath xlink:href="#Foo">
  457. Test Text.
  458. </textPath>
  459. </text>
  460. </svg>
  461. </body>
  462. </html>';
  463. $doc = $this->parse($html);
  464. $svg = $doc->getElementsByTagName('svg')->item(0);
  465. $this->assertEquals('svg', $svg->tagName);
  466. $this->assertEquals('svg', $svg->nodeName);
  467. $this->assertEquals('svg', $svg->localName);
  468. $this->assertEquals('http://www.w3.org/2000/svg', $svg->namespaceURI);
  469. $textPath = $doc->getElementsByTagName('textPath')->item(0);
  470. $this->assertEquals('textPath', $textPath->tagName);
  471. }
  472. public function testNoScript()
  473. {
  474. $html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>';
  475. $doc = $this->parse($html);
  476. $this->assertEmpty($this->errors);
  477. $noscript = $doc->getElementsByTagName('noscript')->item(0);
  478. $this->assertEquals('noscript', $noscript->tagName);
  479. $html = '<!DOCTYPE html><html><body><noscript><p>No JS</p></noscript></body></html>';
  480. $doc = $this->parse($html);
  481. $this->assertEmpty($this->errors);
  482. $p = $doc->getElementsByTagName('p')->item(0);
  483. $this->assertEquals('p', $p->tagName);
  484. }
  485. /**
  486. * Regression for issue #13.
  487. */
  488. public function testRegressionHTMLNoBody()
  489. {
  490. $html = '<!DOCTYPE html><html><span id="test">Test</span></html>';
  491. $doc = $this->parse($html);
  492. $span = $doc->getElementById('test');
  493. $this->assertEmpty($this->errors);
  494. $this->assertEquals('span', $span->tagName);
  495. $this->assertEquals('Test', $span->textContent);
  496. }
  497. public function testInstructionProcessor()
  498. {
  499. $string = '<!DOCTYPE html><html><?foo bar ?></html>';
  500. $treeBuilder = new DOMTreeBuilder();
  501. $is = new InstructionProcessorMock();
  502. $treeBuilder->setInstructionProcessor($is);
  503. $scanner = new Scanner($string);
  504. $parser = new Tokenizer($scanner, $treeBuilder);
  505. $parser->parse();
  506. $dom = $treeBuilder->document();
  507. $div = $dom->getElementsByTagName('div')->item(0);
  508. $this->assertEquals(1, $is->count);
  509. $this->assertEquals('foo', $is->name);
  510. $this->assertEquals('bar ', $is->data);
  511. $this->assertEquals('div', $div->tagName);
  512. $this->assertEquals('foo', $div->textContent);
  513. }
  514. public function testSelectGroupedOptions()
  515. {
  516. $html = <<<EOM
  517. <!DOCTYPE html>
  518. <html>
  519. <head>
  520. <title>testSelectGroupedOptions</title>
  521. </head>
  522. <body>
  523. <select>
  524. <optgroup id="first" label="first">
  525. <option value="foo">foo</option>
  526. <option value="bar">bar</option>
  527. <option value="baz">baz</option>
  528. </optgroup>
  529. <optgroup id="second" label="second">
  530. <option value="lorem">lorem</option>
  531. <option value="ipsum">ipsum</option>
  532. </optgroup>
  533. </select>
  534. </body>
  535. </html>
  536. EOM;
  537. $dom = $this->parse($html);
  538. $this->assertSame(3, $dom->getElementById('first')->getElementsByTagName('option')->length);
  539. $this->assertSame(2, $dom->getElementById('second')->getElementsByTagName('option')->length);
  540. }
  541. public function testVoidTag()
  542. {
  543. $html = <<<EOM
  544. <!DOCTYPE html>
  545. <html>
  546. <head>
  547. <title>testVoidTag</title>
  548. <meta>
  549. <meta>
  550. </head>
  551. <body></body>
  552. </html>
  553. EOM;
  554. $dom = $this->parse($html);
  555. $this->assertSame(2, $dom->getElementsByTagName('meta')->length);
  556. $this->assertSame(0, $dom->getElementsByTagName('meta')->item(0)->childNodes->length);
  557. $this->assertSame(0, $dom->getElementsByTagName('meta')->item(1)->childNodes->length);
  558. }
  559. public function testIgnoreSelfClosingTag()
  560. {
  561. $html = <<<EOM
  562. <!DOCTYPE html>
  563. <html>
  564. <head>
  565. <title>testIllegalSelfClosingTag</title>
  566. </head>
  567. <body>
  568. <div /><span>Hello, World!</span></div>
  569. </body>
  570. </html>
  571. EOM;
  572. $dom = $this->parse($html);
  573. $this->assertSame(1, $dom->getElementsByTagName('div')->item(0)->childNodes->length);
  574. }
  575. public function testIAudioInParagraph()
  576. {
  577. $html = <<<EOM
  578. <!DOCTYPE html>
  579. <html>
  580. <head>
  581. <title>testIllegalSelfClosingTag</title>
  582. </head>
  583. <body>
  584. <p>
  585. <audio preload="none" controls="controls">
  586. <source src="https://example.com/test.mp3" type="audio/mpeg" />
  587. Your browser does not support the audio element.
  588. </audio>
  589. </p>
  590. </body>
  591. </html>>
  592. </html>
  593. EOM;
  594. $dom = $this->parse($html);
  595. $audio = $dom->getElementsByTagName('audio')->item(0);
  596. $this->assertSame('p', $audio->parentNode->nodeName);
  597. $this->assertSame(3, $audio->childNodes->length);
  598. }
  599. }