DOMTreeBuilderTest.php 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710
  1. <?php
  2. /**
  3. * @file
  4. * Test the Tree Builder.
  5. */
  6. namespace Masterminds\HTML5\Tests\Parser;
  7. use Masterminds\HTML5\Parser\Scanner;
  8. use Masterminds\HTML5\Parser\Tokenizer;
  9. use Masterminds\HTML5\Parser\DOMTreeBuilder;
  10. /**
  11. * These tests are functional, not necessarily unit tests.
  12. */
  13. class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
  14. {
  15. protected $errors = array();
  /**
   * Runs a markup string through Scanner, Tokenizer and DOMTreeBuilder.
   *
   * The errors reported by the tree builder are stored in $this->errors so
   * the calling test can inspect them afterwards.
   *
   * @param string $string  Markup to parse.
   * @param array  $options Options passed as the second DOMTreeBuilder constructor argument.
   *
   * @return mixed The result of DOMTreeBuilder::document(); the tests in this
   *               class expect a \DOMDocument.
   */
  protected function parse($string, array $options = array())
  {
      $builder = new DOMTreeBuilder(false, $options);
      $tokenizer = new Tokenizer(new Scanner($string), $builder);
      $tokenizer->parse();

      $this->errors = $builder->getErrors();

      return $builder->document();
  }
  /**
   * Parses a string as an HTML5 fragment instead of a full document.
   *
   * The tree builder is constructed with `true` as its first argument, and
   * the errors it reports are stored in $this->errors.
   *
   * @param string $string Fragment markup to parse.
   *
   * @return mixed The result of DOMTreeBuilder::fragment(); testFragment()
   *               expects a \DOMDocumentFragment.
   */
  protected function parseFragment($string)
  {
      $builder = new DOMTreeBuilder(true);
      $tokenizer = new Tokenizer(new Scanner($string), $builder);
      $tokenizer->parse();

      $this->errors = $builder->getErrors();

      return $builder->fragment();
  }
  /**
   * A minimal document yields a \DOMDocument whose root is an <html>
   * element in the XHTML namespace.
   */
  public function testDocument()
  {
      $dom = $this->parse('<!DOCTYPE html><html></html>');
      $this->assertInstanceOf('\DOMDocument', $dom);

      $root = $dom->documentElement;
      $this->assertEquals('html', $root->tagName);
      $this->assertEquals('http://www.w3.org/1999/xhtml', $root->namespaceURI);
  }
  /**
   * Ampersands followed by a letter or "=" inside attribute values parse
   * without errors and are serialized as &amp;.
   */
  public function testBareAmpersand()
  {
      $markup = "<!doctype html>
<html>
<body>
<img src='a&b' />
<img src='a&=' />
<img src='a&=c' />
<img src='a&=9' />
</body>
</html>";
      $expectedXml = '
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"><body>
<img src="a&amp;b"/>
<img src="a&amp;="/>
<img src="a&amp;=c"/>
<img src="a&amp;=9"/>
</body>
</html>';

      $dom = $this->parse($markup);

      $this->assertEmpty($this->errors);
      $this->assertXmlStringEqualsXmlString($expectedXml, $dom->saveXML());
  }
  /**
   * A trailing "&" and "&+" in attribute values each produce a parse error
   * (two in total), yet the ampersands are still kept and serialized as &amp;.
   */
  public function testBareAmpersandNotAllowedInAttributes()
  {
      $markup = "<!doctype html>
<html>
<body>
<img src='a&' />
<img src='a&+' />
</body>
</html>";
      $expectedXml = '
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"><body>
<img src="a&amp;"/>
<img src="a&amp;+"/>
</body>
</html>';

      $dom = $this->parse($markup);

      $this->assertCount(2, $this->errors);
      $this->assertXmlStringEqualsXmlString($expectedXml, $dom->saveXML());
  }
  /**
   * Bare ampersands in body text: the fixture is expected to raise exactly
   * five parse errors while every ampersand survives as &amp; in the output.
   * The last fixture line is marked "valid" by the fixture itself.
   */
  public function testBareAmpersandNotAllowedInBody()
  {
      $markup = '<!doctype html>
<html>
<body>
a&b
a&=
a&=c
a&=9
a&+
a& -- valid
</body>
</html>';
      $expectedXml = '
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"><body>
a&amp;b
a&amp;=
a&amp;=c
a&amp;=9
a&amp;+
a&amp; -- valid
</body>
</html>';

      $dom = $this->parse($markup);

      $this->assertCount(5, $this->errors);
      $this->assertXmlStringEqualsXmlString($expectedXml, $dom->saveXML());
  }
  /**
   * Mixed-case tag names (<Title>...</TitlE>) are reachable through a
   * lower-case XPath query in the XHTML namespace.
   */
  public function testStrangeCapitalization()
  {
      $markup = '<!doctype html>
<html>
<head>
<Title>Hello, world!</TitlE>
</head>
<body>TheBody<script>foo</script></body>
</html>';

      $dom = $this->parse($markup);
      $this->assertInstanceOf('\DOMDocument', $dom);
      $this->assertEquals('html', $dom->documentElement->tagName);

      // The elements live in the XHTML namespace, so bind a prefix for XPath.
      $finder = new \DOMXPath($dom);
      $finder->registerNamespace('x', 'http://www.w3.org/1999/xhtml');

      $title = $finder->query('//x:title')->item(0);
      $this->assertEquals('Hello, world!', $title->nodeValue);

      $script = $finder->query('//x:script')->item(0);
      $this->assertEquals('foo', $script->nodeValue);
  }
  /**
   * With the 'disable_html_ns' option set, the root element carries no
   * namespace URI.
   */
  public function testDocumentWithDisabledNamespaces()
  {
      $options = array('disable_html_ns' => true);
      $dom = $this->parse('<!DOCTYPE html><html></html>', $options);
      $this->assertInstanceOf('\DOMDocument', $dom);

      $root = $dom->documentElement;
      $this->assertEquals('html', $root->tagName);
      $this->assertNull($root->namespaceURI);
  }
  /**
   * When a 'target_document' option is supplied, the parser must build into
   * that very \DOMDocument instance and return it.
   */
  public function testDocumentWithATargetDocument()
  {
      $targetDom = new \DOMDocument();
      $html = '<!DOCTYPE html><html></html>';

      $doc = $this->parse($html, array('target_document' => $targetDom));

      $this->assertInstanceOf('\DOMDocument', $doc);
      // assertSame() takes (expected, actual): the supplied target is the
      // expectation and the parser's return value is what is being checked.
      $this->assertSame($targetDom, $doc);
      $this->assertEquals('html', $doc->documentElement->tagName);
  }
  /**
   * With 'xmlNamespaces' enabled, no 'html5-php-fake-id-attribute'
   * attribute may be present anywhere in the resulting document.
   */
  public function testDocumentFakeAttrAbsence()
  {
      $markup = '<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><body>foo</body></html>';
      $dom = $this->parse($markup, array('xmlNamespaces' => true));

      $finder = new \DOMXPath($dom);
      $leftovers = $finder->query('//@html5-php-fake-id-attribute');

      $this->assertEquals(0, $leftovers->length);
  }
  /**
   * Fragment parsing returns a \DOMDocumentFragment whose children are the
   * top-level elements of the input, in order.
   */
  public function testFragment()
  {
      $fragment = $this->parseFragment('<div>test</div><span>test2</span>');

      $this->assertInstanceOf('\DOMDocumentFragment', $fragment);
      $this->assertTrue($fragment->hasChildNodes());

      $first = $fragment->childNodes->item(0);
      $this->assertEquals('div', $first->tagName);
      $this->assertEquals('test', $first->textContent);

      $second = $fragment->childNodes->item(1);
      $this->assertEquals('span', $second->tagName);
      $this->assertEquals('test2', $second->textContent);
  }
  /**
   * Checks the html > (head > title, body) element structure of a simple
   * document, including the name properties of the root element.
   */
  public function testElements()
  {
      $dom = $this->parse('<!DOCTYPE html><html><head><title></title></head><body></body></html>');

      $html = $dom->documentElement;
      $this->assertEquals('html', $html->tagName);
      $this->assertEquals('html', $html->localName);
      $this->assertEquals('html', $html->nodeName);
      $this->assertEquals(2, $html->childNodes->length);

      $children = $html->childNodes;
      $this->assertEquals('head', $children->item(0)->tagName);
      $this->assertEquals('body', $children->item(1)->tagName);

      $headChildren = $children->item(0)->childNodes;
      $this->assertEquals(1, $headChildren->length);
      $this->assertEquals('title', $headChildren->item(0)->tagName);
  }
  /**
   * Attributes using the xlink: and xml: prefixes get the matching
   * namespace URI without any explicit declaration in the markup.
   */
  public function testImplicitNamespaces()
  {
      // Each case: markup, qualified attribute name, expected namespace URI.
      $cases = array(
          array(
              '<!DOCTYPE html><html><body><a xlink:href="bar">foo</a></body></html>',
              'xlink:href',
              'http://www.w3.org/1999/xlink',
          ),
          array(
              '<!DOCTYPE html><html><body><a xml:base="bar">foo</a></body></html>',
              'xml:base',
              'http://www.w3.org/XML/1998/namespace',
          ),
      );

      foreach ($cases as $case) {
          list($markup, $attributeName, $expectedNamespace) = $case;

          $anchor = $this->parse($markup)->getElementsByTagName('a')->item(0);
          $attribute = $anchor->getAttributeNode($attributeName);

          $this->assertEquals($expectedNamespace, $attribute->namespaceURI);
      }
  }
  /**
   * A prefix registered through the 'implicitNamespaces' option is resolved
   * for both attributes (t:href) and elements (t:a).
   */
  public function testCustomImplicitNamespaces()
  {
      $options = array(
          'implicitNamespaces' => array(
              't' => 'http://www.example.com',
          ),
      );

      // Prefixed attribute on a plain element.
      $dom = $this->parse('<!DOCTYPE html><html><body><a t:href="bar">foo</a></body></html>', $options);
      $anchor = $dom->getElementsByTagName('a')->item(0);
      $attribute = $anchor->getAttributeNode('t:href');
      $this->assertEquals('http://www.example.com', $attribute->namespaceURI);

      // Prefixed element.
      $dom = $this->parse('<!DOCTYPE html><html><body><t:a>foo</t:a></body></html>', $options);
      $matches = $dom->getElementsByTagNameNS('http://www.example.com', 'a');
      $this->assertEquals(1, $matches->length);
  }
  /**
   * With 'xmlNamespaces' enabled, an xmlns:t declaration in the markup is
   * applied to the prefixed attribute and to the prefixed element itself.
   */
  public function testXmlNamespaces()
  {
      $markup = '<!DOCTYPE html><html>
<t:body xmlns:t="http://www.example.com">
<a t:href="bar">foo</a>
</body>
<div>foo</div>
</html>';

      $dom = $this->parse($markup, array(
          'xmlNamespaces' => true,
      ));

      $anchor = $dom->getElementsByTagName('a')->item(0);
      $attribute = $anchor->getAttributeNode('t:href');
      $this->assertEquals('http://www.example.com', $attribute->namespaceURI);

      $prefixedBodies = $dom->getElementsByTagNameNS('http://www.example.com', 'body');
      $this->assertEquals(1, $prefixedBodies->length);
  }
  /**
   * Nested default and prefixed xmlns declarations: each element, looked up
   * by id, must report the namespace URI listed in the expectation map.
   */
  public function testXmlNamespaceNesting()
  {
      $markup = '<!DOCTYPE html><html>
<body xmlns:x="http://www.prefixed.com" id="body">
<a id="bar1" xmlns="http://www.prefixed.com/bar1">
<b id="bar4" xmlns="http://www.prefixed.com/bar4"><x:prefixed id="prefixed"/></b>
</a>
<svg id="svg"></svg>
<c id="bar2" xmlns="http://www.prefixed.com/bar2"></c>
<div id="div"></div>
<d id="bar3"></d>
<xn:d xmlns:xn="http://www.prefixed.com/xn" xmlns="http://www.prefixed.com/bar5_x" id="bar5"><x id="bar5_x"/></xn:d>
</body>
</html>';

      $dom = $this->parse($markup, array(
          'xmlNamespaces' => true,
      ));

      $this->assertEmpty($this->errors);

      // Element id => namespace URI that element is expected to have.
      $expectedNamespaces = array(
          'div' => 'http://www.w3.org/1999/xhtml',
          'body' => 'http://www.w3.org/1999/xhtml',
          'bar1' => 'http://www.prefixed.com/bar1',
          'bar2' => 'http://www.prefixed.com/bar2',
          'bar3' => 'http://www.w3.org/1999/xhtml',
          'bar4' => 'http://www.prefixed.com/bar4',
          'svg' => 'http://www.w3.org/2000/svg',
          'prefixed' => 'http://www.prefixed.com',
          'bar5' => 'http://www.prefixed.com/xn',
          'bar5_x' => 'http://www.prefixed.com/bar5_x',
      );

      foreach ($expectedNamespaces as $id => $namespace) {
          $element = $dom->getElementById($id);
          $this->assertEquals($namespace, $element->namespaceURI);
      }
  }
  /**
   * Elements such as <hr> and <div> appearing inside a <p> end the
   * paragraph and are emitted as its siblings.
   */
  public function testMoveNonInlineElements()
  {
      $message = 'Move non-inline elements outside of inline containers.';

      $dom = $this->parse('<p>line1<br/><hr/>line2</p>');
      $serialized = $dom->saveXML($dom->documentElement);
      $this->assertEquals('<html xmlns="http://www.w3.org/1999/xhtml"><p>line1<br/></p><hr/>line2</html>', $serialized, $message);

      $dom = $this->parse('<p>line1<div>line2</div></p>');
      $serialized = $dom->saveXML($dom->documentElement);
      $this->assertEquals('<html xmlns="http://www.w3.org/1999/xhtml"><p>line1</p><div>line2</div></html>', $serialized, $message);
  }
  /**
   * Attributes on <body> are preserved, and the id attribute makes the
   * element reachable through getElementById().
   */
  public function testAttributes()
  {
      $html = "<!DOCTYPE html>
<html>
<head><title></title></head>
<body id='a' class='b c'></body>
</html>";

      $doc = $this->parse($html);
      $root = $doc->documentElement;

      // Spelled with the method's real casing. PHP resolves method names
      // case-insensitively, so the former "GetElementsByTagName" worked,
      // but it was inconsistent with every other call in this class.
      $body = $root->getElementsByTagName('body')->item(0);
      $this->assertEquals('body', $body->tagName);
      $this->assertTrue($body->hasAttributes());
      $this->assertEquals('a', $body->getAttribute('id'));
      $this->assertEquals('b c', $body->getAttribute('class'));

      // The same element must also be found by its id.
      $body2 = $doc->getElementById('a');
      $this->assertEquals('body', $body2->tagName);
      $this->assertEquals('a', $body2->getAttribute('id'));
  }
  /**
   * Lower-cased SVG names in the input (viewbox, textlength, animatecolor)
   * are expected in their camel-cased form in the resulting DOM.
   */
  public function testSVGAttributes()
  {
      $markup = "<!DOCTYPE html>
<html><body>
<svg width='150' viewbox='2'>
<rect textlength='2'/>
<animatecolor>foo</animatecolor>
</svg>
</body></html>";

      $html = $this->parse($markup)->documentElement;

      $svg = $html->getElementsByTagName('svg')->item(0);
      $this->assertTrue($svg->hasAttribute('viewBox'));

      $rect = $html->getElementsByTagName('rect')->item(0);
      $this->assertTrue($rect->hasAttribute('textLength'));

      $animateColors = $html->getElementsByTagName('animateColor');
      $this->assertEquals(1, $animateColors->length);
  }
  /**
   * The lower-cased MathML attribute "definitionurl" is expected as
   * "definitionURL" on the parsed <csymbol> element.
   */
  public function testMathMLAttribute()
  {
      $markup = '<!doctype html>
<html lang="en">
<body>
<math>
<mi>x</mi>
<csymbol definitionurl="http://www.example.com/mathops/multiops.html#plusminus">
<mo>&PlusMinus;</mo>
</csymbol>
<mi>y</mi>
</math>
</body>
</html>';

      $html = $this->parse($markup)->documentElement;
      $symbol = $html->getElementsByTagName('csymbol')->item(0);

      $this->assertTrue($symbol->hasAttribute('definitionURL'));
  }
  /**
   * A document without an explicit <html> tag still gets an html root
   * element, with the <title> as its first child.
   */
  public function testMissingHtmlTag()
  {
      $dom = $this->parse('<!DOCTYPE html><title>test</title>');
      $root = $dom->documentElement;

      $this->assertEquals('html', $root->tagName);
      $this->assertEquals('title', $root->childNodes->item(0)->tagName);
  }
  /**
   * Comments are kept as comment nodes, both inside <html> and when they
   * precede it at document level.
   */
  public function testComment()
  {
      // Comment nested inside the root element.
      $dom = $this->parse('<html><!--Hello World.--></html>');
      $inner = $dom->documentElement->childNodes->item(0);
      $this->assertEquals(XML_COMMENT_NODE, $inner->nodeType);
      $this->assertEquals('Hello World.', $inner->data);

      // Comment ahead of the root element: the test expects it at document
      // child index 1, immediately followed by the html element at index 2.
      $dom = $this->parse('<!--Hello World.--><html></html>');
      $leading = $dom->childNodes->item(1);
      $this->assertEquals(XML_COMMENT_NODE, $leading->nodeType);
      $this->assertEquals('Hello World.', $leading->data);

      $rootElement = $dom->childNodes->item(2);
      $this->assertEquals(XML_ELEMENT_NODE, $rootElement->nodeType);
      $this->assertEquals('html', $rootElement->tagName);
  }
  /**
   * A CDATA section inside <math> becomes a single CDATA section node
   * holding the section's text.
   */
  public function testCDATA()
  {
      $dom = $this->parse('<!DOCTYPE html><html><math><![CDATA[test]]></math></html>');

      $math = $dom->getElementsByTagName('math')->item(0);
      $this->assertEquals(1, $math->childNodes->length);

      $section = $math->childNodes->item(0);
      $this->assertEquals(XML_CDATA_SECTION_NODE, $section->nodeType);
      $this->assertEquals('test', $section->data);
  }
  /**
   * Text inside an element becomes a text node; text that appears before
   * <head> is reported as an error and left out of the tree.
   */
  public function testText()
  {
      $html = '<!DOCTYPE html><html><head></head><body><math>test</math></body></html>';
      $doc = $this->parse($html);

      $wrapper = $doc->getElementsByTagName('math')->item(0);
      $this->assertEquals(1, $wrapper->childNodes->length);

      $data = $wrapper->childNodes->item(0);
      $this->assertEquals(XML_TEXT_NODE, $data->nodeType);
      $this->assertEquals('test', $data->data);

      // The DomTreeBuilder has special handling for text when in before head mode.
      $html = '<!DOCTYPE html><html>
Foo<head></head><body></body></html>';
      $doc = $this->parse($html);

      // Guard the index access below: without any recorded error the test
      // should fail with a readable assertion, not an undefined-offset error.
      $this->assertNotEmpty($this->errors, 'Text before <head> should be reported as a parse error.');
      $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $this->errors[0]);

      $headElement = $doc->documentElement->firstChild;
      $this->assertEquals('head', $headElement->tagName);
  }
  /**
   * Parsing truncated input (an unterminated CDATA section) must leave at
   * least one error, and errors are exposed as strings.
   */
  public function testParseErrors()
  {
      $this->parse('<!DOCTYPE html><html><math><![CDATA[test');

      // Only the availability of errors is checked here; the wording of the
      // messages is covered by the Tokenizer's own tests.
      $errorCount = count($this->errors);
      $this->assertGreaterThan(0, $errorCount);

      $firstError = $this->errors[0];
      $this->assertTrue(is_string($firstError));
  }
  /**
   * Processing instructions are inserted into the DOM, except for a
   * leading <?xml ... ?> declaration, which must not produce a node.
   */
  public function testProcessingInstruction()
  {
      // Simple case: the PI ends up as the only child of <html>.
      $dom = $this->parse('<!DOCTYPE html><html><?foo bar?>');
      $root = $dom->documentElement;
      $this->assertEquals(1, $root->childNodes->length);

      $instruction = $root->firstChild;
      $this->assertInstanceOf('\DOMProcessingInstruction', $instruction);
      $this->assertEquals('foo', $instruction->nodeName);
      $this->assertEquals('bar', $instruction->data);

      // Leading xml PI: only the doctype and the root element remain.
      $dom = $this->parse('<?xml version="1.0"?><!DOCTYPE html><html><head></head></html>');
      $topLevel = $dom->childNodes;
      $this->assertEquals(2, $topLevel->length);
      $this->assertInstanceOf('\DOMDocumentType', $topLevel->item(0));
      $this->assertInstanceOf('\DOMElement', $topLevel->item(1));
  }
  /**
   * An open <p> is closed implicitly by a following <figure>, which
   * becomes the paragraph's next sibling instead of its child.
   */
  public function testAutocloseP()
  {
      $dom = $this->parse('<!DOCTYPE html><html><body><p><figure></body></html>');
      $paragraph = $dom->getElementsByTagName('p')->item(0);

      $this->assertEquals(0, $paragraph->childNodes->length);
      $this->assertEquals('figure', $paragraph->nextSibling->tagName);
  }
  /**
   * Unclosed <li> elements are closed by the next <li>, leaving the list
   * with three children.
   */
  public function testAutocloseLI()
  {
      $markup = '<!doctype html>
<html lang="en">
<body>
<ul><li>Foo<li>Bar<li>Baz</ul>
</body>
</html>';

      $dom = $this->parse($markup);
      $list = $dom->getElementsByTagName('ul')->item(0);

      $this->assertEquals(3, $list->childNodes->length);
  }
  /**
   * An embedded <math> element keeps the plain name "math" in all three
   * name properties and is placed in the MathML namespace.
   */
  public function testMathML()
  {
      $markup = '<!doctype html>
<html lang="en">
<body>
<math xmlns="http://www.w3.org/1998/Math/MathML">
<mi>x</mi>
<csymbol definitionurl="http://www.example.com/mathops/multiops.html#plusminus">
<mo>&PlusMinus;</mo>
</csymbol>
<mi>y</mi>
</math>
</body>
</html>';

      $math = $this->parse($markup)->getElementsByTagName('math')->item(0);

      foreach (array('tagName', 'nodeName', 'localName') as $property) {
          $this->assertEquals('math', $math->$property);
      }
      $this->assertEquals('http://www.w3.org/1998/Math/MathML', $math->namespaceURI);
  }
  /**
   * An embedded <svg> element is placed in the SVG namespace, and the
   * lower-cased <textpath> start tag is found as "textPath".
   */
  public function testSVG()
  {
      $markup = '<!doctype html>
<html lang="en">
<body>
<svg width="150" height="100" viewBox="0 0 3 2" xmlns="http://www.w3.org/2000/svg">
<rect width="1" height="2" x="2" fill="#d2232c" />
<text font-family="Verdana" font-size="32">
<textpath xlink:href="#Foo">
Test Text.
</textPath>
</text>
</svg>
</body>
</html>';

      $dom = $this->parse($markup);

      $svg = $dom->getElementsByTagName('svg')->item(0);
      foreach (array('tagName', 'nodeName', 'localName') as $property) {
          $this->assertEquals('svg', $svg->$property);
      }
      $this->assertEquals('http://www.w3.org/2000/svg', $svg->namespaceURI);

      $path = $dom->getElementsByTagName('textPath')->item(0);
      $this->assertEquals('textPath', $path->tagName);
  }
  /**
   * <noscript> parses without errors both in <head> (text content) and in
   * <body> (element content).
   */
  public function testNoScript()
  {
      // noscript inside head.
      $dom = $this->parse('<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>');
      $this->assertEmpty($this->errors);
      $noscript = $dom->getElementsByTagName('noscript')->item(0);
      $this->assertEquals('noscript', $noscript->tagName);

      // noscript inside body, wrapping a paragraph.
      $dom = $this->parse('<!DOCTYPE html><html><body><noscript><p>No JS</p></noscript></body></html>');
      $this->assertEmpty($this->errors);
      $paragraph = $dom->getElementsByTagName('p')->item(0);
      $this->assertEquals('p', $paragraph->tagName);
  }
  /**
   * Regression test for issue #13: an element placed directly inside
   * <html>, without a <body>, must parse cleanly and stay reachable by id.
   */
  public function testRegressionHTMLNoBody()
  {
      $dom = $this->parse('<!DOCTYPE html><html><span id="test">Test</span></html>');
      $element = $dom->getElementById('test');

      $this->assertEmpty($this->errors);
      $this->assertEquals('span', $element->tagName);
      $this->assertEquals('Test', $element->textContent);
  }
  /**
   * A registered instruction processor receives the processing instruction
   * (name "foo", data "bar ") exactly once.
   *
   * The pipeline is built by hand because the processor has to be attached
   * to the tree builder before parsing starts. InstructionProcessorMock is
   * not defined in this file; the <div> containing "foo" asserted at the end
   * is presumably created by that mock -- verify against its implementation.
   */
  public function testInstructionProcessor()
  {
      $markup = '<!DOCTYPE html><html><?foo bar ?></html>';

      $builder = new DOMTreeBuilder();
      $processor = new InstructionProcessorMock();
      $builder->setInstructionProcessor($processor);

      $tokenizer = new Tokenizer(new Scanner($markup), $builder);
      $tokenizer->parse();

      $document = $builder->document();
      $div = $document->getElementsByTagName('div')->item(0);

      $this->assertEquals(1, $processor->count);
      $this->assertEquals('foo', $processor->name);
      $this->assertEquals('bar ', $processor->data);
      $this->assertEquals('div', $div->tagName);
      $this->assertEquals('foo', $div->textContent);
  }
  /**
   * <option> elements stay inside their own <optgroup>: three in the
   * first group and two in the second.
   */
  public function testSelectGroupedOptions()
  {
      $markup = <<<EOM
<!DOCTYPE html>
<html>
<head>
<title>testSelectGroupedOptions</title>
</head>
<body>
<select>
<optgroup id="first" label="first">
<option value="foo">foo</option>
<option value="bar">bar</option>
<option value="baz">baz</option>
</optgroup>
<optgroup id="second" label="second">
<option value="lorem">lorem</option>
<option value="ipsum">ipsum</option>
</optgroup>
</select>
</body>
</html>
EOM;

      $dom = $this->parse($markup);

      // optgroup id => number of options expected inside that group.
      $expectedCounts = array(
          'first' => 3,
          'second' => 2,
      );

      foreach ($expectedCounts as $groupId => $optionCount) {
          $options = $dom->getElementById($groupId)->getElementsByTagName('option');
          $this->assertSame($optionCount, $options->length);
      }
  }
  /**
   * Two consecutive <meta> tags produce two sibling elements; neither one
   * has child nodes.
   */
  public function testVoidTag()
  {
      $markup = <<<EOM
<!DOCTYPE html>
<html>
<head>
<title>testVoidTag</title>
<meta>
<meta>
</head>
<body></body>
</html>
EOM;

      $dom = $this->parse($markup);
      $metas = $dom->getElementsByTagName('meta');

      $this->assertSame(2, $metas->length);
      $this->assertSame(0, $metas->item(0)->childNodes->length);
      $this->assertSame(0, $metas->item(1)->childNodes->length);
  }
  /**
   * The self-closing slash on <div /> is ignored: the following <span>
   * ends up as the div's single child.
   */
  public function testIgnoreSelfClosingTag()
  {
      $markup = <<<EOM
<!DOCTYPE html>
<html>
<head>
<title>testIllegalSelfClosingTag</title>
</head>
<body>
<div /><span>Hello, World!</span></div>
</body>
</html>
EOM;

      $dom = $this->parse($markup);
      $div = $dom->getElementsByTagName('div')->item(0);

      $this->assertSame(1, $div->childNodes->length);
  }
  /**
   * An <audio> element inside a <p> stays a child of that paragraph and
   * keeps its three child nodes.
   *
   * NOTE(review): the fixture ends with "</html>>" followed by a second
   * "</html>", and its <title> matches the one used in
   * testIgnoreSelfClosingTag(). Both look like copy/paste leftovers, but
   * they are kept byte-for-byte here -- confirm before cleaning them up.
   */
  public function testIAudioInParagraph()
  {
      $markup = <<<EOM
<!DOCTYPE html>
<html>
<head>
<title>testIllegalSelfClosingTag</title>
</head>
<body>
<p>
<audio preload="none" controls="controls">
<source src="https://example.com/test.mp3" type="audio/mpeg" />
Your browser does not support the audio element.
</audio>
</p>
</body>
</html>>
</html>
EOM;

      $player = $this->parse($markup)->getElementsByTagName('audio')->item(0);

      $this->assertSame('p', $player->parentNode->nodeName);
      $this->assertSame(3, $player->childNodes->length);
  }
  598. }