Preprocessor_DOM.php 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831
  1. <?php
  2. /**
  3. * Preprocessor using PHP's dom extension
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. * @ingroup Parser
  22. * @deprecated since 1.34, use Preprocessor_Hash
  23. */
  24. /**
  25. * @ingroup Parser
  26. */
  27. // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
  28. class Preprocessor_DOM extends Preprocessor {
  29. /**
  30. * @var Parser
  31. */
  32. public $parser;
  33. public $memoryLimit;
  34. const CACHE_PREFIX = 'preprocess-xml';
  /**
   * Set up the preprocessor and record PHP's configured memory limit in bytes.
   *
   * The limit is read from the "memory_limit" ini setting. A plain byte count
   * is stored as-is; values using PHP's shorthand suffixes K, M or G
   * (case-insensitive) are converted to bytes. An empty value, -1 (unlimited)
   * or any unrecognised format leaves $this->memoryLimit at false, in which
   * case memCheck() performs no checking.
   *
   * @param Parser $parser
   */
  public function __construct( $parser ) {
      wfDeprecated( __METHOD__, '1.34' ); // T204945

      $this->parser = $parser;
      $this->memoryLimit = false;

      $mem = ini_get( 'memory_limit' );
      if ( strval( $mem ) !== '' && $mem != -1 ) {
          if ( preg_match( '/^\d+$/', $mem ) ) {
              // Already expressed in bytes
              $this->memoryLimit = $mem;
          } elseif ( preg_match( '/^(\d+)([KMG])$/i', $mem, $m ) ) {
              // Shorthand byte notation: previously only "M" was understood, so
              // limits such as "1G" or "512K" silently disabled memCheck().
              $multipliers = [ 'K' => 1024, 'M' => 1048576, 'G' => 1073741824 ];
              $this->memoryLimit = $m[1] * $multipliers[strtoupper( $m[2] )];
          }
      }
  }
  /**
   * Create a new, empty expansion frame bound to this preprocessor.
   *
   * @return PPFrame_DOM
   */
  public function newFrame() {
      $frame = new PPFrame_DOM( $this );

      return $frame;
  }
  /**
   * Create a custom frame bound to this preprocessor with the given arguments.
   *
   * @param array $args Arguments handed to the PPCustomFrame_DOM constructor
   * @return PPCustomFrame_DOM
   */
  public function newCustomFrame( $args ) {
      $frame = new PPCustomFrame_DOM( $this, $args );

      return $frame;
  }
  /**
   * Build a node wrapping a list of <part> elements from a PHP array.
   *
   * Integer keys produce an indexed <name index="N"/> element; any other key
   * produces a named <name>key</name>= element. Keys and values are escaped
   * with htmlspecialchars() before being placed in the XML.
   *
   * @param array $values
   * @return PPNode_DOM Node wrapping the child nodes of the generated <list>
   * @throws MWException If the generated XML cannot be parsed, even after
   *   being passed through UtfNormal\Validator::cleanUp()
   */
  public function newPartNodeArray( $values ) {
      // The XML is assembled as a string and parsed in one go rather than
      // built through the DOM API, which the original author reports as slower.
      $pieces = [ "<list>" ];
      foreach ( $values as $key => $value ) {
          if ( is_int( $key ) ) {
              $pieces[] = "<part><name index=\"$key\"/><value>"
                  . htmlspecialchars( $value ) . "</value></part>";
              continue;
          }
          $pieces[] = "<part><name>" . htmlspecialchars( $key )
              . "</name>=<value>" . htmlspecialchars( $value ) . "</value></part>";
      }
      $pieces[] = "</list>";
      $markup = implode( '', $pieces );

      $document = new DOMDocument();
      Wikimedia\suppressWarnings();
      $loaded = $document->loadXML( $markup );
      Wikimedia\restoreWarnings();

      if ( !$loaded ) {
          // Second attempt: strip invalid characters via UtfNormal, and pass
          // 1 << 19 (XML_PARSE_HUGE) so that newer libxml2 versions accept
          // documents nested more than 256 levels deep.
          $markup = UtfNormal\Validator::cleanUp( $markup );
          $loaded = $document->loadXML( $markup, 1 << 19 );
          if ( !$loaded ) {
              throw new MWException( 'Parameters passed to ' . __METHOD__ . ' result in invalid XML' );
          }
      }

      return new PPNode_DOM( $document->documentElement->childNodes );
  }
  /**
   * Compare current memory usage against the limit recorded by the constructor.
   *
   * @throws MWException When usage exceeds 90% of the limit
   * @return bool True when no limit is known or usage is at most 80% of the
   *   limit; false when usage is between 80% and 90%
   */
  public function memCheck() {
      $configured = $this->memoryLimit;
      if ( $configured !== false ) {
          $used = memory_get_usage();
          $hardThreshold = $configured * 0.9;
          if ( $used > $hardThreshold ) {
              // Report the threshold rounded to the nearest whole megabyte
              $limit = intval( $hardThreshold / 1048576 + 0.5 );
              throw new MWException( "Preprocessor hit 90% memory limit ($limit MB)" );
          }
          $softThreshold = $configured * 0.8;
          return $used <= $softThreshold;
      }

      // No usable limit was detected, so there is nothing to check
      return true;
  }
  /**
   * Preprocess some wikitext and return the document tree.
   * This is the ghost of Parser::replace_variables().
   *
   * The XML form of the tree is looked up with cacheGetTree(); on a miss it is
   * generated by preprocessToXml() and handed to cacheSetTree(). The XML is
   * then loaded into a DOMDocument and its root element wrapped in a node.
   *
   * Requirements inherited from the original design:
   *  - The generated tree must depend only on the input text and the flags.
   *  - The tree must be the same in OT_HTML and OT_WIKI mode, to avoid a
   *    regression of T6899.
   *  - Any flag added to $flags, or any other parameter liable to change the
   *    tree for a given text, must be passed through the section identifier in
   *    the section edit link and thus back to extractSections().
   *
   * @param string $text The text to parse
   * @param int $flags Bitwise combination of:
   *   Parser::PTD_FOR_INCLUSION  Handle "<noinclude>" and "<includeonly>" as if
   *                              the text is being included. Default is to
   *                              assume a direct page view.
   * @throws MWException If the node count limit is exceeded or the XML is invalid
   * @return PPNode_DOM
   */
  public function preprocessToObj( $text, $flags = 0 ) {
      $tree = $this->cacheGetTree( $text, $flags );
      if ( $tree === false ) {
          $tree = $this->preprocessToXml( $text, $flags );
          $this->cacheSetTree( $text, $flags, $tree );
      }

      // Enforce the node limit before building the DOM. The number of "<"
      // characters in the XML is used as the node count.
      $parser = $this->parser;
      $parser->mGeneratedPPNodeCount += substr_count( $tree, '<' );
      $nodeLimit = $parser->mOptions->getMaxGeneratedPPNodeCount();
      if ( $parser->mGeneratedPPNodeCount > $nodeLimit ) {
          throw new MWException( __METHOD__ . ': generated node count limit exceeded' );
      }

      $document = new DOMDocument;
      Wikimedia\suppressWarnings();
      $loaded = $document->loadXML( $tree );
      Wikimedia\restoreWarnings();

      if ( !$loaded ) {
          // Second attempt: strip invalid characters via UtfNormal, and pass
          // 1 << 19 (XML_PARSE_HUGE) so that newer libxml2 versions accept
          // documents nested more than 256 levels deep.
          $tree = UtfNormal\Validator::cleanUp( $tree );
          $loaded = $document->loadXML( $tree, 1 << 19 );
          if ( !$loaded ) {
              throw new MWException( __METHOD__ . ' generated invalid XML' );
          }
      }

      return new PPNode_DOM( $document->documentElement );
  }
  /**
   * Scan wikitext once and serialise its preprocessor structure as an XML string.
   *
   * The result is wrapped in <root>...</root>. Literal text is escaped with
   * htmlspecialchars(). The scanner emits:
   *  - <ignore> for skipped tags/sections (include-control tags, and text
   *    outside <onlyinclude> when that mode is active),
   *  - <comment> for HTML comments,
   *  - <ext> (with <name>, <attr>, optional <inner> and <close>) for tags listed
   *    by the parser's strip list,
   *  - <h level="N" i="M"> for headings,
   *  - one element per bracket rule match, named by the rule, containing a
   *    <title> and zero or more <part><name/><value/></part> children.
   *
   * Bracket rules are read from $this->rules, which is not defined in this
   * class (presumably inherited from Preprocessor — confirm).
   *
   * @param string $text Wikitext to scan
   * @param int $flags Bitwise flags; only Parser::PTD_FOR_INCLUSION is examined here
   * @return string XML document text
   */
  public function preprocessToXml( $text, $flags = 0 ) {
      global $wgDisableLangConversion;

      $forInclusion = $flags & Parser::PTD_FOR_INCLUSION;

      $xmlishElements = $this->parser->getStripList();
      // Tags that may legitimately run to the end of the text without a closing tag
      $xmlishAllowMissingEndTag = [ 'includeonly', 'noinclude', 'onlyinclude' ];
      $enableOnlyinclude = false;
      if ( $forInclusion ) {
          // Transclusion view: <includeonly> tags themselves are dropped (content kept),
          // <noinclude> elements are dropped together with their content.
          $ignoredTags = [ 'includeonly', '/includeonly' ];
          $ignoredElements = [ 'noinclude' ];
          $xmlishElements[] = 'noinclude';
          // <onlyinclude> mode is only enabled when both an opening and a closing tag occur
          if ( strpos( $text, '<onlyinclude>' ) !== false
              && strpos( $text, '</onlyinclude>' ) !== false
          ) {
              $enableOnlyinclude = true;
          }
      } else {
          // Direct page view: the mirror image of the above
          $ignoredTags = [ 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ];
          $ignoredElements = [ 'includeonly' ];
          $xmlishElements[] = 'includeonly';
      }
      $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );

      // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
      $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";

      $stack = new PPDStack;

      // Characters that always interrupt a literal run; more are appended per iteration
      $searchBase = "[{<\n"; # }
      if ( !$wgDisableLangConversion ) {
          $searchBase .= '-';
      }

      // For fast reverse searches
      $revText = strrev( $text );
      $lengthText = strlen( $text );

      // Input pointer, starts out pointing to a pseudo-newline before the start
      $i = 0;
      // Current accumulator
      $accum =& $stack->getAccum();
      $accum = '<root>';
      // True to find equals signs in arguments
      $findEquals = false;
      // True to take notice of pipe characters
      $findPipe = false;
      $headingIndex = 1;
      // True if $i is inside a possible heading
      $inHeading = false;
      // True if there are no more greater-than (>) signs right of $i
      $noMoreGT = false;
      // Map of tag name => true if there are no more closing tags of given type right of $i
      $noMoreClosingTag = [];
      // True to ignore all input up to the next <onlyinclude>
      $findOnlyinclude = $enableOnlyinclude;
      // Do a line-start run without outputting an LF character
      $fakeLineStart = true;

      while ( true ) {
          // $this->memCheck();

          if ( $findOnlyinclude ) {
              // Ignore all input up to the next <onlyinclude>
              $startPos = strpos( $text, '<onlyinclude>', $i );
              if ( $startPos === false ) {
                  // Ignored section runs to the end
                  $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i ) ) . '</ignore>';
                  break;
              }
              $tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end
              $accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i ) ) . '</ignore>';
              $i = $tagEndPos;
              $findOnlyinclude = false;
          }

          // --- Step 1: decide what kind of syntax ($found) sits at position $i ---
          if ( $fakeLineStart ) {
              $found = 'line-start';
              $curChar = '';
          } else {
              # Find next opening brace, closing brace or pipe
              $search = $searchBase;
              if ( $stack->top === false ) {
                  $currentClosing = '';
              } else {
                  $currentClosing = $stack->top->close;
                  $search .= $currentClosing;
              }
              if ( $findPipe ) {
                  $search .= '|';
              }
              if ( $findEquals ) {
                  // First equals will be for the template
                  $search .= '=';
              }
              $rule = null;
              # Output literal section, advance input counter
              $literalLength = strcspn( $text, $search, $i );
              if ( $literalLength > 0 ) {
                  $accum .= htmlspecialchars( substr( $text, $i, $literalLength ) );
                  $i += $literalLength;
              }
              if ( $i >= $lengthText ) {
                  if ( $currentClosing == "\n" ) {
                      // Do a past-the-end run to finish off the heading
                      $curChar = '';
                      $found = 'line-end';
                  } else {
                      # All done
                      break;
                  }
              } else {
                  // Look at the next one and two characters; two-character
                  // closers/openers are tested before single-character ones.
                  $curChar = $curTwoChar = $text[$i];
                  if ( ( $i + 1 ) < $lengthText ) {
                      $curTwoChar .= $text[$i + 1];
                  }
                  if ( $curChar == '|' ) {
                      $found = 'pipe';
                  } elseif ( $curChar == '=' ) {
                      $found = 'equals';
                  } elseif ( $curChar == '<' ) {
                      $found = 'angle';
                  } elseif ( $curChar == "\n" ) {
                      if ( $inHeading ) {
                          $found = 'line-end';
                      } else {
                          $found = 'line-start';
                      }
                  } elseif ( $curTwoChar == $currentClosing ) {
                      $found = 'close';
                      $curChar = $curTwoChar;
                  } elseif ( $curChar == $currentClosing ) {
                      $found = 'close';
                  } elseif ( isset( $this->rules[$curTwoChar] ) ) {
                      $curChar = $curTwoChar;
                      $found = 'open';
                      $rule = $this->rules[$curChar];
                  } elseif ( isset( $this->rules[$curChar] ) ) {
                      $found = 'open';
                      $rule = $this->rules[$curChar];
                  } else {
                      # Some versions of PHP have a strcspn which stops on
                      # null characters; ignore these and continue.
                      # We also may get '-' and '}' characters here which
                      # don't match -{ or $currentClosing. Add these to
                      # output and continue.
                      if ( $curChar == '-' || $curChar == '}' ) {
                          $accum .= $curChar;
                      }
                      ++$i;
                      continue;
                  }
              }
          }

          // --- Step 2: act on what was found ---
          if ( $found == 'angle' ) {
              $matches = false;
              // Handle </onlyinclude>
              if ( $enableOnlyinclude
                  && substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>'
              ) {
                  // $i is not advanced: the closing tag itself is swallowed by the
                  // "ignore up to the next <onlyinclude>" branch at the top of the loop.
                  $findOnlyinclude = true;
                  continue;
              }

              // Determine element name
              if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) {
                  // Element name missing or not listed
                  $accum .= '&lt;';
                  ++$i;
                  continue;
              }
              // Handle comments
              if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
                  // To avoid leaving blank lines, when a sequence of
                  // space-separated comments is both preceded and followed by
                  // a newline (ignoring spaces), then
                  // trim leading and trailing spaces and the trailing newline.

                  // Find the end
                  $endPos = strpos( $text, '-->', $i + 4 );
                  if ( $endPos === false ) {
                      // Unclosed comment in input, runs to end
                      $inner = substr( $text, $i );
                      $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
                      $i = $lengthText;
                  } else {
                      // Search backwards for leading whitespace
                      $wsStart = $i ? ( $i - strspn( $revText, " \t", $lengthText - $i ) ) : 0;

                      // Search forwards for trailing whitespace
                      // $wsEnd will be the position of the last space (or the '>' if there's none)
                      $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 );

                      // Keep looking forward as long as we're finding more
                      // comments.
                      $comments = [ [ $wsStart, $wsEnd ] ];
                      while ( substr( $text, $wsEnd + 1, 4 ) == '<!--' ) {
                          $c = strpos( $text, '-->', $wsEnd + 4 );
                          if ( $c === false ) {
                              break;
                          }
                          $c = $c + 2 + strspn( $text, " \t", $c + 3 );
                          $comments[] = [ $wsEnd + 1, $c ];
                          $wsEnd = $c;
                      }

                      // Eat the line if possible
                      // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
                      // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
                      // it's a possible beneficial b/c break.
                      if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
                          && substr( $text, $wsEnd + 1, 1 ) == "\n"
                      ) {
                          // Remove leading whitespace from the end of the accumulator
                          // Sanity check first though
                          $wsLength = $i - $wsStart;
                          if ( $wsLength > 0
                              && strspn( $accum, " \t", -$wsLength ) === $wsLength
                          ) {
                              $accum = substr( $accum, 0, -$wsLength );
                          }

                          // Dump all but the last comment to the accumulator
                          // ($startPos/$endPos are still set for the last one before
                          // the break, and are used after the loop)
                          foreach ( $comments as $j => $com ) {
                              $startPos = $com[0];
                              $endPos = $com[1] + 1;
                              if ( $j == ( count( $comments ) - 1 ) ) {
                                  break;
                              }
                              $inner = substr( $text, $startPos, $endPos - $startPos );
                              $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
                          }

                          // Do a line-start run next time to look for headings after the comment
                          $fakeLineStart = true;
                      } else {
                          // No line to eat, just take the comment itself
                          $startPos = $i;
                          $endPos += 2;
                      }

                      // Record comment positions on the current part so that the
                      // line-end handler can look for heading equals signs before
                      // a trailing comment.
                      if ( $stack->top ) {
                          $part = $stack->top->getCurrentPart();
                          if ( !( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) ) {
                              $part->visualEnd = $wsStart;
                          }
                          // Else comments abutting, no change in visual end
                          $part->commentEnd = $endPos;
                      }
                      $i = $endPos + 1;
                      $inner = substr( $text, $startPos, $endPos - $startPos + 1 );
                      $accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
                  }
                  continue;
              }
              $name = $matches[1];
              $lowerName = strtolower( $name );
              $attrStart = $i + strlen( $name ) + 1;

              // Find end of tag
              $tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart );
              if ( $tagEndPos === false ) {
                  // Infinite backtrack
                  // Disable tag search to prevent worst-case O(N^2) performance
                  $noMoreGT = true;
                  $accum .= '&lt;';
                  ++$i;
                  continue;
              }

              // Handle ignored tags
              if ( in_array( $lowerName, $ignoredTags ) ) {
                  $accum .= '<ignore>'
                      . htmlspecialchars( substr( $text, $i, $tagEndPos - $i + 1 ) )
                      . '</ignore>';
                  $i = $tagEndPos + 1;
                  continue;
              }

              $tagStartPos = $i;
              if ( $text[$tagEndPos - 1] == '/' ) {
                  // Self-closing tag: no inner content and no close element
                  $attrEnd = $tagEndPos - 1;
                  $inner = null;
                  $i = $tagEndPos + 1;
                  $close = '';
              } else {
                  $attrEnd = $tagEndPos;
                  // Find closing tag
                  if (
                      !isset( $noMoreClosingTag[$name] ) &&
                      preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
                          $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 )
                  ) {
                      $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
                      $i = $matches[0][1] + strlen( $matches[0][0] );
                      $close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>';
                  } else {
                      // No end tag
                      // NOTE(review): this tests $name exactly as written in the text, while the
                      // list holds lower-case names and the neighbouring checks use $lowerName;
                      // a differently-cased tag (e.g. <NoInclude>) will not match here — confirm
                      // whether that is intended.
                      if ( in_array( $name, $xmlishAllowMissingEndTag ) ) {
                          // Let it run out to the end of the text.
                          $inner = substr( $text, $tagEndPos + 1 );
                          $i = $lengthText;
                          $close = '';
                      } else {
                          // Don't match the tag, treat opening tag as literal and resume parsing.
                          $i = $tagEndPos + 1;
                          $accum .= htmlspecialchars( substr( $text, $tagStartPos, $tagEndPos + 1 - $tagStartPos ) );
                          // Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>...
                          $noMoreClosingTag[$name] = true;
                          continue;
                      }
                  }
              }
              // <includeonly> and <noinclude> just become <ignore> tags
              if ( in_array( $lowerName, $ignoredElements ) ) {
                  $accum .= '<ignore>' . htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) )
                      . '</ignore>';
                  continue;
              }

              $accum .= '<ext>';
              if ( $attrEnd <= $attrStart ) {
                  $attr = '';
              } else {
                  $attr = substr( $text, $attrStart, $attrEnd - $attrStart );
              }
              $accum .= '<name>' . htmlspecialchars( $name ) . '</name>' .
                  // Note that the attr element contains the whitespace between name and attribute,
                  // this is necessary for precise reconstruction during pre-save transform.
                  '<attr>' . htmlspecialchars( $attr ) . '</attr>';
              if ( $inner !== null ) {
                  $accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>';
              }
              $accum .= $close . '</ext>';
          } elseif ( $found == 'line-start' ) {
              // Is this the start of a heading?
              // Line break belongs before the heading element in any case
              if ( $fakeLineStart ) {
                  $fakeLineStart = false;
              } else {
                  $accum .= $curChar;
                  $i++;
              }

              // Count leading equals signs, at most 6
              $count = strspn( $text, '=', $i, 6 );
              if ( $count == 1 && $findEquals ) {
                  // DWIM: This looks kind of like a name/value separator.
                  // Let's let the equals handler have it and break the
                  // potential heading. This is heuristic, but AFAICT the
                  // methods for completely correct disambiguation are very
                  // complex.
              } elseif ( $count > 0 ) {
                  // Open a heading candidate; it is closed by the next newline
                  $piece = [
                      'open' => "\n",
                      'close' => "\n",
                      'parts' => [ new PPDPart( str_repeat( '=', $count ) ) ],
                      'startPos' => $i,
                      'count' => $count ];
                  $stack->push( $piece );
                  $accum =& $stack->getAccum();
                  $stackFlags = $stack->getFlags();
                  if ( isset( $stackFlags['findEquals'] ) ) {
                      $findEquals = $stackFlags['findEquals'];
                  }
                  if ( isset( $stackFlags['findPipe'] ) ) {
                      $findPipe = $stackFlags['findPipe'];
                  }
                  if ( isset( $stackFlags['inHeading'] ) ) {
                      $inHeading = $stackFlags['inHeading'];
                  }
                  $i += $count;
              }
          } elseif ( $found == 'line-end' ) {
              $piece = $stack->top;
              // A heading must be open, otherwise \n wouldn't have been in the search list
              // FIXME: Don't use assert()
              // phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.assert
              assert( $piece->open === "\n" );
              $part = $piece->getCurrentPart();
              // Search back through the input to see if it has a proper close.
              // Do this using the reversed string since the other solutions
              // (end anchor, etc.) are inefficient.
              $wsLength = strspn( $revText, " \t", $lengthText - $i );
              $searchStart = $i - $wsLength;
              if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) {
                  // Comment found at line end
                  // Search for equals signs before the comment
                  $searchStart = $part->visualEnd;
                  $searchStart -= strspn( $revText, " \t", $lengthText - $searchStart );
              }
              $count = $piece->count;
              $equalsLength = strspn( $revText, '=', $lengthText - $searchStart );
              if ( $equalsLength > 0 ) {
                  if ( $searchStart - $equalsLength == $piece->startPos ) {
                      // This is just a single string of equals signs on its own line
                      // Replicate the doHeadings behavior /={count}(.+)={count}/
                      // First find out how many equals signs there really are (don't stop at 6)
                      $count = $equalsLength;
                      if ( $count < 3 ) {
                          $count = 0;
                      } else {
                          $count = min( 6, intval( ( $count - 1 ) / 2 ) );
                      }
                  } else {
                      // Heading level is the smaller of the opening and closing runs
                      $count = min( $equalsLength, $count );
                  }
                  if ( $count > 0 ) {
                      // Normal match, output <h>
                      $element = "<h level=\"$count\" i=\"$headingIndex\">$accum</h>";
                      $headingIndex++;
                  } else {
                      // Single equals sign on its own line, count=0
                      $element = $accum;
                  }
              } else {
                  // No match, no <h>, just pass down the inner text
                  $element = $accum;
              }
              // Unwind the stack
              $stack->pop();
              $accum =& $stack->getAccum();
              $stackFlags = $stack->getFlags();
              if ( isset( $stackFlags['findEquals'] ) ) {
                  $findEquals = $stackFlags['findEquals'];
              }
              if ( isset( $stackFlags['findPipe'] ) ) {
                  $findPipe = $stackFlags['findPipe'];
              }
              if ( isset( $stackFlags['inHeading'] ) ) {
                  $inHeading = $stackFlags['inHeading'];
              }

              // Append the result to the enclosing accumulator
              $accum .= $element;
              // Note that we do NOT increment the input pointer.
              // This is because the closing linebreak could be the opening linebreak of
              // another heading. Infinite loops are avoided because the next iteration MUST
              // hit the heading open case above, which unconditionally increments the
              // input pointer.
          } elseif ( $found == 'open' ) {
              # count opening brace characters
              $curLen = strlen( $curChar );
              $count = ( $curLen > 1 ) ?
                  # allow the final character to repeat
                  strspn( $text, $curChar[$curLen - 1], $i + 1 ) + 1 :
                  strspn( $text, $curChar, $i );

              $savedPrefix = '';
              $lineStart = ( $i > 0 && $text[$i - 1] == "\n" );

              if ( $curChar === "-{" && $count > $curLen ) {
                  // -{ => {{ transition because rightmost wins
                  $savedPrefix = '-';
                  $i++;
                  $curChar = '{';
                  $count--;
                  $rule = $this->rules[$curChar];
              }

              # we need to add to stack only if opening brace count is enough for one of the rules
              if ( $count >= $rule['min'] ) {
                  # Add it to the stack
                  $piece = [
                      'open' => $curChar,
                      'close' => $rule['end'],
                      'savedPrefix' => $savedPrefix,
                      'count' => $count,
                      'lineStart' => $lineStart,
                  ];

                  $stack->push( $piece );
                  $accum =& $stack->getAccum();
                  $stackFlags = $stack->getFlags();
                  if ( isset( $stackFlags['findEquals'] ) ) {
                      $findEquals = $stackFlags['findEquals'];
                  }
                  if ( isset( $stackFlags['findPipe'] ) ) {
                      $findPipe = $stackFlags['findPipe'];
                  }
                  if ( isset( $stackFlags['inHeading'] ) ) {
                      $inHeading = $stackFlags['inHeading'];
                  }
              } else {
                  # Add literal brace(s)
                  $accum .= htmlspecialchars( $savedPrefix . str_repeat( $curChar, $count ) );
              }
              $i += $count;
          } elseif ( $found == 'close' ) {
              $piece = $stack->top;
              # lets check if there are enough characters for closing brace
              $maxCount = $piece->count;
              if ( $piece->close === '}-' && $curChar === '}' ) {
                  $maxCount--; # don't try to match closing '-' as a '}'
              }
              $curLen = strlen( $curChar );
              $count = ( $curLen > 1 ) ? $curLen :
                  strspn( $text, $curChar, $i, $maxCount );

              # check for maximum matching characters (if there are 5 closing
              # characters, we will probably need only 3 - depending on the rules)
              $rule = $this->rules[$piece->open];
              if ( $count > $rule['max'] ) {
                  # The specified maximum exists in the callback array, unless the caller
                  # has made an error
                  $matchingCount = $rule['max'];
              } else {
                  # Count is less than the maximum
                  # Skip any gaps in the callback array to find the true largest match
                  # Need to use array_key_exists not isset because the callback can be null
                  $matchingCount = $count;
                  while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) {
                      --$matchingCount;
                  }
              }

              if ( $matchingCount <= 0 ) {
                  # No matching element found in callback array
                  # Output a literal closing brace and continue
                  $endText = substr( $text, $i, $count );
                  $accum .= htmlspecialchars( $endText );
                  $i += $count;
                  continue;
              }
              $name = $rule['names'][$matchingCount];
              if ( $name === null ) {
                  // No element, just literal text
                  $endText = substr( $text, $i, $matchingCount );
                  $element = $piece->breakSyntax( $matchingCount ) . $endText;
              } else {
                  # Create XML element
                  # Note: $parts is already XML, does not need to be encoded further
                  $parts = $piece->parts;
                  $title = $parts[0]->out;
                  unset( $parts[0] );

                  # The invocation is at the start of the line if lineStart is set in
                  # the stack, and all opening brackets are used up.
                  if ( $maxCount == $matchingCount &&
                      !empty( $piece->lineStart ) &&
                      strlen( $piece->savedPrefix ) == 0 ) {
                      $attr = ' lineStart="1"';
                  } else {
                      $attr = '';
                  }

                  $element = "<$name$attr>";
                  $element .= "<title>$title</title>";
                  // Positional arguments are numbered from 1; named arguments
                  // (those with a recorded equals position) do not consume an index.
                  $argIndex = 1;
                  foreach ( $parts as $part ) {
                      if ( isset( $part->eqpos ) ) {
                          $argName = substr( $part->out, 0, $part->eqpos );
                          $argValue = substr( $part->out, $part->eqpos + 1 );
                          $element .= "<part><name>$argName</name>=<value>$argValue</value></part>";
                      } else {
                          $element .= "<part><name index=\"$argIndex\" /><value>{$part->out}</value></part>";
                          $argIndex++;
                      }
                  }
                  $element .= "</$name>";
              }

              # Advance input pointer
              $i += $matchingCount;

              # Unwind the stack
              $stack->pop();
              $accum =& $stack->getAccum();

              # Re-add the old stack element if it still has unmatched opening characters remaining
              if ( $matchingCount < $piece->count ) {
                  $piece->parts = [ new PPDPart ];
                  $piece->count -= $matchingCount;
                  # do we still qualify for any callback with remaining count?
                  $min = $this->rules[$piece->open]['min'];
                  if ( $piece->count >= $min ) {
                      $stack->push( $piece );
                      $accum =& $stack->getAccum();
                  } elseif ( $piece->count == 1 && $piece->open === '{' && $piece->savedPrefix === '-' ) {
                      // A single leftover '{' preceded by the saved '-' is turned
                      // back into a '-{' opener.
                      $piece->savedPrefix = '';
                      $piece->open = '-{';
                      $piece->count = 2;
                      $piece->close = $this->rules[$piece->open]['end'];
                      $stack->push( $piece );
                      $accum =& $stack->getAccum();
                  } else {
                      // Too few opening characters left for any rule: emit them literally
                      $s = substr( $piece->open, 0, -1 );
                      $s .= str_repeat(
                          substr( $piece->open, -1 ),
                          $piece->count - strlen( $s )
                      );
                      $accum .= $piece->savedPrefix . $s;
                  }
              } elseif ( $piece->savedPrefix !== '' ) {
                  $accum .= $piece->savedPrefix;
              }

              $stackFlags = $stack->getFlags();
              if ( isset( $stackFlags['findEquals'] ) ) {
                  $findEquals = $stackFlags['findEquals'];
              }
              if ( isset( $stackFlags['findPipe'] ) ) {
                  $findPipe = $stackFlags['findPipe'];
              }
              if ( isset( $stackFlags['inHeading'] ) ) {
                  $inHeading = $stackFlags['inHeading'];
              }

              # Add XML element to the enclosing accumulator
              $accum .= $element;
          } elseif ( $found == 'pipe' ) {
              $findEquals = true; // shortcut for getFlags()
              $stack->addPart();
              $accum =& $stack->getAccum();
              ++$i;
          } elseif ( $found == 'equals' ) {
              $findEquals = false; // shortcut for getFlags()
              // Remember where the '=' sits in the part's output so the name and
              // value can be split apart when the bracket closes.
              $stack->getCurrentPart()->eqpos = strlen( $accum );
              $accum .= '=';
              ++$i;
          }
      }

      # Output any remaining unclosed brackets
      foreach ( $stack->stack as $piece ) {
          $stack->rootAccum .= $piece->breakSyntax();
      }
      $stack->rootAccum .= '</root>';
      $xml = $stack->rootAccum;

      return $xml;
  }
  770. }