Preprocessor_Hash.php 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259
  1. <?php
  2. /**
  3. * Preprocessor using PHP arrays
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. * @ingroup Parser
  22. */
  23. /**
  24. * Differences from DOM schema:
  25. * * attribute nodes are children
  26. * * "<h>" nodes that aren't at the top are replaced with <possible-h>
  27. *
  28. * Nodes are stored in a recursive array data structure. A node store is an
  29. * array where each element may be either a scalar (representing a text node)
  30. * or a "descriptor", which is a two-element array where the first element is
  31. * the node name and the second element is the node store for the children.
  32. *
  33. * Attributes are represented as children that have a node name starting with
  34. * "@", and a single text node child.
  35. *
  36. * @todo: Consider replacing descriptor arrays with objects of a new class.
  37. * Benchmark and measure resulting memory impact.
  38. *
  39. * @ingroup Parser
  40. */
  41. // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
  42. class Preprocessor_Hash extends Preprocessor {
  43. /**
  44. * @var Parser
  45. */
  46. public $parser;
  47. const CACHE_PREFIX = 'preprocess-hash';
  48. const CACHE_VERSION = 2;
  /**
   * Store the given parser on this preprocessor instance.
   *
   * @param Parser $parser Assigned to the public $parser property.
   */
  public function __construct( $parser ) {
      $this->parser = $parser;
  }
  52. /**
  53. * @return PPFrame_Hash
  54. */
  public function newFrame() {
      // The frame is constructed with this preprocessor as its only argument.
      $frame = new PPFrame_Hash( $this );

      return $frame;
  }
  58. /**
  59. * @param array $args
  60. * @return PPCustomFrame_Hash
  61. */
  public function newCustomFrame( $args ) {
      // The custom frame is constructed with this preprocessor and the
      // caller-supplied argument array, in that order.
      $frame = new PPCustomFrame_Hash( $this, $args );

      return $frame;
  }
  65. /**
  66. * @param array $values
  67. * @return PPNode_Hash_Array
  68. */
  public function newPartNodeArray( $values ) {
      // Build one single-element node store per input entry, each holding a
      // "part" descriptor, and wrap every store in its own tree node.
      $nodes = [];

      foreach ( $values as $key => $value ) {
          // The value child has the same shape for both kinds of key.
          $valueNode = [ 'value', [ strval( $value ) ] ];

          if ( is_int( $key ) ) {
              // Integer key: the name holds an "@index" attribute child and
              // no "=" text node is emitted.
              $children = [
                  [ 'name', [ [ '@index', [ $key ] ] ] ],
                  $valueNode,
              ];
          } else {
              // Non-integer key: the name is the key as text, followed by a
              // literal "=" text node and then the value.
              $children = [
                  [ 'name', [ strval( $key ) ] ],
                  '=',
                  $valueNode,
              ];
          }

          $nodes[] = new PPNode_Hash_Tree( [ [ 'part', $children ] ], 0 );
      }

      return new PPNode_Hash_Array( $nodes );
  }
  89. /**
  90. * Preprocess some wikitext and return the document tree.
  91. *
  92. * @param string $text The text to parse
  93. * @param int $flags Bitwise combination of:
  94. * Parser::PTD_FOR_INCLUSION Handle "<noinclude>" and "<includeonly>" as if the text is being
  95. * included. Default is to assume a direct page view.
  96. *
  97. * The generated DOM tree must depend only on the input text and the flags.
  98. * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of T6899.
  99. *
  100. * Any flag added to the $flags parameter here, or any other parameter liable to cause a
  101. * change in the DOM tree for a given text, must be passed through the section identifier
  102. * in the section edit link and thus back to extractSections().
  103. *
  104. * @throws MWException
  105. * @return PPNode_Hash_Tree
  106. */
  107. public function preprocessToObj( $text, $flags = 0 ) {
  108. global $wgDisableLangConversion;
  109. $tree = $this->cacheGetTree( $text, $flags );
  110. if ( $tree !== false ) {
  111. $store = json_decode( $tree );
  112. if ( is_array( $store ) ) {
  113. return new PPNode_Hash_Tree( $store, 0 );
  114. }
  115. }
  116. $forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
  117. $xmlishElements = $this->parser->getStripList();
  118. $xmlishAllowMissingEndTag = [ 'includeonly', 'noinclude', 'onlyinclude' ];
  119. $enableOnlyinclude = false;
  120. if ( $forInclusion ) {
  121. $ignoredTags = [ 'includeonly', '/includeonly' ];
  122. $ignoredElements = [ 'noinclude' ];
  123. $xmlishElements[] = 'noinclude';
  124. if ( strpos( $text, '<onlyinclude>' ) !== false
  125. && strpos( $text, '</onlyinclude>' ) !== false
  126. ) {
  127. $enableOnlyinclude = true;
  128. }
  129. } else {
  130. $ignoredTags = [ 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ];
  131. $ignoredElements = [ 'includeonly' ];
  132. $xmlishElements[] = 'includeonly';
  133. }
  134. $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
  135. // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
  136. $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
  137. $stack = new PPDStack_Hash;
  138. $searchBase = "[{<\n";
  139. if ( !$wgDisableLangConversion ) {
  140. $searchBase .= '-';
  141. }
  142. // For fast reverse searches
  143. $revText = strrev( $text );
  144. $lengthText = strlen( $text );
  145. // Input pointer, starts out pointing to a pseudo-newline before the start
  146. $i = 0;
  147. // Current accumulator. See the doc comment for Preprocessor_Hash for the format.
  148. $accum =& $stack->getAccum();
  149. // True to find equals signs in arguments
  150. $findEquals = false;
  151. // True to take notice of pipe characters
  152. $findPipe = false;
  153. $headingIndex = 1;
  154. // True if $i is inside a possible heading
  155. $inHeading = false;
  156. // True if there are no more greater-than (>) signs right of $i
  157. $noMoreGT = false;
  158. // Map of tag name => true if there are no more closing tags of given type right of $i
  159. $noMoreClosingTag = [];
  160. // True to ignore all input up to the next <onlyinclude>
  161. $findOnlyinclude = $enableOnlyinclude;
  162. // Do a line-start run without outputting an LF character
  163. $fakeLineStart = true;
  164. while ( true ) {
  165. // $this->memCheck();
  166. if ( $findOnlyinclude ) {
  167. // Ignore all input up to the next <onlyinclude>
  168. $startPos = strpos( $text, '<onlyinclude>', $i );
  169. if ( $startPos === false ) {
  170. // Ignored section runs to the end
  171. $accum[] = [ 'ignore', [ substr( $text, $i ) ] ];
  172. break;
  173. }
  174. $tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end
  175. $accum[] = [ 'ignore', [ substr( $text, $i, $tagEndPos - $i ) ] ];
  176. $i = $tagEndPos;
  177. $findOnlyinclude = false;
  178. }
  179. if ( $fakeLineStart ) {
  180. $found = 'line-start';
  181. $curChar = '';
  182. } else {
  183. # Find next opening brace, closing brace or pipe
  184. $search = $searchBase;
  185. if ( $stack->top === false ) {
  186. $currentClosing = '';
  187. } else {
  188. $currentClosing = $stack->top->close;
  189. $search .= $currentClosing;
  190. }
  191. if ( $findPipe ) {
  192. $search .= '|';
  193. }
  194. if ( $findEquals ) {
  195. // First equals will be for the template
  196. $search .= '=';
  197. }
  198. $rule = null;
  199. # Output literal section, advance input counter
  200. $literalLength = strcspn( $text, $search, $i );
  201. if ( $literalLength > 0 ) {
  202. self::addLiteral( $accum, substr( $text, $i, $literalLength ) );
  203. $i += $literalLength;
  204. }
  205. if ( $i >= $lengthText ) {
  206. if ( $currentClosing == "\n" ) {
  207. // Do a past-the-end run to finish off the heading
  208. $curChar = '';
  209. $found = 'line-end';
  210. } else {
  211. # All done
  212. break;
  213. }
  214. } else {
  215. $curChar = $curTwoChar = $text[$i];
  216. if ( ( $i + 1 ) < $lengthText ) {
  217. $curTwoChar .= $text[$i + 1];
  218. }
  219. if ( $curChar == '|' ) {
  220. $found = 'pipe';
  221. } elseif ( $curChar == '=' ) {
  222. $found = 'equals';
  223. } elseif ( $curChar == '<' ) {
  224. $found = 'angle';
  225. } elseif ( $curChar == "\n" ) {
  226. if ( $inHeading ) {
  227. $found = 'line-end';
  228. } else {
  229. $found = 'line-start';
  230. }
  231. } elseif ( $curTwoChar == $currentClosing ) {
  232. $found = 'close';
  233. $curChar = $curTwoChar;
  234. } elseif ( $curChar == $currentClosing ) {
  235. $found = 'close';
  236. } elseif ( isset( $this->rules[$curTwoChar] ) ) {
  237. $curChar = $curTwoChar;
  238. $found = 'open';
  239. $rule = $this->rules[$curChar];
  240. } elseif ( isset( $this->rules[$curChar] ) ) {
  241. $found = 'open';
  242. $rule = $this->rules[$curChar];
  243. } else {
  244. # Some versions of PHP have a strcspn which stops on
  245. # null characters; ignore these and continue.
  246. # We also may get '-' and '}' characters here which
  247. # don't match -{ or $currentClosing. Add these to
  248. # output and continue.
  249. if ( $curChar == '-' || $curChar == '}' ) {
  250. self::addLiteral( $accum, $curChar );
  251. }
  252. ++$i;
  253. continue;
  254. }
  255. }
  256. }
  257. if ( $found == 'angle' ) {
  258. $matches = false;
  259. // Handle </onlyinclude>
  260. if ( $enableOnlyinclude
  261. && substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>'
  262. ) {
  263. $findOnlyinclude = true;
  264. continue;
  265. }
  266. // Determine element name
  267. if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) {
  268. // Element name missing or not listed
  269. self::addLiteral( $accum, '<' );
  270. ++$i;
  271. continue;
  272. }
  273. // Handle comments
  274. if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
  275. // To avoid leaving blank lines, when a sequence of
  276. // space-separated comments is both preceded and followed by
  277. // a newline (ignoring spaces), then
  278. // trim leading and trailing spaces and the trailing newline.
  279. // Find the end
  280. $endPos = strpos( $text, '-->', $i + 4 );
  281. if ( $endPos === false ) {
  282. // Unclosed comment in input, runs to end
  283. $inner = substr( $text, $i );
  284. $accum[] = [ 'comment', [ $inner ] ];
  285. $i = $lengthText;
  286. } else {
  287. // Search backwards for leading whitespace
  288. $wsStart = $i ? ( $i - strspn( $revText, " \t", $lengthText - $i ) ) : 0;
  289. // Search forwards for trailing whitespace
  290. // $wsEnd will be the position of the last space (or the '>' if there's none)
  291. $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 );
  292. // Keep looking forward as long as we're finding more
  293. // comments.
  294. $comments = [ [ $wsStart, $wsEnd ] ];
  295. while ( substr( $text, $wsEnd + 1, 4 ) == '<!--' ) {
  296. $c = strpos( $text, '-->', $wsEnd + 4 );
  297. if ( $c === false ) {
  298. break;
  299. }
  300. $c = $c + 2 + strspn( $text, " \t", $c + 3 );
  301. $comments[] = [ $wsEnd + 1, $c ];
  302. $wsEnd = $c;
  303. }
  304. // Eat the line if possible
  305. // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
  306. // the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
  307. // it's a possible beneficial b/c break.
  308. if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
  309. && substr( $text, $wsEnd + 1, 1 ) == "\n"
  310. ) {
  311. // Remove leading whitespace from the end of the accumulator
  312. $wsLength = $i - $wsStart;
  313. $endIndex = count( $accum ) - 1;
  314. // Sanity check
  315. if ( $wsLength > 0
  316. && $endIndex >= 0
  317. && is_string( $accum[$endIndex] )
  318. && strspn( $accum[$endIndex], " \t", -$wsLength ) === $wsLength
  319. ) {
  320. $accum[$endIndex] = substr( $accum[$endIndex], 0, -$wsLength );
  321. }
  322. // Dump all but the last comment to the accumulator
  323. foreach ( $comments as $j => $com ) {
  324. $startPos = $com[0];
  325. $endPos = $com[1] + 1;
  326. if ( $j == ( count( $comments ) - 1 ) ) {
  327. break;
  328. }
  329. $inner = substr( $text, $startPos, $endPos - $startPos );
  330. $accum[] = [ 'comment', [ $inner ] ];
  331. }
  332. // Do a line-start run next time to look for headings after the comment
  333. $fakeLineStart = true;
  334. } else {
  335. // No line to eat, just take the comment itself
  336. $startPos = $i;
  337. $endPos += 2;
  338. }
  339. if ( $stack->top ) {
  340. $part = $stack->top->getCurrentPart();
  341. if ( !( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) ) {
  342. $part->visualEnd = $wsStart;
  343. }
  344. // Else comments abutting, no change in visual end
  345. $part->commentEnd = $endPos;
  346. }
  347. $i = $endPos + 1;
  348. $inner = substr( $text, $startPos, $endPos - $startPos + 1 );
  349. $accum[] = [ 'comment', [ $inner ] ];
  350. }
  351. continue;
  352. }
  353. $name = $matches[1];
  354. $lowerName = strtolower( $name );
  355. $attrStart = $i + strlen( $name ) + 1;
  356. // Find end of tag
  357. $tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart );
  358. if ( $tagEndPos === false ) {
  359. // Infinite backtrack
  360. // Disable tag search to prevent worst-case O(N^2) performance
  361. $noMoreGT = true;
  362. self::addLiteral( $accum, '<' );
  363. ++$i;
  364. continue;
  365. }
  366. // Handle ignored tags
  367. if ( in_array( $lowerName, $ignoredTags ) ) {
  368. $accum[] = [ 'ignore', [ substr( $text, $i, $tagEndPos - $i + 1 ) ] ];
  369. $i = $tagEndPos + 1;
  370. continue;
  371. }
  372. $tagStartPos = $i;
  373. if ( $text[$tagEndPos - 1] == '/' ) {
  374. // Short end tag
  375. $attrEnd = $tagEndPos - 1;
  376. $inner = null;
  377. $i = $tagEndPos + 1;
  378. $close = null;
  379. } else {
  380. $attrEnd = $tagEndPos;
  381. // Find closing tag
  382. if (
  383. !isset( $noMoreClosingTag[$name] ) &&
  384. preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
  385. $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 )
  386. ) {
  387. $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
  388. $i = $matches[0][1] + strlen( $matches[0][0] );
  389. $close = $matches[0][0];
  390. } else {
  391. // No end tag
  392. if ( in_array( $name, $xmlishAllowMissingEndTag ) ) {
  393. // Let it run out to the end of the text.
  394. $inner = substr( $text, $tagEndPos + 1 );
  395. $i = $lengthText;
  396. $close = null;
  397. } else {
  398. // Don't match the tag, treat opening tag as literal and resume parsing.
  399. $i = $tagEndPos + 1;
  400. self::addLiteral( $accum,
  401. substr( $text, $tagStartPos, $tagEndPos + 1 - $tagStartPos ) );
  402. // Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>...
  403. $noMoreClosingTag[$name] = true;
  404. continue;
  405. }
  406. }
  407. }
  408. // <includeonly> and <noinclude> just become <ignore> tags
  409. if ( in_array( $lowerName, $ignoredElements ) ) {
  410. $accum[] = [ 'ignore', [ substr( $text, $tagStartPos, $i - $tagStartPos ) ] ];
  411. continue;
  412. }
  413. if ( $attrEnd <= $attrStart ) {
  414. $attr = '';
  415. } else {
  416. // Note that the attr element contains the whitespace between name and attribute,
  417. // this is necessary for precise reconstruction during pre-save transform.
  418. $attr = substr( $text, $attrStart, $attrEnd - $attrStart );
  419. }
  420. $children = [
  421. [ 'name', [ $name ] ],
  422. [ 'attr', [ $attr ] ] ];
  423. if ( $inner !== null ) {
  424. $children[] = [ 'inner', [ $inner ] ];
  425. }
  426. if ( $close !== null ) {
  427. $children[] = [ 'close', [ $close ] ];
  428. }
  429. $accum[] = [ 'ext', $children ];
  430. } elseif ( $found == 'line-start' ) {
  431. // Is this the start of a heading?
  432. // Line break belongs before the heading element in any case
  433. if ( $fakeLineStart ) {
  434. $fakeLineStart = false;
  435. } else {
  436. self::addLiteral( $accum, $curChar );
  437. $i++;
  438. }
  439. $count = strspn( $text, '=', $i, 6 );
  440. if ( $count == 1 && $findEquals ) {
  441. // DWIM: This looks kind of like a name/value separator.
  442. // Let's let the equals handler have it and break the potential
  443. // heading. This is heuristic, but AFAICT the methods for
  444. // completely correct disambiguation are very complex.
  445. } elseif ( $count > 0 ) {
  446. $piece = [
  447. 'open' => "\n",
  448. 'close' => "\n",
  449. 'parts' => [ new PPDPart_Hash( str_repeat( '=', $count ) ) ],
  450. 'startPos' => $i,
  451. 'count' => $count ];
  452. $stack->push( $piece );
  453. $accum =& $stack->getAccum();
  454. $stackFlags = $stack->getFlags();
  455. if ( isset( $stackFlags['findEquals'] ) ) {
  456. $findEquals = $stackFlags['findEquals'];
  457. }
  458. if ( isset( $stackFlags['findPipe'] ) ) {
  459. $findPipe = $stackFlags['findPipe'];
  460. }
  461. if ( isset( $stackFlags['inHeading'] ) ) {
  462. $inHeading = $stackFlags['inHeading'];
  463. }
  464. $i += $count;
  465. }
  466. } elseif ( $found == 'line-end' ) {
  467. $piece = $stack->top;
  468. // A heading must be open, otherwise \n wouldn't have been in the search list
  469. assert( $piece->open === "\n" );
  470. $part = $piece->getCurrentPart();
  471. // Search back through the input to see if it has a proper close.
  472. // Do this using the reversed string since the other solutions
  473. // (end anchor, etc.) are inefficient.
  474. $wsLength = strspn( $revText, " \t", $lengthText - $i );
  475. $searchStart = $i - $wsLength;
  476. if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) {
  477. // Comment found at line end
  478. // Search for equals signs before the comment
  479. $searchStart = $part->visualEnd;
  480. $searchStart -= strspn( $revText, " \t", $lengthText - $searchStart );
  481. }
  482. $count = $piece->count;
  483. $equalsLength = strspn( $revText, '=', $lengthText - $searchStart );
  484. if ( $equalsLength > 0 ) {
  485. if ( $searchStart - $equalsLength == $piece->startPos ) {
  486. // This is just a single string of equals signs on its own line
  487. // Replicate the doHeadings behavior /={count}(.+)={count}/
  488. // First find out how many equals signs there really are (don't stop at 6)
  489. $count = $equalsLength;
  490. if ( $count < 3 ) {
  491. $count = 0;
  492. } else {
  493. $count = min( 6, intval( ( $count - 1 ) / 2 ) );
  494. }
  495. } else {
  496. $count = min( $equalsLength, $count );
  497. }
  498. if ( $count > 0 ) {
  499. // Normal match, output <h>
  500. $element = [ [ 'possible-h',
  501. array_merge(
  502. [
  503. [ '@level', [ $count ] ],
  504. [ '@i', [ $headingIndex++ ] ]
  505. ],
  506. $accum
  507. )
  508. ] ];
  509. } else {
  510. // Single equals sign on its own line, count=0
  511. $element = $accum;
  512. }
  513. } else {
  514. // No match, no <h>, just pass down the inner text
  515. $element = $accum;
  516. }
  517. // Unwind the stack
  518. $stack->pop();
  519. $accum =& $stack->getAccum();
  520. $stackFlags = $stack->getFlags();
  521. if ( isset( $stackFlags['findEquals'] ) ) {
  522. $findEquals = $stackFlags['findEquals'];
  523. }
  524. if ( isset( $stackFlags['findPipe'] ) ) {
  525. $findPipe = $stackFlags['findPipe'];
  526. }
  527. if ( isset( $stackFlags['inHeading'] ) ) {
  528. $inHeading = $stackFlags['inHeading'];
  529. }
  530. // Append the result to the enclosing accumulator
  531. array_splice( $accum, count( $accum ), 0, $element );
  532. // Note that we do NOT increment the input pointer.
  533. // This is because the closing linebreak could be the opening linebreak of
  534. // another heading. Infinite loops are avoided because the next iteration MUST
  535. // hit the heading open case above, which unconditionally increments the
  536. // input pointer.
  537. } elseif ( $found == 'open' ) {
  538. # count opening brace characters
  539. $curLen = strlen( $curChar );
  540. $count = ( $curLen > 1 ) ?
  541. # allow the final character to repeat
  542. strspn( $text, $curChar[$curLen - 1], $i + 1 ) + 1 :
  543. strspn( $text, $curChar, $i );
  544. $savedPrefix = '';
  545. $lineStart = ( $i > 0 && $text[$i - 1] == "\n" );
  546. if ( $curChar === "-{" && $count > $curLen ) {
  547. // -{ => {{ transition because rightmost wins
  548. $savedPrefix = '-';
  549. $i++;
  550. $curChar = '{';
  551. $count--;
  552. $rule = $this->rules[$curChar];
  553. }
  554. # we need to add to stack only if opening brace count is enough for one of the rules
  555. if ( $count >= $rule['min'] ) {
  556. # Add it to the stack
  557. $piece = [
  558. 'open' => $curChar,
  559. 'close' => $rule['end'],
  560. 'savedPrefix' => $savedPrefix,
  561. 'count' => $count,
  562. 'lineStart' => $lineStart,
  563. ];
  564. $stack->push( $piece );
  565. $accum =& $stack->getAccum();
  566. $stackFlags = $stack->getFlags();
  567. if ( isset( $stackFlags['findEquals'] ) ) {
  568. $findEquals = $stackFlags['findEquals'];
  569. }
  570. if ( isset( $stackFlags['findPipe'] ) ) {
  571. $findPipe = $stackFlags['findPipe'];
  572. }
  573. if ( isset( $stackFlags['inHeading'] ) ) {
  574. $inHeading = $stackFlags['inHeading'];
  575. }
  576. } else {
  577. # Add literal brace(s)
  578. self::addLiteral( $accum, $savedPrefix . str_repeat( $curChar, $count ) );
  579. }
  580. $i += $count;
  581. } elseif ( $found == 'close' ) {
  582. $piece = $stack->top;
  583. # lets check if there are enough characters for closing brace
  584. $maxCount = $piece->count;
  585. if ( $piece->close === '}-' && $curChar === '}' ) {
  586. $maxCount--; # don't try to match closing '-' as a '}'
  587. }
  588. $curLen = strlen( $curChar );
  589. $count = ( $curLen > 1 ) ? $curLen :
  590. strspn( $text, $curChar, $i, $maxCount );
  591. # check for maximum matching characters (if there are 5 closing
  592. # characters, we will probably need only 3 - depending on the rules)
  593. $rule = $this->rules[$piece->open];
  594. if ( $count > $rule['max'] ) {
  595. # The specified maximum exists in the callback array, unless the caller
  596. # has made an error
  597. $matchingCount = $rule['max'];
  598. } else {
  599. # Count is less than the maximum
  600. # Skip any gaps in the callback array to find the true largest match
  601. # Need to use array_key_exists not isset because the callback can be null
  602. $matchingCount = $count;
  603. while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) {
  604. --$matchingCount;
  605. }
  606. }
  607. if ( $matchingCount <= 0 ) {
  608. # No matching element found in callback array
  609. # Output a literal closing brace and continue
  610. $endText = substr( $text, $i, $count );
  611. self::addLiteral( $accum, $endText );
  612. $i += $count;
  613. continue;
  614. }
  615. $name = $rule['names'][$matchingCount];
  616. if ( $name === null ) {
  617. // No element, just literal text
  618. $endText = substr( $text, $i, $matchingCount );
  619. $element = $piece->breakSyntax( $matchingCount );
  620. self::addLiteral( $element, $endText );
  621. } else {
  622. # Create XML element
  623. $parts = $piece->parts;
  624. $titleAccum = $parts[0]->out;
  625. unset( $parts[0] );
  626. $children = [];
  627. # The invocation is at the start of the line if lineStart is set in
  628. # the stack, and all opening brackets are used up.
  629. if ( $maxCount == $matchingCount &&
  630. !empty( $piece->lineStart ) &&
  631. strlen( $piece->savedPrefix ) == 0 ) {
  632. $children[] = [ '@lineStart', [ 1 ] ];
  633. }
  634. $titleNode = [ 'title', $titleAccum ];
  635. $children[] = $titleNode;
  636. $argIndex = 1;
  637. foreach ( $parts as $part ) {
  638. if ( isset( $part->eqpos ) ) {
  639. $equalsNode = $part->out[$part->eqpos];
  640. $nameNode = [ 'name', array_slice( $part->out, 0, $part->eqpos ) ];
  641. $valueNode = [ 'value', array_slice( $part->out, $part->eqpos + 1 ) ];
  642. $partNode = [ 'part', [ $nameNode, $equalsNode, $valueNode ] ];
  643. $children[] = $partNode;
  644. } else {
  645. $nameNode = [ 'name', [ [ '@index', [ $argIndex++ ] ] ] ];
  646. $valueNode = [ 'value', $part->out ];
  647. $partNode = [ 'part', [ $nameNode, $valueNode ] ];
  648. $children[] = $partNode;
  649. }
  650. }
  651. $element = [ [ $name, $children ] ];
  652. }
  653. # Advance input pointer
  654. $i += $matchingCount;
  655. # Unwind the stack
  656. $stack->pop();
  657. $accum =& $stack->getAccum();
  658. # Re-add the old stack element if it still has unmatched opening characters remaining
  659. if ( $matchingCount < $piece->count ) {
  660. $piece->parts = [ new PPDPart_Hash ];
  661. $piece->count -= $matchingCount;
  662. # do we still qualify for any callback with remaining count?
  663. $min = $this->rules[$piece->open]['min'];
  664. if ( $piece->count >= $min ) {
  665. $stack->push( $piece );
  666. $accum =& $stack->getAccum();
  667. } elseif ( $piece->count == 1 && $piece->open === '{' && $piece->savedPrefix === '-' ) {
  668. $piece->savedPrefix = '';
  669. $piece->open = '-{';
  670. $piece->count = 2;
  671. $piece->close = $this->rules[$piece->open]['end'];
  672. $stack->push( $piece );
  673. $accum =& $stack->getAccum();
  674. } else {
  675. $s = substr( $piece->open, 0, -1 );
  676. $s .= str_repeat(
  677. substr( $piece->open, -1 ),
  678. $piece->count - strlen( $s )
  679. );
  680. self::addLiteral( $accum, $piece->savedPrefix . $s );
  681. }
  682. } elseif ( $piece->savedPrefix !== '' ) {
  683. self::addLiteral( $accum, $piece->savedPrefix );
  684. }
  685. $stackFlags = $stack->getFlags();
  686. if ( isset( $stackFlags['findEquals'] ) ) {
  687. $findEquals = $stackFlags['findEquals'];
  688. }
  689. if ( isset( $stackFlags['findPipe'] ) ) {
  690. $findPipe = $stackFlags['findPipe'];
  691. }
  692. if ( isset( $stackFlags['inHeading'] ) ) {
  693. $inHeading = $stackFlags['inHeading'];
  694. }
  695. # Add XML element to the enclosing accumulator
  696. array_splice( $accum, count( $accum ), 0, $element );
  697. } elseif ( $found == 'pipe' ) {
  698. $findEquals = true; // shortcut for getFlags()
  699. $stack->addPart();
  700. $accum =& $stack->getAccum();
  701. ++$i;
  702. } elseif ( $found == 'equals' ) {
  703. $findEquals = false; // shortcut for getFlags()
  704. $accum[] = [ 'equals', [ '=' ] ];
  705. $stack->getCurrentPart()->eqpos = count( $accum ) - 1;
  706. ++$i;
  707. }
  708. }
  709. # Output any remaining unclosed brackets
  710. foreach ( $stack->stack as $piece ) {
  711. array_splice( $stack->rootAccum, count( $stack->rootAccum ), 0, $piece->breakSyntax() );
  712. }
  713. # Enable top-level headings
  714. foreach ( $stack->rootAccum as &$node ) {
  715. if ( is_array( $node ) && $node[PPNode_Hash_Tree::NAME] === 'possible-h' ) {
  716. $node[PPNode_Hash_Tree::NAME] = 'h';
  717. }
  718. }
  719. $rootStore = [ [ 'root', $stack->rootAccum ] ];
  720. $rootNode = new PPNode_Hash_Tree( $rootStore, 0 );
  721. // Cache
  722. $tree = json_encode( $rootStore, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE );
  723. if ( $tree !== false ) {
  724. $this->cacheSetTree( $text, $flags, $tree );
  725. }
  726. return $rootNode;
  727. }
  /**
   * Append literal text to an accumulator.
   *
   * When the last element of the accumulator is already a string the text is
   * concatenated onto it; otherwise the text is pushed as a new element.
   *
   * @param array &$accum Accumulator, modified in place
   * @param string $text Literal text to add
   */
  private static function addLiteral( array &$accum, $text ) {
      $last = count( $accum ) - 1;
      if ( $last < 0 || !is_string( $accum[$last] ) ) {
          // Empty accumulator, or the tail is a node descriptor: start a new element
          $accum[] = $text;
          return;
      }
      $accum[$last] .= $text;
  }
  736. }
  /**
   * Stack class to help Preprocessor::preprocessToObj()
   *
   * Differs from the parent stack only in the element class it asks for and
   * in using a plain array as the root accumulator.
   *
   * @ingroup Parser
   */
  // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
  class PPDStack_Hash extends PPDStack {
      /**
       * Select the hash-flavoured element class, run the parent constructor,
       * then reset the root accumulator to an empty array.
       */
      public function __construct() {
          // Set before the parent constructor runs.
          // NOTE(review): presumably the parent reads elementClass when building
          // elements - confirm against PPDStack.
          $this->elementClass = PPDStackElement_Hash::class;

          parent::__construct();

          // Assigned after the parent constructor so that this value is the one that sticks
          $this->rootAccum = [];
      }
  }
  /**
   * Stack element whose parts are PPDPart_Hash objects and whose output is
   * accumulated in plain arrays.
   *
   * @ingroup Parser
   */
  // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
  class PPDStackElement_Hash extends PPDStackElement {
      /**
       * @param array $data Passed through unchanged to the parent constructor
       */
      public function __construct( $data = [] ) {
          $this->partClass = PPDPart_Hash::class;
          parent::__construct( $data );
      }

      /**
       * Get the accumulator that would result if the close is not found.
       *
       * @param int|bool $openingCount Number of opening characters to emit, or
       *   false to use $this->count
       * @return array List of strings and node descriptors; adjacent strings
       *   are merged into a single element
       */
      public function breakSyntax( $openingCount = false ) {
          if ( $this->open == "\n" ) {
              // Newline-opened element: saved prefix followed by the first part's output
              return array_merge( [ $this->savedPrefix ], $this->parts[0]->out );
          }

          $wanted = ( $openingCount === false ) ? $this->count : $openingCount;

          // Rebuild the opening sequence: everything but the last character of
          // $this->open, then the last character repeated to fill up to $wanted.
          $head = substr( $this->open, 0, -1 );
          $fill = substr( $this->open, -1 );
          $opening = $head . str_repeat( $fill, $wanted - strlen( $head ) );

          // Flatten all parts into one sequence with a literal pipe between them
          $pending = [];
          $needPipe = false;
          foreach ( $this->parts as $part ) {
              if ( $needPipe ) {
                  $pending[] = '|';
              }
              $needPipe = true;
              foreach ( $part->out as $node ) {
                  $pending[] = $node;
              }
          }

          // Fold the sequence into the result, gluing neighbouring strings together
          $accum = [ $this->savedPrefix . $opening ];
          $tail = 0;
          foreach ( $pending as $node ) {
              if ( is_string( $node ) && is_string( $accum[$tail] ) ) {
                  $accum[$tail] .= $node;
              } else {
                  $accum[++$tail] = $node;
              }
          }

          return $accum;
      }
  }
  /**
   * Part whose output accumulator is a plain array.
   *
   * @ingroup Parser
   */
  // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
  class PPDPart_Hash extends PPDPart {
      /**
       * @param string $out Initial output; the empty string yields an empty
       *   accumulator, anything else becomes its single first element
       */
      public function __construct( $out = '' ) {
          parent::__construct( $out !== '' ? [ $out ] : [] );
      }
  }
  /**
   * An expansion frame, used as a context to expand the result of preprocessToObj()
   * @ingroup Parser
   */
  // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
  class PPFrame_Hash implements PPFrame {
      /**
       * @var Parser
       */
      public $parser;

      /**
       * @var Preprocessor
       */
      public $preprocessor;

      /**
       * @var Title
       */
      public $title;

      /**
       * List of prefixed DB keys (or false where there is no title). This frame
       * stores its own title at index 0; template frames copy their parent's
       * list and append their own entry.
       *
       * @var array
       */
      public $titleCache;

      /**
       * Hashtable listing templates which are disallowed for expansion in this frame,
       * having been encountered previously in parent frames.
       */
      public $loopCheckHash;

      /**
       * Recursion depth of this frame, top = 0
       * Note that this is NOT the same as expansion depth in expand()
       */
      public $depth;

      private $volatile = false;
      private $ttl = null;

      /**
       * Expansion results stored by child template frames through their
       * cachedExpand(), keyed by the key the caller supplied.
       *
       * @var array
       */
      protected $childExpansionCache;

      /**
       * Construct a new preprocessor frame.
       * @param Preprocessor $preprocessor The parent preprocessor
       */
      public function __construct( $preprocessor ) {
          $this->preprocessor = $preprocessor;
          $this->parser = $preprocessor->parser;
          $this->title = $this->parser->mTitle;
          $this->titleCache = [ $this->title ? $this->title->getPrefixedDBkey() : false ];
          $this->loopCheckHash = [];
          $this->depth = 0;
          $this->childExpansionCache = [];
      }

      /**
       * Create a new child frame
       * $args is optionally a multi-root PPNode or array containing the template arguments
       *
       * Each argument is split with splitArg(). Arguments with a non-empty index
       * are stored as numbered arguments (index minus $indexOffset); the others
       * are stored as named arguments under their expanded, trimmed name. A later
       * argument with the same key replaces an earlier one, whichever of the two
       * lists the earlier one was in, and triggers a duplicate-argument warning.
       *
       * @param array|bool|PPNode_Hash_Array $args
       * @param Title|bool $title Title of the child frame; false to reuse this frame's title
       * @param int $indexOffset Subtracted from every numbered argument's index
       * @throws MWException If $args is neither false, an array nor a PPNode_Hash_Array
       * @return PPTemplateFrame_Hash
       */
      public function newChild( $args = false, $title = false, $indexOffset = 0 ) {
          $namedArgs = [];
          $numberedArgs = [];
          if ( $title === false ) {
              $title = $this->title;
          }
          if ( $args !== false ) {
              if ( $args instanceof PPNode_Hash_Array ) {
                  $args = $args->value;
              } elseif ( !is_array( $args ) ) {
                  throw new MWException( __METHOD__ . ': $args must be array or PPNode_Hash_Array' );
              }
              foreach ( $args as $arg ) {
                  $bits = $arg->splitArg();
                  if ( $bits['index'] !== '' ) {
                      // Numbered parameter
                      $index = $bits['index'] - $indexOffset;
                      if ( isset( $namedArgs[$index] ) || isset( $numberedArgs[$index] ) ) {
                          $this->warnDuplicateArg( $title, $index );
                      }
                      $numberedArgs[$index] = $bits['value'];
                      unset( $namedArgs[$index] );
                  } else {
                      // Named parameter
                      $name = trim( $this->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
                      if ( isset( $namedArgs[$name] ) || isset( $numberedArgs[$name] ) ) {
                          $this->warnDuplicateArg( $title, $name );
                      }
                      $namedArgs[$name] = $bits['value'];
                      unset( $numberedArgs[$name] );
                  }
              }
          }
          return new PPTemplateFrame_Hash( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title );
      }

      /**
       * Record a duplicate-argument warning on the parser output and add the
       * matching tracking category.
       *
       * @param Title|bool $title Title of the child frame being created
       * @param int|string $name Index or name of the duplicated argument
       */
      private function warnDuplicateArg( $title, $name ) {
          $this->parser->getOutput()->addWarning( wfMessage( 'duplicate-args-warning',
              wfEscapeWikiText( $this->title ),
              wfEscapeWikiText( $title ),
              wfEscapeWikiText( $name ) )->text() );
          $this->parser->addTrackingCategory( 'duplicate-args-category' );
      }

      /**
       * Expand a node. This frame has no parent and therefore no cache to
       * consult, so $key is ignored and the call goes straight to expand().
       *
       * @throws MWException
       * @param string|int $key
       * @param string|PPNode $root
       * @param int $flags
       * @return string
       */
      public function cachedExpand( $key, $root, $flags = 0 ) {
          // we don't have a parent, so we don't have a cache
          return $this->expand( $root, $flags );
      }

      /**
       * Expand a node (or a string, which is returned unchanged) to text.
       *
       * The tree is walked iteratively with three parallel stacks (output
       * strings, iterators and indexes) rather than by recursion. A
       * function-static counter tracks how deeply expand() calls are nested;
       * it is decremented in a finally block so that an exception thrown while
       * expanding cannot leave it raised for later calls in the same process.
       *
       * @throws MWException If a node of an unrecognised type or shape is found
       * @param string|PPNode $root
       * @param int $flags Bit field of PPFrame::* flags
       * @return string
       */
      public function expand( $root, $flags = 0 ) {
          static $expansionDepth = 0;
          if ( is_string( $root ) ) {
              return $root;
          }

          if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->getMaxPPNodeCount() ) {
              $this->parser->limitationWarn( 'node-count-exceeded',
                  $this->parser->mPPNodeCount,
                  $this->parser->mOptions->getMaxPPNodeCount()
              );
              return '<span class="error">Node-count limit exceeded</span>';
          }

          if ( $expansionDepth > $this->parser->mOptions->getMaxPPExpandDepth() ) {
              $this->parser->limitationWarn( 'expansion-depth-exceeded',
                  $expansionDepth,
                  $this->parser->mOptions->getMaxPPExpandDepth()
              );
              return '<span class="error">Expansion depth limit exceeded</span>';
          }

          ++$expansionDepth;
          try {
              if ( $expansionDepth > $this->parser->mHighestExpansionDepth ) {
                  $this->parser->mHighestExpansionDepth = $expansionDepth;
              }

              // Level 0 is a sentinel that collects the final output; the real
              // work starts at level 1 with $root as the only "iterator".
              $outStack = [ '', '' ];
              $iteratorStack = [ false, $root ];
              $indexStack = [ 0, 0 ];

              while ( count( $iteratorStack ) > 1 ) {
                  $level = count( $outStack ) - 1;
                  $iteratorNode =& $iteratorStack[$level];
                  $out =& $outStack[$level];
                  $index =& $indexStack[$level];

                  if ( is_array( $iteratorNode ) ) {
                      if ( $index >= count( $iteratorNode ) ) {
                          // All done with this iterator
                          $iteratorStack[$level] = false;
                          $contextNode = false;
                      } else {
                          $contextNode = $iteratorNode[$index];
                          $index++;
                      }
                  } elseif ( $iteratorNode instanceof PPNode_Hash_Array ) {
                      if ( $index >= $iteratorNode->getLength() ) {
                          // All done with this iterator
                          $iteratorStack[$level] = false;
                          $contextNode = false;
                      } else {
                          $contextNode = $iteratorNode->item( $index );
                          $index++;
                      }
                  } else {
                      // Copy to $contextNode and then delete from iterator stack,
                      // because this is not an iterator but we do have to execute it once
                      $contextNode = $iteratorStack[$level];
                      $iteratorStack[$level] = false;
                  }

                  $newIterator = false;
                  $contextName = false;
                  $contextChildren = false;

                  if ( $contextNode === false ) {
                      // nothing to do
                  } elseif ( is_string( $contextNode ) ) {
                      $out .= $contextNode;
                  } elseif ( $contextNode instanceof PPNode_Hash_Array ) {
                      $newIterator = $contextNode;
                  } elseif ( $contextNode instanceof PPNode_Hash_Attr ) {
                      // No output
                  } elseif ( $contextNode instanceof PPNode_Hash_Text ) {
                      $out .= $contextNode->value;
                  } elseif ( $contextNode instanceof PPNode_Hash_Tree ) {
                      $contextName = $contextNode->name;
                      $contextChildren = $contextNode->getRawChildren();
                  } elseif ( is_array( $contextNode ) ) {
                      // Node descriptor array
                      if ( count( $contextNode ) !== 2 ) {
                          throw new MWException( __METHOD__.
                              ': found an array where a node descriptor should be' );
                      }
                      list( $contextName, $contextChildren ) = $contextNode;
                  } else {
                      throw new MWException( __METHOD__ . ': Invalid parameter type' );
                  }

                  // Handle node descriptor array or tree object
                  if ( $contextName === false ) {
                      // Not a node, already handled above
                  } elseif ( $contextName[0] === '@' ) {
                      // Attribute: no output
                  } elseif ( $contextName === 'template' ) {
                      # Double-brace expansion
                      $bits = PPNode_Hash_Tree::splitRawTemplate( $contextChildren );
                      if ( $flags & PPFrame::NO_TEMPLATES ) {
                          $newIterator = $this->virtualBracketedImplode(
                              '{{', '|', '}}',
                              $bits['title'],
                              $bits['parts']
                          );
                      } else {
                          $ret = $this->parser->braceSubstitution( $bits, $this );
                          if ( isset( $ret['object'] ) ) {
                              $newIterator = $ret['object'];
                          } else {
                              $out .= $ret['text'];
                          }
                      }
                  } elseif ( $contextName === 'tplarg' ) {
                      # Triple-brace expansion
                      $bits = PPNode_Hash_Tree::splitRawTemplate( $contextChildren );
                      if ( $flags & PPFrame::NO_ARGS ) {
                          $newIterator = $this->virtualBracketedImplode(
                              '{{{', '|', '}}}',
                              $bits['title'],
                              $bits['parts']
                          );
                      } else {
                          $ret = $this->parser->argSubstitution( $bits, $this );
                          if ( isset( $ret['object'] ) ) {
                              $newIterator = $ret['object'];
                          } else {
                              $out .= $ret['text'];
                          }
                      }
                  } elseif ( $contextName === 'comment' ) {
                      # HTML-style comment
                      # Remove it in HTML, pre+remove and STRIP_COMMENTS modes
                      # Not in RECOVER_COMMENTS mode (msgnw) though.
                      if ( ( $this->parser->ot['html']
                          || ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
                          || ( $flags & PPFrame::STRIP_COMMENTS )
                          ) && !( $flags & PPFrame::RECOVER_COMMENTS )
                      ) {
                          // Comment is dropped: nothing is appended to the output
                      } elseif ( $this->parser->ot['wiki'] && !( $flags & PPFrame::RECOVER_COMMENTS ) ) {
                          # Add a strip marker in PST mode so that pstPass2() can
                          # run some old-fashioned regexes on the result.
                          # Not in RECOVER_COMMENTS mode (extractSections) though.
                          $out .= $this->parser->insertStripItem( $contextChildren[0] );
                      } else {
                          # Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
                          $out .= $contextChildren[0];
                      }
                  } elseif ( $contextName === 'ignore' ) {
                      # Output suppression used by <includeonly> etc.
                      # OT_WIKI will only respect <ignore> in substed templates.
                      # The other output types respect it unless NO_IGNORE is set.
                      # extractSections() sets NO_IGNORE and so never respects it.
                      if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] )
                          || ( $flags & PPFrame::NO_IGNORE )
                      ) {
                          $out .= $contextChildren[0];
                      }
                      // Otherwise the ignored text is suppressed: nothing is appended
                  } elseif ( $contextName === 'ext' ) {
                      # Extension tag
                      $bits = PPNode_Hash_Tree::splitRawExt( $contextChildren ) +
                          [ 'attr' => null, 'inner' => null, 'close' => null ];
                      if ( $flags & PPFrame::NO_TAGS ) {
                          // Reassemble the tag source text instead of running the extension
                          $s = '<' . $bits['name']->getFirstChild()->value;
                          if ( $bits['attr'] ) {
                              $s .= $bits['attr']->getFirstChild()->value;
                          }
                          if ( $bits['inner'] ) {
                              $s .= '>' . $bits['inner']->getFirstChild()->value;
                              if ( $bits['close'] ) {
                                  $s .= $bits['close']->getFirstChild()->value;
                              }
                          } else {
                              $s .= '/>';
                          }
                          $out .= $s;
                      } else {
                          $out .= $this->parser->extensionSubstitution( $bits, $this );
                      }
                  } elseif ( $contextName === 'h' ) {
                      # Heading
                      if ( $this->parser->ot['html'] ) {
                          # Expand immediately and insert heading index marker
                          $s = $this->expand( $contextChildren, $flags );
                          $bits = PPNode_Hash_Tree::splitRawHeading( $contextChildren );
                          $titleText = $this->title->getPrefixedDBkey();
                          $this->parser->mHeadings[] = [ $titleText, $bits['i'] ];
                          $serial = count( $this->parser->mHeadings ) - 1;
                          $marker = Parser::MARKER_PREFIX . "-h-$serial-" . Parser::MARKER_SUFFIX;
                          $s = substr( $s, 0, $bits['level'] ) . $marker . substr( $s, $bits['level'] );
                          $this->parser->mStripState->addGeneral( $marker, '' );
                          $out .= $s;
                      } else {
                          # Expand in virtual stack
                          $newIterator = $contextChildren;
                      }
                  } else {
                      # Generic recursive expansion
                      $newIterator = $contextChildren;
                  }

                  if ( $newIterator !== false ) {
                      $outStack[] = '';
                      $iteratorStack[] = $newIterator;
                      $indexStack[] = 0;
                  } elseif ( $iteratorStack[$level] === false ) {
                      // Return accumulated value to parent
                      // With tail recursion
                      while ( $iteratorStack[$level] === false && $level > 0 ) {
                          $outStack[$level - 1] .= $out;
                          array_pop( $outStack );
                          array_pop( $iteratorStack );
                          array_pop( $indexStack );
                          $level--;
                      }
                  }
              }

              return $outStack[0];
          } finally {
              // Runs on normal return and when an exception propagates
              --$expansionDepth;
          }
      }

      /**
       * Flatten a list of roots into a single list of nodes.
       *
       * A PPNode_Hash_Array contributes the elements of its value, an array
       * contributes its elements, and anything else contributes itself.
       *
       * @param array $roots
       * @return array
       */
      private static function flattenRoots( array $roots ) {
          $nodes = [];
          foreach ( $roots as $root ) {
              if ( $root instanceof PPNode_Hash_Array ) {
                  $root = $root->value;
              }
              if ( !is_array( $root ) ) {
                  $root = [ $root ];
              }
              foreach ( $root as $node ) {
                  $nodes[] = $node;
              }
          }
          return $nodes;
      }

      /**
       * Expand every node of every given root with $flags and join the results
       * with $sep.
       *
       * @param string $sep
       * @param int $flags
       * @param string|PPNode $args,...
       * @return string
       */
      public function implodeWithFlags( $sep, $flags /*, ... */ ) {
          $nodes = self::flattenRoots( array_slice( func_get_args(), 2 ) );

          $first = true;
          $s = '';
          foreach ( $nodes as $node ) {
              if ( $first ) {
                  $first = false;
              } else {
                  $s .= $sep;
              }
              $s .= $this->expand( $node, $flags );
          }
          return $s;
      }

      /**
       * Implode with no flags specified
       * This previously called implodeWithFlags but has now been inlined to reduce stack depth.
       * The flattening helper returns before expand() is entered, so it does
       * not add a stack level to the expansion itself.
       * @param string $sep
       * @param string|PPNode $args,...
       * @return string
       */
      public function implode( $sep /*, ... */ ) {
          $nodes = self::flattenRoots( array_slice( func_get_args(), 1 ) );

          $first = true;
          $s = '';
          foreach ( $nodes as $node ) {
              if ( $first ) {
                  $first = false;
              } else {
                  $s .= $sep;
              }
              $s .= $this->expand( $node );
          }
          return $s;
      }

      /**
       * Makes an object that, when expand()ed, will be the same as one obtained
       * with implode()
       *
       * @param string $sep
       * @param string|PPNode $args,...
       * @return PPNode_Hash_Array
       */
      public function virtualImplode( $sep /*, ... */ ) {
          $nodes = self::flattenRoots( array_slice( func_get_args(), 1 ) );

          $out = [];
          $first = true;
          foreach ( $nodes as $node ) {
              if ( $first ) {
                  $first = false;
              } else {
                  $out[] = $sep;
              }
              $out[] = $node;
          }
          return new PPNode_Hash_Array( $out );
      }

      /**
       * Virtual implode with brackets
       *
       * Same as virtualImplode(), with $start placed before the first node and
       * $end after the last.
       *
       * @param string $start
       * @param string $sep
       * @param string $end
       * @param string|PPNode $args,...
       * @return PPNode_Hash_Array
       */
      public function virtualBracketedImplode( $start, $sep, $end /*, ... */ ) {
          $nodes = self::flattenRoots( array_slice( func_get_args(), 3 ) );

          $out = [ $start ];
          $first = true;
          foreach ( $nodes as $node ) {
              if ( $first ) {
                  $first = false;
              } else {
                  $out[] = $sep;
              }
              $out[] = $node;
          }
          $out[] = $end;
          return new PPNode_Hash_Array( $out );
      }

      public function __toString() {
          return 'frame{}';
      }

      /**
       * Get the prefixed DB key of this frame's title, or of the entry at a
       * given index of the title cache.
       *
       * @param int|bool $level Index into $this->titleCache, or false for this frame's own title
       * @return array|bool|string The cached entry, or false if there is none at $level
       */
      public function getPDBK( $level = false ) {
          if ( $level === false ) {
              return $this->title->getPrefixedDBkey();
          } else {
              return isset( $this->titleCache[$level] ) ? $this->titleCache[$level] : false;
          }
      }

      /**
       * @return array Always empty in this class
       */
      public function getArguments() {
          return [];
      }

      /**
       * @return array Always empty in this class
       */
      public function getNumberedArguments() {
          return [];
      }

      /**
       * @return array Always empty in this class
       */
      public function getNamedArguments() {
          return [];
      }

      /**
       * Returns true if there are no arguments in this frame
       *
       * @return bool
       */
      public function isEmpty() {
          return true;
      }

      /**
       * @param int|string $name
       * @return bool Always false in this implementation.
       */
      public function getArgument( $name ) {
          return false;
      }

      /**
       * Returns true if the infinite loop check is OK, false if a loop is detected
       *
       * @param Title $title
       *
       * @return bool
       */
      public function loopCheck( $title ) {
          return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] );
      }

      /**
       * Return true if the frame is a template frame
       *
       * @return bool
       */
      public function isTemplate() {
          return false;
      }

      /**
       * Get a title of frame
       *
       * @return Title
       */
      public function getTitle() {
          return $this->title;
      }

      /**
       * Set the volatile flag
       *
       * @param bool $flag
       */
      public function setVolatile( $flag = true ) {
          $this->volatile = $flag;
      }

      /**
       * Get the volatile flag
       *
       * @return bool
       */
      public function isVolatile() {
          return $this->volatile;
      }

      /**
       * Set the TTL
       *
       * A null $ttl is ignored. Otherwise the stored TTL only ever decreases:
       * the new value is kept if none was set yet or if it is lower than the
       * stored one.
       *
       * @param int $ttl
       */
      public function setTTL( $ttl ) {
          if ( $ttl !== null && ( $this->ttl === null || $ttl < $this->ttl ) ) {
              $this->ttl = $ttl;
          }
      }

      /**
       * Get the TTL
       *
       * @return int|null
       */
      public function getTTL() {
          return $this->ttl;
      }
  }
  /**
   * Expansion frame with template arguments
   * @ingroup Parser
   */
  // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
  class PPTemplateFrame_Hash extends PPFrame_Hash {
      /** Unexpanded numbered arguments, keyed by index */
      public $numberedArgs;
      /** Unexpanded named arguments, keyed by name */
      public $namedArgs;
      /** The frame this one was created from */
      public $parent;
      /** Expanded numbered arguments, filled on demand by getNumberedArgument() */
      public $numberedExpansionCache;
      /** Expanded named arguments, filled on demand by getNamedArgument() */
      public $namedExpansionCache;

      /**
       * @param Preprocessor $preprocessor
       * @param bool|PPFrame $parent
       * @param array $numberedArgs
       * @param array $namedArgs
       * @param bool|Title $title
       */
      public function __construct( $preprocessor, $parent = false, $numberedArgs = [],
          $namedArgs = [], $title = false
      ) {
          parent::__construct( $preprocessor );

          $this->parent = $parent;
          $this->title = $title;
          $this->depth = $parent->depth + 1;

          $this->numberedArgs = $numberedArgs;
          $this->namedArgs = $namedArgs;
          $this->numberedExpansionCache = [];
          $this->namedExpansionCache = [];

          if ( $title ) {
              $pdbk = $title->getPrefixedDBkey();
          } else {
              $pdbk = false;
          }

          // Extend the parent's title list with this frame's own key
          $this->titleCache = $parent->titleCache;
          $this->titleCache[] = $pdbk;

          // Inherit the parent's loop-check entries and add this frame's title
          $this->loopCheckHash = $parent->loopCheckHash;
          if ( $pdbk !== false ) {
              $this->loopCheckHash[$pdbk] = true;
          }
      }

      /**
       * Debugging representation: every argument as a "name":"value" pair,
       * numbered arguments taking precedence over named ones with the same key.
       *
       * @return string
       */
      public function __toString() {
          $pairs = [];
          foreach ( $this->numberedArgs + $this->namedArgs as $name => $value ) {
              $escaped = str_replace( '"', '\\"', $value->__toString() );
              $pairs[] = "\"$name\":\"" . $escaped . '"';
          }
          return 'tplframe{' . implode( ', ', $pairs ) . '}';
      }

      /**
       * Expand $root, reusing the result stored under $key in the parent
       * frame's child expansion cache when there is one. A fresh result is
       * stored there only while this frame is not volatile.
       *
       * @throws MWException
       * @param string|int $key
       * @param string|PPNode $root
       * @param int $flags
       * @return string
       */
      public function cachedExpand( $key, $root, $flags = 0 ) {
          if ( !isset( $this->parent->childExpansionCache[$key] ) ) {
              $expanded = $this->expand( $root, $flags );
              if ( !$this->isVolatile() ) {
                  $this->parent->childExpansionCache[$key] = $expanded;
              }
              return $expanded;
          }
          return $this->parent->childExpansionCache[$key];
      }

      /**
       * Returns true if there are no arguments in this frame
       *
       * @return bool
       */
      public function isEmpty() {
          return !( count( $this->numberedArgs ) || count( $this->namedArgs ) );
      }

      /**
       * @return array All expanded arguments, numbered keys first, then named keys
       */
      public function getArguments() {
          $expanded = [];
          foreach ( $this->numberedArgs as $key => $unused ) {
              $expanded[$key] = $this->getArgument( $key );
          }
          foreach ( $this->namedArgs as $key => $unused ) {
              $expanded[$key] = $this->getArgument( $key );
          }
          return $expanded;
      }

      /**
       * @return array Expanded arguments for every numbered key
       */
      public function getNumberedArguments() {
          $expanded = [];
          foreach ( $this->numberedArgs as $key => $unused ) {
              $expanded[$key] = $this->getArgument( $key );
          }
          return $expanded;
      }

      /**
       * @return array Expanded arguments for every named key
       */
      public function getNamedArguments() {
          $expanded = [];
          foreach ( $this->namedArgs as $key => $unused ) {
              $expanded[$key] = $this->getArgument( $key );
          }
          return $expanded;
      }

      /**
       * @param int $index
       * @return string|bool Expanded text, or false if there is no such argument
       */
      public function getNumberedArgument( $index ) {
          if ( isset( $this->numberedArgs[$index] ) ) {
              if ( !isset( $this->numberedExpansionCache[$index] ) ) {
                  # No trimming for unnamed arguments
                  $this->numberedExpansionCache[$index] = $this->parent->expand(
                      $this->numberedArgs[$index],
                      PPFrame::STRIP_COMMENTS
                  );
              }
              return $this->numberedExpansionCache[$index];
          }
          return false;
      }

      /**
       * @param string $name
       * @return string|bool Expanded and trimmed text, or false if there is no such argument
       */
      public function getNamedArgument( $name ) {
          if ( isset( $this->namedArgs[$name] ) ) {
              if ( !isset( $this->namedExpansionCache[$name] ) ) {
                  # Trim named arguments post-expand, for backwards compatibility
                  $raw = $this->parent->expand( $this->namedArgs[$name], PPFrame::STRIP_COMMENTS );
                  $this->namedExpansionCache[$name] = trim( $raw );
              }
              return $this->namedExpansionCache[$name];
          }
          return false;
      }

      /**
       * Look the argument up among the numbered arguments first and fall back
       * to the named ones.
       *
       * @param int|string $name
       * @return string|bool
       */
      public function getArgument( $name ) {
          $numbered = $this->getNumberedArgument( $name );
          return $numbered === false ? $this->getNamedArgument( $name ) : $numbered;
      }

      /**
       * Return true if the frame is a template frame
       *
       * @return bool
       */
      public function isTemplate() {
          return true;
      }

      /**
       * Set the volatile flag on this frame and pass it on to the parent frame.
       *
       * @param bool $flag
       */
      public function setVolatile( $flag = true ) {
          parent::setVolatile( $flag );
          $this->parent->setVolatile( $flag );
      }

      /**
       * Set the TTL on this frame and pass it on to the parent frame.
       *
       * @param int $ttl
       */
      public function setTTL( $ttl ) {
          parent::setTTL( $ttl );
          $this->parent->setTTL( $ttl );
      }
  }
  /**
   * Expansion frame with custom arguments
   * @ingroup Parser
   */
  // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
  class PPCustomFrame_Hash extends PPFrame_Hash {
      /** Argument values keyed by name or index, returned as-is by getArgument() */
      public $args;

      /**
       * @param Preprocessor $preprocessor
       * @param array $args Argument values keyed by name or index
       */
      public function __construct( $preprocessor, $args ) {
          parent::__construct( $preprocessor );
          $this->args = $args;
      }

      /**
       * Debugging representation: every argument as a "name":"value" pair.
       *
       * Values are returned unchanged by getArgument(), which is documented as
       * returning strings, so they cannot be assumed to be objects. Objects are
       * rendered through their __toString(); anything else is cast to string.
       *
       * @return string
       */
      public function __toString() {
          $s = 'cstmframe{';
          $first = true;
          foreach ( $this->args as $name => $value ) {
              if ( $first ) {
                  $first = false;
              } else {
                  $s .= ', ';
              }
              // Calling ->__toString() on a plain string would be a fatal error
              $text = is_object( $value ) ? $value->__toString() : (string)$value;
              $s .= "\"$name\":\"" .
                  str_replace( '"', '\\"', $text ) . '"';
          }
          $s .= '}';
          return $s;
      }

      /**
       * @return bool True if there are no arguments in this frame
       */
      public function isEmpty() {
          return !count( $this->args );
      }

      /**
       * @param int|string $index
       * @return string|bool The stored value, or false if none is set under $index
       */
      public function getArgument( $index ) {
          if ( !isset( $this->args[$index] ) ) {
              return false;
          }
          return $this->args[$index];
      }

      /**
       * @return array The argument array exactly as given to the constructor
       */
      public function getArguments() {
          return $this->args;
      }
  }
  1582. /**
  1583. * @ingroup Parser
  1584. */
  1585. // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
  1586. class PPNode_Hash_Tree implements PPNode {
  1587. public $name;
  1588. /**
  1589. * The store array for children of this node. It is "raw" in the sense that
  1590. * nodes are two-element arrays ("descriptors") rather than PPNode_Hash_*
  1591. * objects.
  1592. */
  1593. private $rawChildren;
  1594. /**
  1595. * The store array for the siblings of this node, including this node itself.
  1596. */
  1597. private $store;
  1598. /**
  1599. * The index into $this->store which contains the descriptor of this node.
  1600. */
  1601. private $index;
  1602. /**
  1603. * The offset of the name within descriptors, used in some places for
  1604. * readability.
  1605. */
  1606. const NAME = 0;
  1607. /**
  1608. * The offset of the child list within descriptors, used in some places for
  1609. * readability.
  1610. */
  1611. const CHILDREN = 1;
  /**
   * Construct an object using the data from $store[$index]. The rest of the
   * store array can be accessed via getNextSibling().
   *
   * The descriptor at $store[$index] is unpacked into the node name and its
   * raw child list.
   *
   * @param array $store
   * @param int $index
   */
  public function __construct( array $store, $index ) {
      list( $name, $rawChildren ) = $store[$index];

      $this->store = $store;
      $this->index = $index;
      $this->name = $name;
      $this->rawChildren = $rawChildren;
  }
  1624. /**
  1625. * Construct an appropriate PPNode_Hash_* object with a class that depends
  1626. * on what is at the relevant store index.
  1627. *
  1628. * @param array $store
  1629. * @param int $index
  1630. * @return PPNode_Hash_Tree|PPNode_Hash_Attr|PPNode_Hash_Text
  1631. */
  1632. public static function factory( array $store, $index ) {
  1633. if ( !isset( $store[$index] ) ) {
  1634. return false;
  1635. }
  1636. $descriptor = $store[$index];
  1637. if ( is_string( $descriptor ) ) {
  1638. $class = PPNode_Hash_Text::class;
  1639. } elseif ( is_array( $descriptor ) ) {
  1640. if ( $descriptor[self::NAME][0] === '@' ) {
  1641. $class = PPNode_Hash_Attr::class;
  1642. } else {
  1643. $class = self::class;
  1644. }
  1645. } else {
  1646. throw new MWException( __METHOD__.': invalid node descriptor' );
  1647. }
  1648. return new $class( $store, $index );
  1649. }
  1650. /**
  1651. * Convert a node to XML, for debugging
  1652. */
  1653. public function __toString() {
  1654. $inner = '';
  1655. $attribs = '';
  1656. for ( $node = $this->getFirstChild(); $node; $node = $node->getNextSibling() ) {
  1657. if ( $node instanceof PPNode_Hash_Attr ) {
  1658. $attribs .= ' ' . $node->name . '="' . htmlspecialchars( $node->value ) . '"';
  1659. } else {
  1660. $inner .= $node->__toString();
  1661. }
  1662. }
  1663. if ( $inner === '' ) {
  1664. return "<{$this->name}$attribs/>";
  1665. } else {
  1666. return "<{$this->name}$attribs>$inner</{$this->name}>";
  1667. }
  1668. }
  1669. /**
  1670. * @return PPNode_Hash_Array
  1671. */
  1672. public function getChildren() {
  1673. $children = [];
  1674. foreach ( $this->rawChildren as $i => $child ) {
  1675. $children[] = self::factory( $this->rawChildren, $i );
  1676. }
  1677. return new PPNode_Hash_Array( $children );
  1678. }
  1679. /**
  1680. * Get the first child, or false if there is none. Note that this will
  1681. * return a temporary proxy object: different instances will be returned
  1682. * if this is called more than once on the same node.
  1683. *
  1684. * @return PPNode_Hash_Tree|PPNode_Hash_Attr|PPNode_Hash_Text|bool
  1685. */
  1686. public function getFirstChild() {
  1687. if ( !isset( $this->rawChildren[0] ) ) {
  1688. return false;
  1689. } else {
  1690. return self::factory( $this->rawChildren, 0 );
  1691. }
  1692. }
  1693. /**
  1694. * Get the next sibling, or false if there is none. Note that this will
  1695. * return a temporary proxy object: different instances will be returned
  1696. * if this is called more than once on the same node.
  1697. *
  1698. * @return PPNode_Hash_Tree|PPNode_Hash_Attr|PPNode_Hash_Text|bool
  1699. */
  1700. public function getNextSibling() {
  1701. return self::factory( $this->store, $this->index + 1 );
  1702. }
  1703. /**
  1704. * Get an array of the children with a given node name
  1705. *
  1706. * @param string $name
  1707. * @return PPNode_Hash_Array
  1708. */
  1709. public function getChildrenOfType( $name ) {
  1710. $children = [];
  1711. foreach ( $this->rawChildren as $i => $child ) {
  1712. if ( is_array( $child ) && $child[self::NAME] === $name ) {
  1713. $children[] = self::factory( $this->rawChildren, $i );
  1714. }
  1715. }
  1716. return new PPNode_Hash_Array( $children );
  1717. }
  1718. /**
  1719. * Get the raw child array. For internal use.
  1720. * @return array
  1721. */
  1722. public function getRawChildren() {
  1723. return $this->rawChildren;
  1724. }
  1725. /**
  1726. * @return bool
  1727. */
  1728. public function getLength() {
  1729. return false;
  1730. }
  1731. /**
  1732. * @param int $i
  1733. * @return bool
  1734. */
  1735. public function item( $i ) {
  1736. return false;
  1737. }
  1738. /**
  1739. * @return string
  1740. */
  1741. public function getName() {
  1742. return $this->name;
  1743. }
  1744. /**
  1745. * Split a "<part>" node into an associative array containing:
  1746. * - name PPNode name
  1747. * - index String index
  1748. * - value PPNode value
  1749. *
  1750. * @throws MWException
  1751. * @return array
  1752. */
  1753. public function splitArg() {
  1754. return self::splitRawArg( $this->rawChildren );
  1755. }
  1756. /**
  1757. * Like splitArg() but for a raw child array. For internal use only.
  1758. * @param array $children
  1759. * @return array
  1760. */
  1761. public static function splitRawArg( array $children ) {
  1762. $bits = [];
  1763. foreach ( $children as $i => $child ) {
  1764. if ( !is_array( $child ) ) {
  1765. continue;
  1766. }
  1767. if ( $child[self::NAME] === 'name' ) {
  1768. $bits['name'] = new self( $children, $i );
  1769. if ( isset( $child[self::CHILDREN][0][self::NAME] )
  1770. && $child[self::CHILDREN][0][self::NAME] === '@index'
  1771. ) {
  1772. $bits['index'] = $child[self::CHILDREN][0][self::CHILDREN][0];
  1773. }
  1774. } elseif ( $child[self::NAME] === 'value' ) {
  1775. $bits['value'] = new self( $children, $i );
  1776. }
  1777. }
  1778. if ( !isset( $bits['name'] ) ) {
  1779. throw new MWException( 'Invalid brace node passed to ' . __METHOD__ );
  1780. }
  1781. if ( !isset( $bits['index'] ) ) {
  1782. $bits['index'] = '';
  1783. }
  1784. return $bits;
  1785. }
  1786. /**
  1787. * Split an "<ext>" node into an associative array containing name, attr, inner and close
  1788. * All values in the resulting array are PPNodes. Inner and close are optional.
  1789. *
  1790. * @throws MWException
  1791. * @return array
  1792. */
  1793. public function splitExt() {
  1794. return self::splitRawExt( $this->rawChildren );
  1795. }
  1796. /**
  1797. * Like splitExt() but for a raw child array. For internal use only.
  1798. * @param array $children
  1799. * @return array
  1800. */
  1801. public static function splitRawExt( array $children ) {
  1802. $bits = [];
  1803. foreach ( $children as $i => $child ) {
  1804. if ( !is_array( $child ) ) {
  1805. continue;
  1806. }
  1807. switch ( $child[self::NAME] ) {
  1808. case 'name':
  1809. $bits['name'] = new self( $children, $i );
  1810. break;
  1811. case 'attr':
  1812. $bits['attr'] = new self( $children, $i );
  1813. break;
  1814. case 'inner':
  1815. $bits['inner'] = new self( $children, $i );
  1816. break;
  1817. case 'close':
  1818. $bits['close'] = new self( $children, $i );
  1819. break;
  1820. }
  1821. }
  1822. if ( !isset( $bits['name'] ) ) {
  1823. throw new MWException( 'Invalid ext node passed to ' . __METHOD__ );
  1824. }
  1825. return $bits;
  1826. }
  1827. /**
  1828. * Split an "<h>" node
  1829. *
  1830. * @throws MWException
  1831. * @return array
  1832. */
  1833. public function splitHeading() {
  1834. if ( $this->name !== 'h' ) {
  1835. throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
  1836. }
  1837. return self::splitRawHeading( $this->rawChildren );
  1838. }
  1839. /**
  1840. * Like splitHeading() but for a raw child array. For internal use only.
  1841. * @param array $children
  1842. * @return array
  1843. */
  1844. public static function splitRawHeading( array $children ) {
  1845. $bits = [];
  1846. foreach ( $children as $i => $child ) {
  1847. if ( !is_array( $child ) ) {
  1848. continue;
  1849. }
  1850. if ( $child[self::NAME] === '@i' ) {
  1851. $bits['i'] = $child[self::CHILDREN][0];
  1852. } elseif ( $child[self::NAME] === '@level' ) {
  1853. $bits['level'] = $child[self::CHILDREN][0];
  1854. }
  1855. }
  1856. if ( !isset( $bits['i'] ) ) {
  1857. throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
  1858. }
  1859. return $bits;
  1860. }
  1861. /**
  1862. * Split a "<template>" or "<tplarg>" node
  1863. *
  1864. * @throws MWException
  1865. * @return array
  1866. */
  1867. public function splitTemplate() {
  1868. return self::splitRawTemplate( $this->rawChildren );
  1869. }
  1870. /**
  1871. * Like splitTemplate() but for a raw child array. For internal use only.
  1872. * @param array $children
  1873. * @return array
  1874. */
  1875. public static function splitRawTemplate( array $children ) {
  1876. $parts = [];
  1877. $bits = [ 'lineStart' => '' ];
  1878. foreach ( $children as $i => $child ) {
  1879. if ( !is_array( $child ) ) {
  1880. continue;
  1881. }
  1882. switch ( $child[self::NAME] ) {
  1883. case 'title':
  1884. $bits['title'] = new self( $children, $i );
  1885. break;
  1886. case 'part':
  1887. $parts[] = new self( $children, $i );
  1888. break;
  1889. case '@lineStart':
  1890. $bits['lineStart'] = '1';
  1891. break;
  1892. }
  1893. }
  1894. if ( !isset( $bits['title'] ) ) {
  1895. throw new MWException( 'Invalid node passed to ' . __METHOD__ );
  1896. }
  1897. $bits['parts'] = new PPNode_Hash_Array( $parts );
  1898. return $bits;
  1899. }
  1900. }
  1901. /**
  1902. * @ingroup Parser
  1903. */
  1904. // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
  class PPNode_Hash_Text implements PPNode {
      /**
       * The scalar text content of this node.
       */
      public $value;

      /**
       * The store array this node was read from, kept for sibling lookup.
       */
      private $store;

      /**
       * The index of this node within $this->store.
       */
      private $index;

      /**
       * Construct an object using the data from $store[$index]. The rest of the
       * store array can be accessed via getNextSibling().
       *
       * @param array $store
       * @param int $index
       * @throws MWException If $store[$index] is not a scalar
       */
      public function __construct( array $store, $index ) {
          $text = $store[$index];
          if ( !is_scalar( $text ) ) {
              throw new MWException( __CLASS__ . ' given object instead of string' );
          }
          $this->value = $text;
          $this->store = $store;
          $this->index = $index;
      }

      /**
       * Render the text with HTML special characters escaped.
       *
       * @return string
       */
      public function __toString() {
          return htmlspecialchars( $this->value );
      }

      /**
       * Get the node stored right after this one, or false if there is none.
       *
       * @return PPNode_Hash_Tree|PPNode_Hash_Attr|PPNode_Hash_Text|bool
       */
      public function getNextSibling() {
          $nextIndex = $this->index + 1;
          return PPNode_Hash_Tree::factory( $this->store, $nextIndex );
      }

      /**
       * Text nodes have no children.
       *
       * @return bool Always false
       */
      public function getChildren() {
          return false;
      }

      /**
       * Text nodes have no children.
       *
       * @return bool Always false
       */
      public function getFirstChild() {
          return false;
      }

      /**
       * Text nodes have no children.
       *
       * @param string $name
       * @return bool Always false
       */
      public function getChildrenOfType( $name ) {
          return false;
      }

      /**
       * Text nodes are not node lists.
       *
       * @return bool Always false
       */
      public function getLength() {
          return false;
      }

      /**
       * Text nodes are not node lists.
       *
       * @param int $i
       * @return bool Always false
       */
      public function item( $i ) {
          return false;
      }

      /**
       * @return string The fixed name "#text"
       */
      public function getName() {
          return '#text';
      }

      /**
       * Not applicable to text nodes.
       *
       * @throws MWException Always
       */
      public function splitArg() {
          throw new MWException( __METHOD__ . ': not supported' );
      }

      /**
       * Not applicable to text nodes.
       *
       * @throws MWException Always
       */
      public function splitExt() {
          throw new MWException( __METHOD__ . ': not supported' );
      }

      /**
       * Not applicable to text nodes.
       *
       * @throws MWException Always
       */
      public function splitHeading() {
          throw new MWException( __METHOD__ . ': not supported' );
      }
  }
  1957. /**
  1958. * @ingroup Parser
  1959. */
  1960. // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
  class PPNode_Hash_Array implements PPNode {
      /**
       * The wrapped list of items, exactly as passed to the constructor.
       */
      public $value;

      /**
       * @param array $value The items to wrap
       */
      public function __construct( $value ) {
          $this->value = $value;
      }

      /**
       * Dump the whole object with var_export(), for debugging.
       *
       * @return string
       */
      public function __toString() {
          return var_export( $this, true );
      }

      /**
       * @return int The number of wrapped items
       */
      public function getLength() {
          return count( $this->value );
      }

      /**
       * Get the item stored at a given position.
       *
       * A missing index yields false, matching what item() returns on the
       * other node classes, instead of raising an undefined-offset
       * diagnostic and returning null.
       *
       * @param int $i
       * @return mixed|bool The item, or false if nothing is stored at $i
       */
      public function item( $i ) {
          return isset( $this->value[$i] ) ? $this->value[$i] : false;
      }

      /**
       * @return string The fixed name "#nodelist"
       */
      public function getName() {
          return '#nodelist';
      }

      /**
       * Node lists have no siblings.
       *
       * @return bool Always false
       */
      public function getNextSibling() {
          return false;
      }

      /**
       * Node lists expose their content through item(), not as children.
       *
       * @return bool Always false
       */
      public function getChildren() {
          return false;
      }

      /**
       * Node lists expose their content through item(), not as children.
       *
       * @return bool Always false
       */
      public function getFirstChild() {
          return false;
      }

      /**
       * Node lists expose their content through item(), not as children.
       *
       * @param string $name
       * @return bool Always false
       */
      public function getChildrenOfType( $name ) {
          return false;
      }

      /**
       * Not applicable to node lists.
       *
       * @throws MWException Always
       */
      public function splitArg() {
          throw new MWException( __METHOD__ . ': not supported' );
      }

      /**
       * Not applicable to node lists.
       *
       * @throws MWException Always
       */
      public function splitExt() {
          throw new MWException( __METHOD__ . ': not supported' );
      }

      /**
       * Not applicable to node lists.
       *
       * @throws MWException Always
       */
      public function splitHeading() {
          throw new MWException( __METHOD__ . ': not supported' );
      }
  }
  2000. /**
  2001. * @ingroup Parser
  2002. */
  2003. // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
  class PPNode_Hash_Attr implements PPNode {
      /**
       * The attribute name, with the leading "@" of the descriptor removed.
       */
      public $name;

      /**
       * The attribute value: the first entry of the descriptor's child list.
       */
      public $value;

      /**
       * The store array this node was read from, kept for sibling lookup.
       */
      private $store;

      /**
       * The index of this node within $this->store.
       */
      private $index;

      /**
       * Construct an object using the data from $store[$index]. The rest of the
       * store array can be accessed via getNextSibling().
       *
       * @param array $store
       * @param int $index
       * @throws MWException If the descriptor name does not start with "@"
       */
      public function __construct( array $store, $index ) {
          $descriptor = $store[$index];
          $rawName = $descriptor[PPNode_Hash_Tree::NAME];
          if ( $rawName[0] !== '@' ) {
              throw new MWException( __METHOD__ . ': invalid name in attribute descriptor' );
          }
          $this->name = substr( $rawName, 1 );
          $this->value = $descriptor[PPNode_Hash_Tree::CHILDREN][0];
          $this->store = $store;
          $this->index = $index;
      }

      /**
       * Render the attribute as an "@name" pseudo-element, for debugging.
       * The value is HTML-escaped.
       *
       * @return string
       */
      public function __toString() {
          $escaped = htmlspecialchars( $this->value );
          return "<@{$this->name}>" . $escaped . "</@{$this->name}>";
      }

      /**
       * @return string The attribute name without its "@" prefix
       */
      public function getName() {
          return $this->name;
      }

      /**
       * Get the node stored right after this one, or false if there is none.
       *
       * @return PPNode_Hash_Tree|PPNode_Hash_Attr|PPNode_Hash_Text|bool
       */
      public function getNextSibling() {
          $nextIndex = $this->index + 1;
          return PPNode_Hash_Tree::factory( $this->store, $nextIndex );
      }

      /**
       * Attribute nodes have no children.
       *
       * @return bool Always false
       */
      public function getChildren() {
          return false;
      }

      /**
       * Attribute nodes have no children.
       *
       * @return bool Always false
       */
      public function getFirstChild() {
          return false;
      }

      /**
       * Attribute nodes have no children.
       *
       * @param string $name
       * @return bool Always false
       */
      public function getChildrenOfType( $name ) {
          return false;
      }

      /**
       * Attribute nodes are not node lists.
       *
       * @return bool Always false
       */
      public function getLength() {
          return false;
      }

      /**
       * Attribute nodes are not node lists.
       *
       * @param int $i
       * @return bool Always false
       */
      public function item( $i ) {
          return false;
      }

      /**
       * Not applicable to attribute nodes.
       *
       * @throws MWException Always
       */
      public function splitArg() {
          throw new MWException( __METHOD__ . ': not supported' );
      }

      /**
       * Not applicable to attribute nodes.
       *
       * @throws MWException Always
       */
      public function splitExt() {
          throw new MWException( __METHOD__ . ': not supported' );
      }

      /**
       * Not applicable to attribute nodes.
       *
       * @throws MWException Always
       */
      public function splitHeading() {
          throw new MWException( __METHOD__ . ': not supported' );
      }
  }