Preprocessor.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. <?php
  2. /**
  3. * Interfaces for preprocessors
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. * @ingroup Parser
  22. */
  23. use MediaWiki\Logger\LoggerFactory;
  24. /**
  25. * @ingroup Parser
  26. */
  27. abstract class Preprocessor {
  28. const CACHE_VERSION = 1;
  29. /**
  30. * @var array Brace matching rules.
  31. */
  32. protected $rules = [
  33. '{' => [
  34. 'end' => '}',
  35. 'names' => [
  36. 2 => 'template',
  37. 3 => 'tplarg',
  38. ],
  39. 'min' => 2,
  40. 'max' => 3,
  41. ],
  42. '[' => [
  43. 'end' => ']',
  44. 'names' => [ 2 => null ],
  45. 'min' => 2,
  46. 'max' => 2,
  47. ],
  48. '-{' => [
  49. 'end' => '}-',
  50. 'names' => [ 2 => null ],
  51. 'min' => 2,
  52. 'max' => 2,
  53. ],
  54. ];
  55. /**
  56. * Store a document tree in the cache.
  57. *
  58. * @param string $text
  59. * @param int $flags
  60. * @param string $tree
  61. */
  62. protected function cacheSetTree( $text, $flags, $tree ) {
  63. $config = RequestContext::getMain()->getConfig();
  64. $length = strlen( $text );
  65. $threshold = $config->get( 'PreprocessorCacheThreshold' );
  66. if ( $threshold === false || $length < $threshold || $length > 1e6 ) {
  67. return;
  68. }
  69. $cache = ObjectCache::getLocalClusterInstance();
  70. $key = $cache->makeKey(
  71. defined( 'static::CACHE_PREFIX' ) ? static::CACHE_PREFIX : static::class,
  72. md5( $text ), $flags );
  73. $value = sprintf( "%08d", static::CACHE_VERSION ) . $tree;
  74. $cache->set( $key, $value, 86400 );
  75. LoggerFactory::getInstance( 'Preprocessor' )
  76. ->info( "Cached preprocessor output (key: $key)" );
  77. }
  78. /**
  79. * Attempt to load a precomputed document tree for some given wikitext
  80. * from the cache.
  81. *
  82. * @param string $text
  83. * @param int $flags
  84. * @return PPNode_Hash_Tree|bool
  85. */
  86. protected function cacheGetTree( $text, $flags ) {
  87. $config = RequestContext::getMain()->getConfig();
  88. $length = strlen( $text );
  89. $threshold = $config->get( 'PreprocessorCacheThreshold' );
  90. if ( $threshold === false || $length < $threshold || $length > 1e6 ) {
  91. return false;
  92. }
  93. $cache = ObjectCache::getLocalClusterInstance();
  94. $key = $cache->makeKey(
  95. defined( 'static::CACHE_PREFIX' ) ? static::CACHE_PREFIX : static::class,
  96. md5( $text ), $flags );
  97. $value = $cache->get( $key );
  98. if ( !$value ) {
  99. return false;
  100. }
  101. $version = intval( substr( $value, 0, 8 ) );
  102. if ( $version !== static::CACHE_VERSION ) {
  103. return false;
  104. }
  105. LoggerFactory::getInstance( 'Preprocessor' )
  106. ->info( "Loaded preprocessor output from cache (key: $key)" );
  107. return substr( $value, 8 );
  108. }
  109. /**
  110. * Create a new top-level frame for expansion of a page
  111. *
  112. * @return PPFrame
  113. */
  114. abstract public function newFrame();
  115. /**
  116. * Create a new custom frame for programmatic use of parameter replacement
  117. * as used in some extensions.
  118. *
  119. * @param array $args
  120. *
  121. * @return PPFrame
  122. */
  123. abstract public function newCustomFrame( $args );
  124. /**
  125. * Create a new custom node for programmatic use of parameter replacement
  126. * as used in some extensions.
  127. *
  128. * @param array $values
  129. */
  130. abstract public function newPartNodeArray( $values );
  131. /**
  132. * Preprocess text to a PPNode
  133. *
  134. * @param string $text
  135. * @param int $flags
  136. *
  137. * @return PPNode
  138. */
  139. abstract public function preprocessToObj( $text, $flags = 0 );
  140. }
  141. /**
  142. * @ingroup Parser
  143. */
  144. interface PPFrame {
  145. const NO_ARGS = 1;
  146. const NO_TEMPLATES = 2;
  147. const STRIP_COMMENTS = 4;
  148. const NO_IGNORE = 8;
  149. const RECOVER_COMMENTS = 16;
  150. const NO_TAGS = 32;
  151. const RECOVER_ORIG = 59; // = 1|2|8|16|32 no constant expression support in PHP yet
  152. /** This constant exists when $indexOffset is supported in newChild() */
  153. const SUPPORTS_INDEX_OFFSET = 1;
  154. /**
  155. * Create a child frame
  156. *
  157. * @param array|bool $args
  158. * @param bool|Title $title
  159. * @param int $indexOffset A number subtracted from the index attributes of the arguments
  160. *
  161. * @return PPFrame
  162. */
  163. public function newChild( $args = false, $title = false, $indexOffset = 0 );
  164. /**
  165. * Expand a document tree node, caching the result on its parent with the given key
  166. * @param string|int $key
  167. * @param string|PPNode $root
  168. * @param int $flags
  169. * @return string
  170. */
  171. public function cachedExpand( $key, $root, $flags = 0 );
  172. /**
  173. * Expand a document tree node
  174. * @param string|PPNode $root
  175. * @param int $flags
  176. * @return string
  177. */
  178. public function expand( $root, $flags = 0 );
  179. /**
  180. * Implode with flags for expand()
  181. * @param string $sep
  182. * @param int $flags
  183. * @param string|PPNode $args,...
  184. * @return string
  185. */
  186. public function implodeWithFlags( $sep, $flags /*, ... */ );
  187. /**
  188. * Implode with no flags specified
  189. * @param string $sep
  190. * @param string|PPNode $args,...
  191. * @return string
  192. */
  193. public function implode( $sep /*, ... */ );
  194. /**
  195. * Makes an object that, when expand()ed, will be the same as one obtained
  196. * with implode()
  197. * @param string $sep
  198. * @param string|PPNode $args,...
  199. * @return PPNode
  200. */
  201. public function virtualImplode( $sep /*, ... */ );
  202. /**
  203. * Virtual implode with brackets
  204. * @param string $start
  205. * @param string $sep
  206. * @param string $end
  207. * @param string|PPNode $args,...
  208. * @return PPNode
  209. */
  210. public function virtualBracketedImplode( $start, $sep, $end /*, ... */ );
  211. /**
  212. * Returns true if there are no arguments in this frame
  213. *
  214. * @return bool
  215. */
  216. public function isEmpty();
  217. /**
  218. * Returns all arguments of this frame
  219. * @return array
  220. */
  221. public function getArguments();
  222. /**
  223. * Returns all numbered arguments of this frame
  224. * @return array
  225. */
  226. public function getNumberedArguments();
  227. /**
  228. * Returns all named arguments of this frame
  229. * @return array
  230. */
  231. public function getNamedArguments();
  232. /**
  233. * Get an argument to this frame by name
  234. * @param int|string $name
  235. * @return string|bool
  236. */
  237. public function getArgument( $name );
  238. /**
  239. * Returns true if the infinite loop check is OK, false if a loop is detected
  240. *
  241. * @param Title $title
  242. * @return bool
  243. */
  244. public function loopCheck( $title );
  245. /**
  246. * Return true if the frame is a template frame
  247. * @return bool
  248. */
  249. public function isTemplate();
  250. /**
  251. * Set the "volatile" flag.
  252. *
  253. * Note that this is somewhat of a "hack" in order to make extensions
  254. * with side effects (such as Cite) work with the PHP parser. New
  255. * extensions should be written in a way that they do not need this
  256. * function, because other parsers (such as Parsoid) are not guaranteed
  257. * to respect it, and it may be removed in the future.
  258. *
  259. * @param bool $flag
  260. */
  261. public function setVolatile( $flag = true );
  262. /**
  263. * Get the "volatile" flag.
  264. *
  265. * Callers should avoid caching the result of an expansion if it has the
  266. * volatile flag set.
  267. *
  268. * @see self::setVolatile()
  269. * @return bool
  270. */
  271. public function isVolatile();
  272. /**
  273. * Get the TTL of the frame's output.
  274. *
  275. * This is the maximum amount of time, in seconds, that this frame's
  276. * output should be cached for. A value of null indicates that no
  277. * maximum has been specified.
  278. *
  279. * Note that this TTL only applies to caching frames as parts of pages.
  280. * It is not relevant to caching the entire rendered output of a page.
  281. *
  282. * @return int|null
  283. */
  284. public function getTTL();
  285. /**
  286. * Set the TTL of the output of this frame and all of its ancestors.
  287. * Has no effect if the new TTL is greater than the one already set.
  288. * Note that it is the caller's responsibility to change the cache
  289. * expiry of the page as a whole, if such behavior is desired.
  290. *
  291. * @see self::getTTL()
  292. * @param int $ttl
  293. */
  294. public function setTTL( $ttl );
  295. /**
  296. * Get a title of frame
  297. *
  298. * @return Title
  299. */
  300. public function getTitle();
  301. }
  302. /**
  303. * There are three types of nodes:
  304. * * Tree nodes, which have a name and contain other nodes as children
  305. * * Array nodes, which also contain other nodes but aren't considered part of a tree
  306. * * Leaf nodes, which contain the actual data
  307. *
  308. * This interface provides access to the tree structure and to the contents of array nodes,
  309. * but it does not provide access to the internal structure of leaf nodes. Access to leaf
  310. * data is provided via two means:
  311. * * PPFrame::expand(), which provides expanded text
  312. * * The PPNode::split*() functions, which provide metadata about certain types of tree node
  313. * @ingroup Parser
  314. */
  315. interface PPNode {
  316. /**
  317. * Get an array-type node containing the children of this node.
  318. * Returns false if this is not a tree node.
  319. * @return PPNode
  320. */
  321. public function getChildren();
  322. /**
  323. * Get the first child of a tree node. False if there isn't one.
  324. *
  325. * @return PPNode
  326. */
  327. public function getFirstChild();
  328. /**
  329. * Get the next sibling of any node. False if there isn't one
  330. * @return PPNode
  331. */
  332. public function getNextSibling();
  333. /**
  334. * Get all children of this tree node which have a given name.
  335. * Returns an array-type node, or false if this is not a tree node.
  336. * @param string $type
  337. * @return bool|PPNode
  338. */
  339. public function getChildrenOfType( $type );
  340. /**
  341. * Returns the length of the array, or false if this is not an array-type node
  342. */
  343. public function getLength();
  344. /**
  345. * Returns an item of an array-type node
  346. * @param int $i
  347. * @return bool|PPNode
  348. */
  349. public function item( $i );
  350. /**
  351. * Get the name of this node. The following names are defined here:
  352. *
  353. * h A heading node.
  354. * template A double-brace node.
  355. * tplarg A triple-brace node.
  356. * title The first argument to a template or tplarg node.
  357. * part Subsequent arguments to a template or tplarg node.
  358. * #nodelist An array-type node
  359. *
  360. * The subclass may define various other names for tree and leaf nodes.
  361. * @return string
  362. */
  363. public function getName();
  364. /**
  365. * Split a "<part>" node into an associative array containing:
  366. * name PPNode name
  367. * index String index
  368. * value PPNode value
  369. * @return array
  370. */
  371. public function splitArg();
  372. /**
  373. * Split an "<ext>" node into an associative array containing name, attr, inner and close
  374. * All values in the resulting array are PPNodes. Inner and close are optional.
  375. * @return array
  376. */
  377. public function splitExt();
  378. /**
  379. * Split an "<h>" node
  380. * @return array
  381. */
  382. public function splitHeading();
  383. }