MagicWord.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680
  1. <?php
  /**
   * Magic word handling: defines the MagicWord and MagicWordArray classes.
   * See docs/magicword.txt for background.
   *
   * @file
   * @ingroup Parser
   */
  /**
   * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
   *
   * Usage:
   *     if ( MagicWord::get( 'redirect' )->match( $text ) )
   *
   * Possible future improvements:
   *   * Simultaneous searching for a number of magic words
   *   * MagicWord::$mObjects in shared memory
   *
   * Please avoid reading the data out of one of these objects and then writing
   * special case code. If possible, add another match()-like function here.
   *
   * To add magic words in an extension, use the LanguageGetMagic hook. For
   * magic words which are also Parser variables, add a MagicWordwgVariableIDs
   * hook. Use string keys.
   *
   * @ingroup Parser
   */
  class MagicWord {
      /*
       * Per-instance state. These are documented as private but stay public
       * (the original used "var") because load() hands $this to
       * $wgContLang->getMagic(), which presumably fills in $mSynonyms and
       * $mCaseSensitive from outside the class -- confirm before tightening.
       */
      public $mId, $mSynonyms, $mCaseSensitive, $mRegex;
      public $mRegexStart, $mBaseRegex, $mVariableRegex;
      /* Declared explicitly: it used to be created dynamically in the constructor. */
      public $mVariableStartToEndRegex;
      public $mModified, $mFound;

      /** True once the MagicWordwgVariableIDs hook has been run by getVariableIDs() */
      static public $mVariableIDsInitialised = false;

      /** IDs of the magic words that are parser variables */
      static public $mVariableIDs = array(
          'currentmonth',
          'currentmonthname',
          'currentmonthnamegen',
          'currentmonthabbrev',
          'currentday',
          'currentday2',
          'currentdayname',
          'currentyear',
          'currenttime',
          'currenthour',
          'localmonth',
          'localmonthname',
          'localmonthnamegen',
          'localmonthabbrev',
          'localday',
          'localday2',
          'localdayname',
          'localyear',
          'localtime',
          'localhour',
          'numberofarticles',
          'numberoffiles',
          'numberofedits',
          'sitename',
          'server',
          'servername',
          'scriptpath',
          'pagename',
          'pagenamee',
          'fullpagename',
          'fullpagenamee',
          'namespace',
          'namespacee',
          'currentweek',
          'currentdow',
          'localweek',
          'localdow',
          'revisionid',
          'revisionday',
          'revisionday2',
          'revisionmonth',
          'revisionyear',
          'revisiontimestamp',
          'revisionuser',
          'subpagename',
          'subpagenamee',
          'displaytitle',
          'talkspace',
          'talkspacee',
          'subjectspace',
          'subjectspacee',
          'talkpagename',
          'talkpagenamee',
          'subjectpagename',
          'subjectpagenamee',
          'numberofusers',
          'numberofactiveusers',
          'newsectionlink',
          'nonewsectionlink',
          'numberofpages',
          'currentversion',
          'basepagename',
          'basepagenamee',
          'urlencode',
          'currenttimestamp',
          'localtimestamp',
          'directionmark',
          'language',
          'contentlanguage',
          'pagesinnamespace',
          'numberofadmins',
          'numberofviews',
          'defaultsort',
          'pagesincategory',
          'index',
          'noindex',
          'numberingroup',
      );

      /*
       * Array of caching hints for ParserCache, keyed by magic word ID.
       * The values look like seconds (3600 = hour, 86400 = day) -- TODO confirm.
       */
      static public $mCacheTTLs = array (
          'currentmonth' => 86400,
          'currentmonthname' => 86400,
          'currentmonthnamegen' => 86400,
          'currentmonthabbrev' => 86400,
          'currentday' => 3600,
          'currentday2' => 3600,
          'currentdayname' => 3600,
          'currentyear' => 86400,
          'currenttime' => 3600,
          'currenthour' => 3600,
          'localmonth' => 86400,
          'localmonthname' => 86400,
          'localmonthnamegen' => 86400,
          'localmonthabbrev' => 86400,
          'localday' => 3600,
          'localday2' => 3600,
          'localdayname' => 3600,
          'localyear' => 86400,
          'localtime' => 3600,
          'localhour' => 3600,
          'numberofarticles' => 3600,
          'numberoffiles' => 3600,
          'numberofedits' => 3600,
          'currentweek' => 3600,
          'currentdow' => 3600,
          'localweek' => 3600,
          'localdow' => 3600,
          'numberofusers' => 3600,
          'numberofactiveusers' => 3600,
          'numberofpages' => 3600,
          'currentversion' => 86400,
          'currenttimestamp' => 3600,
          'localtimestamp' => 3600,
          'pagesinnamespace' => 3600,
          'numberofadmins' => 3600,
          'numberofviews' => 3600,
          'numberingroup' => 3600,
      );

      /** IDs used to build the MagicWordArray returned by getDoubleUnderscoreArray() */
      static public $mDoubleUnderscoreIDs = array(
          'notoc',
          'nogallery',
          'forcetoc',
          'toc',
          'noeditsection',
          'newsectionlink',
          'nonewsectionlink',
          'hiddencat',
          'index',
          'noindex',
          'staticredirect',
      );

      /** Cache of MagicWord objects created by get(), keyed by ID */
      static public $mObjects = array();

      /** Lazily created MagicWordArray, see getDoubleUnderscoreArray() */
      static public $mDoubleUnderscoreArray = null;

      /**
       * @param $id  Magic word ID
       * @param $syn Synonym or array of synonyms (cast to array)
       * @param $cs  Whether matching is case sensitive
       */
      public function __construct( $id = 0, $syn = '', $cs = false ) {
          $this->mId = $id;
          $this->mSynonyms = (array)$syn;
          $this->mCaseSensitive = $cs;
          // All regexes are built lazily by initRegex(); '' means "not built yet".
          $this->mRegex = '';
          $this->mRegexStart = '';
          $this->mBaseRegex = '';
          $this->mVariableRegex = '';
          $this->mVariableStartToEndRegex = '';
          $this->mModified = false;
          $this->mFound = false;
      }

      /**
       * Factory: returns the (cached) object representing an ID, creating and
       * loading it on first use.
       *
       * @param $id Magic word ID
       * @return MagicWord
       */
      public static function &get( $id ) {
          wfProfileIn( __METHOD__ );
          if ( !array_key_exists( $id, self::$mObjects ) ) {
              $mw = new MagicWord();
              $mw->load( $id );
              self::$mObjects[$id] = $mw;
          }
          wfProfileOut( __METHOD__ );
          return self::$mObjects[$id];
      }

      /**
       * Get an array of parser variable IDs.
       * On the first call the MagicWordMagicWords and MagicWordwgVariableIDs
       * hooks are run so that extensions can contribute.
       *
       * @return array
       */
      public static function getVariableIDs() {
          if ( !self::$mVariableIDsInitialised ) {
              # Deprecated constant definition hook, available for extensions that need it
              $magicWords = array();
              wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) );
              foreach ( $magicWords as $word ) {
                  // Two handlers may register the same word; redefining a
                  // constant raises a warning, so only define it once.
                  if ( !defined( $word ) ) {
                      define( $word, $word );
                  }
              }

              # Get variable IDs
              wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
              self::$mVariableIDsInitialised = true;
          }
          return self::$mVariableIDs;
      }

      /**
       * Allow external reads of the TTL array.
       *
       * @param $id Magic word ID
       * @return int The entry of $mCacheTTLs for $id, or -1 if there is none
       */
      public static function getCacheTTL( $id ) {
          if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
              return self::$mCacheTTLs[$id];
          }
          return -1;
      }

      /**
       * Get a MagicWordArray of double-underscore entities (created on first use).
       *
       * @return MagicWordArray
       */
      public static function getDoubleUnderscoreArray() {
          if ( is_null( self::$mDoubleUnderscoreArray ) ) {
              self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
          }
          return self::$mDoubleUnderscoreArray;
      }

      /**
       * Initialises this object with an ID.
       * If no synonyms are available after asking $wgContLang, a placeholder
       * synonym is installed and the problem is logged instead of thrown.
       *
       * @param $id Magic word ID
       */
      public function load( $id ) {
          global $wgContLang;
          $this->mId = $id;
          $wgContLang->getMagic( $this );
          if ( !$this->mSynonyms ) {
              $this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
              #throw new MWException( "Error: invalid magic word '$id'" );
              wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
          }
      }

      /**
       * Preliminary initialisation: builds every regex from the synonym list.
       * @private
       */
      public function initRegex() {
          #$variableClass = Title::legalChars();
          # This was used for matching "$1" variables, but different uses of the feature will have
          # different restrictions, which should be checked *after* the MagicWord has been matched,
          # not here. - IMSoP

          $escSyn = array();
          foreach ( $this->mSynonyms as $synonym ) {
              // In case a magic word contains /, like that's going to happen;)
              $escSyn[] = preg_quote( $synonym, '/' );
          }
          $this->mBaseRegex = implode( '|', $escSyn );

          $case = $this->mCaseSensitive ? '' : 'iu';
          $this->mRegex = "/{$this->mBaseRegex}/{$case}";
          $this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
          // preg_quote() turned "$1" into "\$1"; swap it for a lazy capture group.
          $this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
          $this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
              "/^(?:{$this->mBaseRegex})$/{$case}" );
      }

      /**
       * Gets a regex representing matching the word
       * @return string
       */
      public function getRegex() {
          if ( $this->mRegex === '' ) {
              $this->initRegex();
          }
          return $this->mRegex;
      }

      /**
       * Gets the regexp case modifier to use, i.e. "iu" or nothing, to be used if
       * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
       * the complete expression
       * @return string
       */
      public function getRegexCase() {
          if ( $this->mRegex === '' ) {
              $this->initRegex();
          }
          return $this->mCaseSensitive ? '' : 'iu';
      }

      /**
       * Gets a regex matching the word, if it is at the string start
       * @return string
       */
      public function getRegexStart() {
          if ( $this->mRegex === '' ) {
              $this->initRegex();
          }
          return $this->mRegexStart;
      }

      /**
       * regex without the slashes and what not
       * @return string
       */
      public function getBaseRegex() {
          if ( $this->mRegex === '' ) {
              $this->initRegex();
          }
          return $this->mBaseRegex;
      }

      /**
       * Returns true if the text contains the word
       * @return bool
       */
      public function match( $text ) {
          // preg_match() yields 1/0/false; the documented contract is a bool.
          return (bool)preg_match( $this->getRegex(), $text );
      }

      /**
       * Returns true if the text starts with the word
       * @return bool
       */
      public function matchStart( $text ) {
          return (bool)preg_match( $this->getRegexStart(), $text );
      }

      /**
       * Returns NULL if there's no match, the value of $1 otherwise
       * The return code is the matched string, if there's no variable
       * part in the regex and the matched variable part ($1) if there
       * is one.
       */
      public function matchVariableStartToEnd( $text ) {
          $matches = array();
          $matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
          if ( $matchcount == 0 ) {
              return null;
          }

          # multiple matched parts (variable match); some will be empty because of
          # synonyms. The variable will be the second non-empty one so remove any
          # blank elements and re-sort the indices.
          # See also bug 6526
          #
          # Only the empty string counts as blank here: a plain array_filter()
          # would also throw away a captured "0".
          $nonEmpty = array();
          foreach ( $matches as $part ) {
              if ( $part !== '' ) {
                  $nonEmpty[] = $part;
              }
          }

          if ( isset( $nonEmpty[1] ) ) {
              return $nonEmpty[1];
          }
          // No (non-empty) variable part: hand back the whole match.
          return $matches[0];
      }

      /**
       * Returns true if the text matches the word, and alters the
       * input string, removing all instances of the word
       */
      public function matchAndRemove( &$text ) {
          $this->mFound = false;
          $text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
          return $this->mFound;
      }

      /**
       * Like matchAndRemove(), but only matches the word at the start of
       * the input string.
       */
      public function matchStartAndRemove( &$text ) {
          $this->mFound = false;
          $text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
          return $this->mFound;
      }

      /**
       * Used in matchAndRemove(): records that a match happened and
       * replaces it with nothing.
       * @private
       **/
      public function pregRemoveAndRecord() {
          $this->mFound = true;
          return '';
      }

      /**
       * Replaces the word with something else
       *
       * @param $replacement Replacement text (escaped, so it is used literally)
       * @param $subject     Text to search
       * @param $limit       Passed through to preg_replace()
       */
      public function replace( $replacement, $subject, $limit = -1 ) {
          $res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
          $this->mModified = ( $res !== $subject );
          return $res;
      }

      /**
       * Variable handling: {{SUBST:xxx}} style words
       * Calls back a function to determine what to replace xxx with
       * Input word must contain $1
       */
      public function substituteCallback( $text, $callback ) {
          $res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
          $this->mModified = ( $res !== $text );
          return $res;
      }

      /**
       * Matches the word, where $1 is a wildcard
       * @return string
       */
      public function getVariableRegex() {
          if ( $this->mVariableRegex === '' ) {
              $this->initRegex();
          }
          return $this->mVariableRegex;
      }

      /**
       * Matches the entire string, where $1 is a wildcard
       * @return string
       */
      public function getVariableStartToEndRegex() {
          if ( $this->mVariableStartToEndRegex === '' ) {
              $this->initRegex();
          }
          return $this->mVariableStartToEndRegex;
      }

      /**
       * Accesses the synonym list directly
       * @param $i Index into the synonym list
       */
      public function getSynonym( $i ) {
          return $this->mSynonyms[$i];
      }

      /**
       * @return array The full synonym list
       */
      public function getSynonyms() {
          return $this->mSynonyms;
      }

      /**
       * Returns true if the last call to replace() or substituteCallback()
       * returned a modified text, otherwise false.
       */
      public function getWasModified() {
          return $this->mModified;
      }

      /**
       * $magicarr is an associative array of (magic word ID => replacement)
       * This method uses the php feature to do several replacements at the same time,
       * thereby gaining some efficiency. The result is placed in the out variable
       * $result. The return value is true if something was replaced.
       *
       * Note: unlike replace(), the replacements are handed to preg_replace()
       * unescaped.
       *
       * @static
       **/
      public static function replaceMultiple( $magicarr, $subject, &$result ) {
          $search = array();
          $replace = array();
          foreach ( $magicarr as $id => $replacement ) {
              $mw = MagicWord::get( $id );
              $search[] = $mw->getRegex();
              $replace[] = $replacement;
          }

          $result = preg_replace( $search, $replace, $subject );
          return $result !== $subject;
      }

      /**
       * Adds all the synonyms of this MagicWord to an array, to allow quick
       * lookup in a list of magic words. Keys are the synonyms passed
       * through $wgContLang->lc().
       */
      public function addToArray( &$array, $value ) {
          global $wgContLang;
          foreach ( $this->mSynonyms as $syn ) {
              $array[$wgContLang->lc( $syn )] = $value;
          }
      }

      /**
       * @return bool Whether this word is matched case-sensitively
       */
      public function isCaseSensitive() {
          return $this->mCaseSensitive;
      }

      /**
       * @return The ID this object was constructed or loaded with
       */
      public function getId() {
          return $this->mId;
      }
  }
  /**
   * Class for handling an array of magic words: builds combined lookup
   * tables and regexes for a list of magic word IDs, so that several words
   * can be matched in one go.
   *
   * @ingroup Parser
   */
  class MagicWordArray {
      /** List of magic word IDs in this array */
      public $names = array();
      /** Cache for getHash(); null until built */
      public $hash;
      /** Caches for getBaseRegex() / getRegex(); null until built */
      public $baseRegex, $regex;
      public $matches;

      /**
       * @param $names Array of magic word IDs
       */
      public function __construct( $names = array() ) {
          $this->names = $names;
      }

      /**
       * Add a magic word by name. Invalidates the cached hash and regexes.
       */
      public function add( $name ) {
          $this->names[] = $name;
          $this->hash = $this->baseRegex = $this->regex = null;
      }

      /**
       * Add a number of magic words by name. Invalidates the cached hash
       * and regexes.
       */
      public function addArray( $names ) {
          $this->names = array_merge( $this->names, array_values( $names ) );
          $this->hash = $this->baseRegex = $this->regex = null;
      }

      /**
       * Get a 2-d hashtable for this array.
       *
       * Index 0 maps synonyms of case-insensitive words (passed through
       * $wgContLang->lc()) to their magic word ID; index 1 maps synonyms of
       * case-sensitive words, unchanged, to their ID.
       *
       * @return array
       */
      public function getHash() {
          if ( is_null( $this->hash ) ) {
              global $wgContLang;
              $this->hash = array( 0 => array(), 1 => array() );
              foreach ( $this->names as $name ) {
                  $magic = MagicWord::get( $name );
                  $case = intval( $magic->isCaseSensitive() );
                  foreach ( $magic->getSynonyms() as $syn ) {
                      if ( !$case ) {
                          $syn = $wgContLang->lc( $syn );
                      }
                      $this->hash[$case][$syn] = $name;
                  }
              }
          }
          return $this->hash;
      }

      /**
       * Get the base regex: an alternation of one named group per synonym.
       *
       * @return array Index 0: case-insensitive words, index 1:
       *   case-sensitive words; '' where there are none
       */
      public function getBaseRegex() {
          if ( is_null( $this->baseRegex ) ) {
              $this->baseRegex = array( 0 => '', 1 => '' );
              foreach ( $this->names as $name ) {
                  $magic = MagicWord::get( $name );
                  $case = intval( $magic->isCaseSensitive() );
                  foreach ( $magic->getSynonyms() as $i => $syn ) {
                      // Group names must start with a non-digit in PCRE 8.34+,
                      // so spell the synonym index with letters. parseMatch()
                      // only uses the part after the first underscore.
                      $prefix = strtr( (string)$i, '0123456789', 'abcdefghij' );
                      $group = "(?P<{$prefix}_{$name}>" . preg_quote( $syn, '/' ) . ')';
                      if ( $this->baseRegex[$case] === '' ) {
                          $this->baseRegex[$case] = $group;
                      } else {
                          $this->baseRegex[$case] .= '|' . $group;
                      }
                  }
              }
          }
          return $this->baseRegex;
      }

      /**
       * Get an unanchored regex
       *
       * @return array Index 0: case-insensitive regex, index 1:
       *   case-sensitive regex; '' where there are no words
       */
      public function getRegex() {
          if ( is_null( $this->regex ) ) {
              $base = $this->getBaseRegex();
              $this->regex = array( '', '' );
              if ( $base[0] !== '' ) {
                  $this->regex[0] = "/{$base[0]}/iuS";
              }
              if ( $base[1] !== '' ) {
                  $this->regex[1] = "/{$base[1]}/S";
              }
          }
          return $this->regex;
      }

      /**
       * Get a regex for matching variables
       *
       * @return array Same layout as getRegex(), with "$1" turned into a
       *   lazy capture group
       */
      public function getVariableRegex() {
          // str_replace() accepts an array subject and processes each entry.
          return str_replace( "\\$1", "(.*?)", $this->getRegex() );
      }

      /**
       * Get an anchored regex for matching variables
       *
       * @return array Same layout as getRegex()
       */
      public function getVariableStartToEndRegex() {
          $base = $this->getBaseRegex();
          $newRegex = array( '', '' );
          if ( $base[0] !== '' ) {
              $newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
          }
          if ( $base[1] !== '' ) {
              $newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
          }
          return $newRegex;
      }

      /**
       * Parse a match array from preg_match
       * Returns array(magic word ID, parameter value)
       * If there is no parameter value, that element will be false.
       *
       * @throws MWException if no usable named group is found
       */
      public function parseMatch( $m ) {
          $keys = array_keys( $m );
          foreach ( $keys as $pos => $key ) {
              $value = $m[$key];
              if ( $key === 0 || $value === '' ) {
                  continue;
              }

              $parts = explode( '_', (string)$key, 2 );
              if ( count( $parts ) != 2 ) {
                  // This shouldn't happen
                  // continue;
                  throw new MWException( __METHOD__ . ': bad parameter name' );
              }
              $magicName = $parts[1]; // $parts[0] is the synonym index

              // preg_match() stores each named group twice (by name, then by
              // number), so the parameter capture, if any, sits two
              // elements after the named key.
              $paramValue = isset( $keys[$pos + 2] ) ? $m[$keys[$pos + 2]] : false;
              return array( $magicName, $paramValue );
          }
          // This shouldn't happen either
          throw new MWException( __METHOD__ . ': parameter not found' );
      }

      /**
       * Match some text, with parameter capture
       * Returns an array with the magic word name in the first element and the
       * parameter in the second element.
       * Both elements are false if there was no match.
       */
      public function matchVariableStartToEnd( $text ) {
          $regexes = $this->getVariableStartToEndRegex();
          foreach ( $regexes as $regex ) {
              if ( $regex !== '' ) {
                  $m = false;
                  if ( preg_match( $regex, $text, $m ) ) {
                      return $this->parseMatch( $m );
                  }
              }
          }
          return array( false, false );
      }

      /**
       * Match some text, without parameter capture
       * Returns the magic word name, or false if there was no capture
       */
      public function matchStartToEnd( $text ) {
          $hash = $this->getHash();
          // Case-sensitive words are tried first, on the text as given.
          if ( isset( $hash[1][$text] ) ) {
              return $hash[1][$text];
          }
          global $wgContLang;
          $lc = $wgContLang->lc( $text );
          if ( isset( $hash[0][$lc] ) ) {
              return $hash[0][$lc];
          }
          return false;
      }

      /**
       * Returns an associative array, ID => param value, for all items that match
       * Removes the matched items from the input string (passed by reference)
       */
      public function matchAndRemove( &$text ) {
          $found = array();
          $regexes = $this->getRegex();
          foreach ( $regexes as $regex ) {
              if ( $regex === '' ) {
                  continue;
              }
              $matches = array();
              preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
              foreach ( $matches as $m ) {
                  list( $name, $param ) = $this->parseMatch( $m );
                  $found[$name] = $param;
              }
              $text = preg_replace( $regex, '', $text );
          }
          return $found;
      }
  }