MagicWord.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696
  1. <?php
  2. /**
  3. * See docs/magicword.txt.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. * @ingroup Parser
  22. */
  23. /**
  24. * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
  25. *
  26. * @par Usage:
  27. * @code
  28. * if (MagicWord::get( 'redirect' )->match( $text ) ) {
  29. * // some code
  30. * }
  31. * @endcode
  32. *
  33. * Possible future improvements:
  34. * * Simultaneous searching for a number of magic words
  35. * * MagicWord::$mObjects in shared memory
  36. *
  37. * Please avoid reading the data out of one of these objects and then writing
  38. * special case code. If possible, add another match()-like function here.
  39. *
  40. * To add magic words in an extension, use $magicWords in a file listed in
  41. * $wgExtensionMessagesFiles[].
  42. *
  43. * @par Example:
  44. * @code
  45. * $magicWords = [];
  46. *
  47. * $magicWords['en'] = [
  48. * 'magicwordkey' => [ 0, 'case_insensitive_magic_word' ],
  49. * 'magicwordkey2' => [ 1, 'CASE_sensitive_magic_word2' ],
  50. * ];
  51. * @endcode
  52. *
  53. * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
  54. * hook. Use string keys.
  55. *
  56. * @ingroup Parser
  57. */
  class MagicWord {
      /** @var string|null Internal name (ID) of the magic word */
      public $mId;

      /** @var string[] Every spelling under which the magic word may appear */
      public $mSynonyms;

      /** @var bool Whether matching is case sensitive */
      public $mCaseSensitive;

      /** @var string Lazily built regex matching the word anywhere; '' until initRegex() runs */
      private $mRegex = '';

      /** @var string Lazily built regex matching the word at the start of a string */
      private $mRegexStart = '';

      /** @var string Lazily built regex matching a string consisting only of the word */
      private $mRegexStartToEnd = '';

      /** @var string Alternation of the escaped synonyms, without delimiters or modifiers */
      private $mBaseRegex = '';

      /** @var string Like $mRegex, with the "$1" placeholder turned into a capture group */
      private $mVariableRegex = '';

      /** @var string Like $mRegexStartToEnd, with "$1" turned into a capture group */
      private $mVariableStartToEndRegex = '';

      /** @var bool Whether the last replace() / substituteCallback() changed its input */
      private $mModified = false;

      /** @var bool Set by pregRemoveAndRecord() when a removal callback fired */
      private $mFound = false;

      /** @var bool True once the MagicWordwgVariableIDs hook has been run */
      public static $mVariableIDsInitialised = false;

      /** @var string[] IDs of magic words that are parser variables */
      public static $mVariableIDs = [
          '!',
          'currentmonth',
          'currentmonth1',
          'currentmonthname',
          'currentmonthnamegen',
          'currentmonthabbrev',
          'currentday',
          'currentday2',
          'currentdayname',
          'currentyear',
          'currenttime',
          'currenthour',
          'localmonth',
          'localmonth1',
          'localmonthname',
          'localmonthnamegen',
          'localmonthabbrev',
          'localday',
          'localday2',
          'localdayname',
          'localyear',
          'localtime',
          'localhour',
          'numberofarticles',
          'numberoffiles',
          'numberofedits',
          'articlepath',
          'pageid',
          'sitename',
          'server',
          'servername',
          'scriptpath',
          'stylepath',
          'pagename',
          'pagenamee',
          'fullpagename',
          'fullpagenamee',
          'namespace',
          'namespacee',
          'namespacenumber',
          'currentweek',
          'currentdow',
          'localweek',
          'localdow',
          'revisionid',
          'revisionday',
          'revisionday2',
          'revisionmonth',
          'revisionmonth1',
          'revisionyear',
          'revisiontimestamp',
          'revisionuser',
          'revisionsize',
          'subpagename',
          'subpagenamee',
          'talkspace',
          'talkspacee',
          'subjectspace',
          'subjectspacee',
          'talkpagename',
          'talkpagenamee',
          'subjectpagename',
          'subjectpagenamee',
          'numberofusers',
          'numberofactiveusers',
          'numberofpages',
          'currentversion',
          'rootpagename',
          'rootpagenamee',
          'basepagename',
          'basepagenamee',
          'currenttimestamp',
          'localtimestamp',
          'directionmark',
          'contentlanguage',
          'pagelanguage',
          'numberofadmins',
          'cascadingsources',
      ];

      /**
       * Array of caching hints for ParserCache, keyed by magic word ID.
       * NOTE(review): the values look like lifetimes in seconds (3600 / 86400) - confirm.
       *
       * @var array [ string => int ]
       */
      public static $mCacheTTLs = [
          'currentmonth' => 86400,
          'currentmonth1' => 86400,
          'currentmonthname' => 86400,
          'currentmonthnamegen' => 86400,
          'currentmonthabbrev' => 86400,
          'currentday' => 3600,
          'currentday2' => 3600,
          'currentdayname' => 3600,
          'currentyear' => 86400,
          'currenttime' => 3600,
          'currenthour' => 3600,
          'localmonth' => 86400,
          'localmonth1' => 86400,
          'localmonthname' => 86400,
          'localmonthnamegen' => 86400,
          'localmonthabbrev' => 86400,
          'localday' => 3600,
          'localday2' => 3600,
          'localdayname' => 3600,
          'localyear' => 86400,
          'localtime' => 3600,
          'localhour' => 3600,
          'numberofarticles' => 3600,
          'numberoffiles' => 3600,
          'numberofedits' => 3600,
          'currentweek' => 3600,
          'currentdow' => 3600,
          'localweek' => 3600,
          'localdow' => 3600,
          'numberofusers' => 3600,
          'numberofactiveusers' => 3600,
          'numberofpages' => 3600,
          'currentversion' => 86400,
          'currenttimestamp' => 3600,
          'localtimestamp' => 3600,
          'pagesinnamespace' => 3600,
          'numberofadmins' => 3600,
          'numberingroup' => 3600,
      ];

      /** @var string[] IDs of the double-underscore magic words (e.g. __NOTOC__) */
      public static $mDoubleUnderscoreIDs = [
          'notoc',
          'nogallery',
          'forcetoc',
          'toc',
          'noeditsection',
          'newsectionlink',
          'nonewsectionlink',
          'hiddencat',
          'index',
          'noindex',
          'staticredirect',
          'notitleconvert',
          'nocontentconvert',
      ];

      /** @var string[] IDs of the substitution modifiers */
      public static $mSubstIDs = [
          'subst',
          'safesubst',
      ];

      /** @var array [ string => MagicWord ] Objects already created by get(), keyed by ID */
      public static $mObjects = [];

      /** @var MagicWordArray|null Built on first use by getDoubleUnderscoreArray() */
      public static $mDoubleUnderscoreArray = null;

      /**
       * Create a new MagicWord object
       *
       * Use factory instead: MagicWord::get
       *
       * @param string|null $id The internal name of the magic word
       * @param string[]|string $syn Synonyms for the magic word; a single string is
       *   wrapped into a one-element array
       * @param bool $cs If magic word is case sensitive
       */
      public function __construct( $id = null, $syn = [], $cs = false ) {
          $this->mId = $id;
          $this->mSynonyms = (array)$syn;
          $this->mCaseSensitive = $cs;
      }

      /**
       * Factory: returns the object representing an ID, creating and loading it
       * on first request and serving it from self::$mObjects afterwards.
       *
       * @param string $id The internal name of the magic word
       *
       * @return MagicWord
       * @throws MWException Propagated from load() when the ID has no synonyms
       */
      public static function &get( $id ) {
          if ( !isset( self::$mObjects[$id] ) ) {
              $mw = new MagicWord();
              $mw->load( $id );
              self::$mObjects[$id] = $mw;
          }
          return self::$mObjects[$id];
      }

      /**
       * Get an array of parser variable IDs
       *
       * The MagicWordwgVariableIDs hook is run once, on the first call, and receives
       * the ID list by reference.
       *
       * @return string[]
       */
      public static function getVariableIDs() {
          if ( !self::$mVariableIDsInitialised ) {
              # Get variable IDs
              Hooks::run( 'MagicWordwgVariableIDs', [ &self::$mVariableIDs ] );
              self::$mVariableIDsInitialised = true;
          }
          return self::$mVariableIDs;
      }

      /**
       * Get an array of parser substitution modifier IDs
       *
       * @return string[]
       */
      public static function getSubstIDs() {
          return self::$mSubstIDs;
      }

      /**
       * Allow external reads of TTL array
       *
       * @param string $id
       * @return int The entry of self::$mCacheTTLs for $id, or -1 when there is none
       */
      public static function getCacheTTL( $id ) {
          if ( !array_key_exists( $id, self::$mCacheTTLs ) ) {
              return -1;
          }
          return self::$mCacheTTLs[$id];
      }

      /**
       * Get a MagicWordArray of double-underscore entities
       *
       * The array is built on the first call, after the GetDoubleUnderscoreIDs hook
       * has received the ID list by reference, and is reused afterwards.
       *
       * @return MagicWordArray
       */
      public static function getDoubleUnderscoreArray() {
          if ( self::$mDoubleUnderscoreArray === null ) {
              Hooks::run( 'GetDoubleUnderscoreIDs', [ &self::$mDoubleUnderscoreIDs ] );
              self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
          }
          return self::$mDoubleUnderscoreArray;
      }

      /**
       * Clear the self::$mObjects variable
       * For use in parser tests
       */
      public static function clearCache() {
          self::$mObjects = [];
      }

      /**
       * Initialises this object with an ID
       *
       * Passes this object to $wgContLang->getMagic(); if no synonyms are set
       * afterwards, a placeholder synonym is stored and an exception is thrown.
       *
       * @param string $id
       * @throws MWException
       */
      public function load( $id ) {
          global $wgContLang;

          $this->mId = $id;
          $wgContLang->getMagic( $this );

          if ( !$this->mSynonyms ) {
              $this->mSynonyms = [ 'brionmademeputthishere' ];
              throw new MWException( "Error: invalid magic word '$id'" );
          }
      }

      /**
       * Preliminary initialisation: builds every regex variant from the synonyms.
       *
       * Tagged @private in the original docs although the method is declared public.
       * @private
       */
      public function initRegex() {
          // Sort the synonyms by length, descending, so that the longest synonym
          // matches in precedence to the shortest
          $synonyms = $this->mSynonyms;
          usort( $synonyms, [ $this, 'compareStringLength' ] );

          $escSyn = [];
          foreach ( $synonyms as $synonym ) {
              // Escape regex metacharacters and the "/" delimiter
              $escSyn[] = preg_quote( $synonym, '/' );
          }
          $this->mBaseRegex = implode( '|', $escSyn );

          $case = $this->mCaseSensitive ? '' : 'iu';
          $this->mRegex = "/{$this->mBaseRegex}/{$case}";
          $this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
          $this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";

          // preg_quote() turned a literal "$1" into "\$1"; swap it for a lazy group
          $this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
          $this->mVariableStartToEndRegex = str_replace(
              "\\$1",
              "(.*?)",
              $this->mRegexStartToEnd
          );
      }

      /**
       * A comparison function that returns -1, 0 or 1 depending on whether the
       * first string is longer, the same length or shorter than the second
       * string. Lengths are compared with strlen().
       *
       * @param string $s1
       * @param string $s2
       *
       * @return int
       */
      public function compareStringLength( $s1, $s2 ) {
          $l1 = strlen( $s1 );
          $l2 = strlen( $s2 );

          if ( $l1 === $l2 ) {
              return 0;
          }
          return $l1 > $l2 ? -1 : 1;
      }

      /**
       * Gets a regex representing matching the word
       *
       * @return string
       */
      public function getRegex() {
          if ( $this->mRegex == '' ) {
              $this->initRegex();
          }
          return $this->mRegex;
      }

      /**
       * Gets the regexp modifiers to use, i.e. "iu" or nothing, to be used if
       * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
       * the complete expression
       *
       * @return string
       */
      public function getRegexCase() {
          if ( $this->mRegex === '' ) {
              $this->initRegex();
          }
          return $this->mCaseSensitive ? '' : 'iu';
      }

      /**
       * Gets a regex matching the word, if it is at the string start
       *
       * @return string
       */
      public function getRegexStart() {
          if ( $this->mRegex == '' ) {
              $this->initRegex();
          }
          return $this->mRegexStart;
      }

      /**
       * Gets a regex matching the word from start to end of a string
       *
       * @return string
       * @since 1.23
       */
      public function getRegexStartToEnd() {
          if ( $this->mRegexStartToEnd == '' ) {
              $this->initRegex();
          }
          return $this->mRegexStartToEnd;
      }

      /**
       * regex without the slashes and what not
       *
       * @return string
       */
      public function getBaseRegex() {
          if ( $this->mRegex == '' ) {
              $this->initRegex();
          }
          return $this->mBaseRegex;
      }

      /**
       * Returns true if the text contains the word
       *
       * @param string $text
       *
       * @return bool
       */
      public function match( $text ) {
          return (bool)preg_match( $this->getRegex(), $text );
      }

      /**
       * Returns true if the text starts with the word
       *
       * @param string $text
       *
       * @return bool
       */
      public function matchStart( $text ) {
          return (bool)preg_match( $this->getRegexStart(), $text );
      }

      /**
       * Returns true if the text matched the word
       *
       * @param string $text
       *
       * @return bool
       * @since 1.23
       */
      public function matchStartToEnd( $text ) {
          return (bool)preg_match( $this->getRegexStartToEnd(), $text );
      }

      /**
       * Returns NULL if there's no match, the value of $1 otherwise
       * The return code is the matched string, if there's no variable
       * part in the regex and the matched variable part ($1) if there
       * is one.
       *
       * @param string $text
       *
       * @return string|null
       */
      public function matchVariableStartToEnd( $text ) {
          $matches = [];
          if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
              return null;
          }

          # Each synonym contributes its own capture group, so the groups belonging
          # to the alternatives that did not match are empty strings. Return the
          # first group that actually captured something. See also T8526.
          # The comparison is against '' on purpose: a captured "0" is a real value
          # and must not be discarded as "blank".
          $groupCount = count( $matches );
          for ( $i = 1; $i < $groupCount; $i++ ) {
              if ( $matches[$i] !== '' ) {
                  return $matches[$i];
              }
          }

          # No variable part (or it captured nothing): return the whole match
          return $matches[0];
      }

      /**
       * Returns true if the text matches the word, and alters the
       * input string, removing all instances of the word
       *
       * @param string &$text
       *
       * @return bool
       */
      public function matchAndRemove( &$text ) {
          $this->mFound = false;
          $text = preg_replace_callback(
              $this->getRegex(),
              [ $this, 'pregRemoveAndRecord' ],
              $text
          );
          return $this->mFound;
      }

      /**
       * Returns true if the text starts with the word, and alters the
       * input string, removing the word from its start
       *
       * @param string &$text
       * @return bool
       */
      public function matchStartAndRemove( &$text ) {
          $this->mFound = false;
          $text = preg_replace_callback(
              $this->getRegexStart(),
              [ $this, 'pregRemoveAndRecord' ],
              $text
          );
          return $this->mFound;
      }

      /**
       * Used in matchAndRemove() and matchStartAndRemove(): records that a match
       * was found and replaces it with the empty string
       *
       * @return string
       */
      public function pregRemoveAndRecord() {
          $this->mFound = true;
          return '';
      }

      /**
       * Replaces the word with something else
       *
       * The replacement is passed through StringUtils::escapeRegexReplacement()
       * before being handed to preg_replace(). The outcome is remembered for
       * getWasModified().
       *
       * @param string $replacement
       * @param string $subject
       * @param int $limit
       *
       * @return string
       */
      public function replace( $replacement, $subject, $limit = -1 ) {
          $res = preg_replace(
              $this->getRegex(),
              StringUtils::escapeRegexReplacement( $replacement ),
              $subject,
              $limit
          );
          $this->mModified = $res !== $subject;
          return $res;
      }

      /**
       * Variable handling: {{SUBST:xxx}} style words
       * Calls back a function to determine what to replace xxx with
       * Input word must contain $1
       *
       * The outcome is remembered for getWasModified().
       *
       * @param string $text
       * @param callable $callback
       *
       * @return string
       */
      public function substituteCallback( $text, $callback ) {
          $res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
          $this->mModified = $res !== $text;
          return $res;
      }

      /**
       * Matches the word, where $1 is a wildcard
       *
       * @return string
       */
      public function getVariableRegex() {
          if ( $this->mVariableRegex == '' ) {
              $this->initRegex();
          }
          return $this->mVariableRegex;
      }

      /**
       * Matches the entire string, where $1 is a wildcard
       *
       * @return string
       */
      public function getVariableStartToEndRegex() {
          if ( $this->mVariableStartToEndRegex == '' ) {
              $this->initRegex();
          }
          return $this->mVariableStartToEndRegex;
      }

      /**
       * Accesses the synonym list directly
       *
       * @param int $i Index into the synonym list; not bounds-checked
       *
       * @return string
       */
      public function getSynonym( $i ) {
          return $this->mSynonyms[$i];
      }

      /**
       * @return string[]
       */
      public function getSynonyms() {
          return $this->mSynonyms;
      }

      /**
       * Returns true if the last call to replace() or substituteCallback()
       * returned a modified text, otherwise false.
       *
       * @return bool
       */
      public function getWasModified() {
          return $this->mModified;
      }

      /**
       * Adds all the synonyms of this MagicWord to an array, to allow quick
       * lookup in a list of magic words
       *
       * Each synonym, lower-cased with $wgContLang->lc(), becomes a key mapped
       * to $value.
       *
       * @param string[] &$array
       * @param string $value
       */
      public function addToArray( &$array, $value ) {
          global $wgContLang;

          foreach ( $this->mSynonyms as $syn ) {
              $array[$wgContLang->lc( $syn )] = $value;
          }
      }

      /**
       * @return bool
       */
      public function isCaseSensitive() {
          return $this->mCaseSensitive;
      }

      /**
       * @return string
       */
      public function getId() {
          return $this->mId;
      }
  }