MagicWord.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517
  1. <?php
  2. /**
  3. * See docs/magicword.txt.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. * @ingroup Parser
  22. */
  23. use MediaWiki\MediaWikiServices;
  24. /**
  25. * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
  26. *
  27. * @par Usage:
  28. * @code
  29. * if ( $magicWordFactory->get( 'redirect' )->match( $text ) ) {
  30. * // some code
  31. * }
  32. * @endcode
  33. *
  34. * Please avoid reading the data out of one of these objects and then writing
  35. * special case code. If possible, add another match()-like function here.
  36. *
  37. * To add magic words in an extension, use $magicWords in a file listed in
  38. * $wgExtensionMessagesFiles[].
  39. *
  40. * @par Example:
  41. * @code
  42. * $magicWords = [];
  43. *
  44. * $magicWords['en'] = [
  45. * 'magicwordkey' => [ 0, 'case_insensitive_magic_word' ],
  46. * 'magicwordkey2' => [ 1, 'CASE_sensitive_magic_word2' ],
  47. * ];
  48. * @endcode
  49. *
  50. * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
  51. * hook. Use string keys.
  52. *
  53. * @ingroup Parser
  54. */
  class MagicWord {
      /** @var string|null Internal name (ID) of the magic word */
      public $mId;

      /** @var string[] Every textual form recognised for this magic word */
      public $mSynonyms;

      /** @var bool True when matching must respect letter case */
      public $mCaseSensitive;

      /** @var string Delimited pattern matching the word anywhere; '' until initRegex() has run */
      private $mRegex = '';

      /** @var string Delimited pattern matching the word at the start of a string */
      private $mRegexStart = '';

      /** @var string Delimited pattern matching the word as the entire string */
      private $mRegexStartToEnd = '';

      /** @var string Alternation of the escaped synonyms, without delimiters or modifiers */
      private $mBaseRegex = '';

      /** @var string Same as $mRegex, with each "$1" placeholder turned into a capture group */
      private $mVariableRegex = '';

      /** @var string Same as $mRegexStartToEnd, with each "$1" placeholder turned into a capture group */
      private $mVariableStartToEndRegex = '';

      /** @var bool Outcome of the most recent replace() / substituteCallback() call */
      private $mModified = false;

      /** @var bool Set by pregRemoveAndRecord() whenever a removal callback fires */
      private $mFound = false;

      /** @var Language Language object used by load() and addToArray() */
      private $contLang;

      /**
       * Create a new MagicWord object
       *
       * Use factory instead: MagicWordFactory::get
       *
       * @param string|null $id The internal name of the magic word
       * @param string[]|string $syn Synonyms for the magic word; a single string is
       *   wrapped into a one-element array
       * @param bool $cs If magic word is case sensitive
       * @param Language|null $contLang Content language; when omitted (or falsy) the
       *   one returned by MediaWikiServices::getContentLanguage() is used
       */
      public function __construct( $id = null, $syn = [], $cs = false, Language $contLang = null ) {
          $this->mId = $id;
          $this->mSynonyms = (array)$syn;
          $this->mCaseSensitive = $cs;
          $this->contLang = $contLang ?: MediaWikiServices::getInstance()->getContentLanguage();
      }

      /**
       * Factory: creates an object representing an ID
       *
       * Emits a deprecation notice and forwards to the MagicWordFactory service.
       *
       * @param string $id The internal name of the magic word
       *
       * @return MagicWord
       * @deprecated since 1.32, use MagicWordFactory::get
       */
      public static function get( $id ) {
          wfDeprecated( __METHOD__, '1.32' );
          return MediaWikiServices::getInstance()->getMagicWordFactory()->get( $id );
      }

      /**
       * Get an array of parser variable IDs
       *
       * Emits a deprecation notice and forwards to the MagicWordFactory service.
       *
       * @return string[]
       * @deprecated since 1.32, use MagicWordFactory::getVariableIDs
       */
      public static function getVariableIDs() {
          wfDeprecated( __METHOD__, '1.32' );
          return MediaWikiServices::getInstance()->getMagicWordFactory()->getVariableIDs();
      }

      /**
       * Get an array of parser substitution modifier IDs
       *
       * Emits a deprecation notice and forwards to the MagicWordFactory service.
       *
       * @return string[]
       * @deprecated since 1.32, use MagicWordFactory::getSubstIDs
       */
      public static function getSubstIDs() {
          wfDeprecated( __METHOD__, '1.32' );
          return MediaWikiServices::getInstance()->getMagicWordFactory()->getSubstIDs();
      }

      /**
       * Allow external reads of TTL array
       *
       * Emits a deprecation notice and forwards to the MagicWordFactory service.
       *
       * @param string $id
       * @return int
       * @deprecated since 1.32, use MagicWordFactory::getCacheTTL
       */
      public static function getCacheTTL( $id ) {
          wfDeprecated( __METHOD__, '1.32' );
          return MediaWikiServices::getInstance()->getMagicWordFactory()->getCacheTTL( $id );
      }

      /**
       * Get a MagicWordArray of double-underscore entities
       *
       * Emits a deprecation notice and forwards to the MagicWordFactory service.
       *
       * @return MagicWordArray
       * @deprecated since 1.32, use MagicWordFactory::getDoubleUnderscoreArray
       */
      public static function getDoubleUnderscoreArray() {
          wfDeprecated( __METHOD__, '1.32' );
          return MediaWikiServices::getInstance()->getMagicWordFactory()->getDoubleUnderscoreArray();
      }

      /**
       * Initialises this object with an ID
       *
       * Stores the ID and passes this object to the language's getMagic()
       * (presumably that call fills in the synonyms and case flag -- verify
       * against Language::getMagic). If no synonyms are present afterwards, a
       * placeholder synonym is stored and an exception is thrown.
       *
       * @param string $id
       * @throws MWException When the word has no synonyms after loading
       */
      public function load( $id ) {
          $this->mId = $id;
          $this->contLang->getMagic( $this );
          if ( !$this->mSynonyms ) {
              // Leave a non-empty synonym list behind before reporting the failure.
              $this->mSynonyms = [ 'brionmademeputthishere' ];
              throw new MWException( "Error: invalid magic word '$id'" );
          }
      }

      /**
       * Preliminary initialisation: builds every cached pattern from the
       * current synonym list and case flag.
       *
       * @private
       */
      public function initRegex() {
          // Sort the synonyms by length, descending, so that the longest synonym
          // matches in precedence to the shortest
          $synonyms = $this->mSynonyms;
          usort( $synonyms, [ $this, 'compareStringLength' ] );

          // Escape each synonym for use inside a "/"-delimited pattern.
          $escSyn = [];
          foreach ( $synonyms as $synonym ) {
              $escSyn[] = preg_quote( $synonym, '/' );
          }
          $this->mBaseRegex = implode( '|', $escSyn );

          // Case-insensitive words are matched case-insensitively in UTF-8 mode.
          $case = $this->mCaseSensitive ? '' : 'iu';
          $this->mRegex = "/{$this->mBaseRegex}/{$case}";
          $this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
          $this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";

          // preg_quote() has turned every literal "$1" into "\$1"; replace that
          // escaped placeholder with a lazy capture group.
          $this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
          $this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)", $this->mRegexStartToEnd );
      }

      /**
       * Builds the cached patterns on first use.
       *
       * initRegex() always leaves $mRegex non-empty (it contains at least the
       * delimiters), so that one field is enough to tell whether it has run.
       */
      private function ensureRegexInitialised() {
          if ( $this->mRegex === '' ) {
              $this->initRegex();
          }
      }

      /**
       * A comparison function that returns -1, 0 or 1 depending on whether the
       * first string is longer, the same length or shorter than the second
       * string. Lengths are byte lengths (strlen).
       *
       * @param string $s1
       * @param string $s2
       *
       * @return int
       */
      public function compareStringLength( $s1, $s2 ) {
          // Operands are swapped on purpose: this yields a descending sort.
          return strlen( $s2 ) <=> strlen( $s1 );
      }

      /**
       * Gets a regex representing matching the word
       *
       * @return string
       */
      public function getRegex() {
          $this->ensureRegexInitialised();
          return $this->mRegex;
      }

      /**
       * Gets the regexp modifiers to use, i.e. "iu" for a case-insensitive word
       * or nothing for a case-sensitive one, to be used if one is using
       * MagicWord::getBaseRegex(), otherwise it'll be included in the complete
       * expression
       *
       * @return string
       */
      public function getRegexCase() {
          return $this->mCaseSensitive ? '' : 'iu';
      }

      /**
       * Gets a regex matching the word, if it is at the string start
       *
       * @return string
       */
      public function getRegexStart() {
          $this->ensureRegexInitialised();
          return $this->mRegexStart;
      }

      /**
       * Gets a regex matching the word from start to end of a string
       *
       * @return string
       * @since 1.23
       */
      public function getRegexStartToEnd() {
          $this->ensureRegexInitialised();
          return $this->mRegexStartToEnd;
      }

      /**
       * regex without the slashes and what not
       *
       * @return string
       */
      public function getBaseRegex() {
          $this->ensureRegexInitialised();
          return $this->mBaseRegex;
      }

      /**
       * Returns true if the text contains the word
       *
       * @param string $text
       *
       * @return bool
       */
      public function match( $text ) {
          return (bool)preg_match( $this->getRegex(), $text );
      }

      /**
       * Returns true if the text starts with the word
       *
       * @param string $text
       *
       * @return bool
       */
      public function matchStart( $text ) {
          return (bool)preg_match( $this->getRegexStart(), $text );
      }

      /**
       * Returns true if the text matched the word
       *
       * @param string $text
       *
       * @return bool
       * @since 1.23
       */
      public function matchStartToEnd( $text ) {
          return (bool)preg_match( $this->getRegexStartToEnd(), $text );
      }

      /**
       * Returns NULL if there's no match, the value of $1 otherwise
       * The return code is the matched string, if there's no variable
       * part in the regex and the matched variable part ($1) if there
       * is one.
       *
       * @param string $text
       *
       * @return string|null
       */
      public function matchVariableStartToEnd( $text ) {
          $matches = [];
          if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
              return null;
          }

          # multiple matched parts (variable match); some will be empty because of
          # synonyms. The variable will be the second non-empty one so remove any
          # blank elements and re-sort the indices.
          # See also T8526
          #
          # Only genuinely empty strings are dropped: a plain array_filter() would
          # also discard a captured "0" and return the whole match in its place.
          $parts = array_values( array_filter( $matches, static function ( $part ) {
              return $part !== '';
          } ) );

          if ( isset( $parts[1] ) ) {
              return $parts[1];
          }
          // Only the full match is left, or nothing at all if every part was empty.
          return $parts[0] ?? null;
      }

      /**
       * Returns true if the text matches the word, and alters the
       * input string, removing all instances of the word
       *
       * If the regex engine fails (for example invalid UTF-8 in $text while
       * matching a case-insensitive word), $text is left untouched and false
       * is returned.
       *
       * @param string &$text
       *
       * @return bool
       */
      public function matchAndRemove( &$text ) {
          return $this->removeMatches( $this->getRegex(), $text );
      }

      /**
       * Returns true if the text starts with the word, and alters the input
       * string, removing that leading occurrence
       *
       * If the regex engine fails, $text is left untouched and false is
       * returned.
       *
       * @param string &$text
       * @return bool
       */
      public function matchStartAndRemove( &$text ) {
          return $this->removeMatches( $this->getRegexStart(), $text );
      }

      /**
       * Strips every match of $regex from $text and reports whether anything
       * was removed.
       *
       * @param string $regex Delimited pattern to remove
       * @param string &$text Altered in place on success only
       * @return bool
       */
      private function removeMatches( $regex, &$text ) {
          $this->mFound = false;
          $stripped = preg_replace_callback( $regex, [ $this, 'pregRemoveAndRecord' ], $text );
          if ( $stripped === null ) {
              // preg_replace_callback() yields null on failure; assigning that to
              // the by-reference argument would wipe the caller's text.
              $this->mFound = false;
              return false;
          }
          $text = $stripped;
          return $this->mFound;
      }

      /**
       * Used in matchAndRemove() and matchStartAndRemove(): records that a
       * match was seen and replaces it with nothing
       *
       * @return string
       */
      public function pregRemoveAndRecord() {
          $this->mFound = true;
          return '';
      }

      /**
       * Replaces the word with something else
       *
       * The replacement is passed through StringUtils::escapeRegexReplacement()
       * before use. If the regex engine fails, the subject is returned unchanged
       * and getWasModified() reports false.
       *
       * @param string $replacement
       * @param string $subject
       * @param int $limit Maximum number of replacements, -1 for no limit
       *
       * @return string
       */
      public function replace( $replacement, $subject, $limit = -1 ) {
          $res = preg_replace(
              $this->getRegex(),
              StringUtils::escapeRegexReplacement( $replacement ),
              $subject,
              $limit
          );
          return $this->recordModification( $subject, $res );
      }

      /**
       * Variable handling: {{SUBST:xxx}} style words
       * Calls back a function to determine what to replace xxx with
       * Input word must contain $1
       *
       * If the regex engine fails, the text is returned unchanged and
       * getWasModified() reports false.
       *
       * @param string $text
       * @param callable $callback Receives the match array of each occurrence
       *
       * @return string
       */
      public function substituteCallback( $text, $callback ) {
          $res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
          return $this->recordModification( $text, $res );
      }

      /**
       * Updates the "was modified" flag from a replacement result and picks
       * the value to hand back to the caller.
       *
       * @param string $before Text given to the replacement function
       * @param string|null $after Its result; null signals a regex engine failure
       * @return string
       */
      private function recordModification( $before, $after ) {
          if ( $after === null ) {
              // Nothing was replaced; do not report a change or leak null.
              $this->mModified = false;
              return $before;
          }
          $this->mModified = $after !== $before;
          return $after;
      }

      /**
       * Matches the word, where $1 is a wildcard
       *
       * @return string
       */
      public function getVariableRegex() {
          $this->ensureRegexInitialised();
          return $this->mVariableRegex;
      }

      /**
       * Matches the entire string, where $1 is a wildcard
       *
       * @return string
       */
      public function getVariableStartToEndRegex() {
          $this->ensureRegexInitialised();
          return $this->mVariableStartToEndRegex;
      }

      /**
       * Accesses the synonym list directly
       *
       * No bounds check is performed on the index.
       *
       * @param int $i
       *
       * @return string
       */
      public function getSynonym( $i ) {
          return $this->mSynonyms[$i];
      }

      /**
       * @return string[]
       */
      public function getSynonyms() {
          return $this->mSynonyms;
      }

      /**
       * Returns true if the last call to replace() or substituteCallback()
       * returned a modified text, otherwise false.
       *
       * @return bool
       */
      public function getWasModified() {
          return $this->mModified;
      }

      /**
       * Adds all the synonyms of this MagicWord to an array, to allow quick
       * lookup in a list of magic words
       *
       * Each synonym is passed through the language object's lc() and used as
       * a key.
       *
       * @param string[] &$array
       * @param string $value Value stored under every key
       */
      public function addToArray( &$array, $value ) {
          foreach ( $this->mSynonyms as $syn ) {
              $array[$this->contLang->lc( $syn )] = $value;
          }
      }

      /**
       * @return bool
       */
      public function isCaseSensitive() {
          return $this->mCaseSensitive;
      }

      /**
       * @return string
       */
      public function getId() {
          return $this->mId;
      }
  }