<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Language
 */

  21. /**
  22. * Parser for rules of language conversion , parse rules in -{ }- tag.
  23. * @ingroup Language
  24. * @author fdcn <fdcn64@gmail.com>, PhiLiP <philip.npc@gmail.com>
  25. */
  26. class ConverterRule {
  27. public $mText; // original text in -{text}-
  28. public $mConverter; // LanguageConverter object
  29. public $mRuleDisplay = '';
  30. public $mRuleTitle = false;
  31. public $mRules = '';// string : the text of the rules
  32. public $mRulesAction = 'none';
  33. public $mFlags = [];
  34. public $mVariantFlags = [];
  35. public $mConvTable = [];
  36. public $mBidtable = [];// array of the translation in each variant
  37. public $mUnidtable = [];// array of the translation in each variant
  38. /**
  39. * Constructor
  40. *
  41. * @param string $text The text between -{ and }-
  42. * @param LanguageConverter $converter
  43. */
  44. public function __construct( $text, $converter ) {
  45. $this->mText = $text;
  46. $this->mConverter = $converter;
  47. }
  48. /**
  49. * Check if variants array in convert array.
  50. *
  51. * @param array|string $variants Variant language code
  52. * @return string Translated text
  53. */
  54. public function getTextInBidtable( $variants ) {
  55. $variants = (array)$variants;
  56. if ( !$variants ) {
  57. return false;
  58. }
  59. foreach ( $variants as $variant ) {
  60. if ( isset( $this->mBidtable[$variant] ) ) {
  61. return $this->mBidtable[$variant];
  62. }
  63. }
  64. return false;
  65. }
  66. /**
  67. * Parse flags with syntax -{FLAG| ... }-
  68. * @private
  69. */
  70. function parseFlags() {
  71. $text = $this->mText;
  72. $flags = [];
  73. $variantFlags = [];
  74. $sepPos = strpos( $text, '|' );
  75. if ( $sepPos !== false ) {
  76. $validFlags = $this->mConverter->mFlags;
  77. $f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) );
  78. foreach ( $f as $ff ) {
  79. $ff = trim( $ff );
  80. if ( isset( $validFlags[$ff] ) ) {
  81. $flags[$validFlags[$ff]] = true;
  82. }
  83. }
  84. $text = strval( substr( $text, $sepPos + 1 ) );
  85. }
  86. if ( !$flags ) {
  87. $flags['S'] = true;
  88. } elseif ( isset( $flags['R'] ) ) {
  89. $flags = [ 'R' => true ];// remove other flags
  90. } elseif ( isset( $flags['N'] ) ) {
  91. $flags = [ 'N' => true ];// remove other flags
  92. } elseif ( isset( $flags['-'] ) ) {
  93. $flags = [ '-' => true ];// remove other flags
  94. } elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) {
  95. $flags['H'] = true;
  96. } elseif ( isset( $flags['H'] ) ) {
  97. // replace A flag, and remove other flags except T
  98. $temp = [ '+' => true, 'H' => true ];
  99. if ( isset( $flags['T'] ) ) {
  100. $temp['T'] = true;
  101. }
  102. if ( isset( $flags['D'] ) ) {
  103. $temp['D'] = true;
  104. }
  105. $flags = $temp;
  106. } else {
  107. if ( isset( $flags['A'] ) ) {
  108. $flags['+'] = true;
  109. $flags['S'] = true;
  110. }
  111. if ( isset( $flags['D'] ) ) {
  112. unset( $flags['S'] );
  113. }
  114. // try to find flags like "zh-hans", "zh-hant"
  115. // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
  116. $variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants );
  117. if ( $variantFlags ) {
  118. $variantFlags = array_flip( $variantFlags );
  119. $flags = [];
  120. }
  121. }
  122. $this->mVariantFlags = $variantFlags;
  123. $this->mRules = $text;
  124. $this->mFlags = $flags;
  125. }
  126. /**
  127. * Generate conversion table.
  128. * @private
  129. */
  130. function parseRules() {
  131. $rules = $this->mRules;
  132. $bidtable = [];
  133. $unidtable = [];
  134. $variants = $this->mConverter->mVariants;
  135. $varsep_pattern = $this->mConverter->getVarSeparatorPattern();
  136. // Split according to $varsep_pattern, but ignore semicolons from HTML entities
  137. $rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
  138. $choice = preg_split( $varsep_pattern, $rules );
  139. $choice = str_replace( "\x01", ';', $choice );
  140. foreach ( $choice as $c ) {
  141. $v = explode( ':', $c, 2 );
  142. if ( count( $v ) != 2 ) {
  143. // syntax error, skip
  144. continue;
  145. }
  146. $to = trim( $v[1] );
  147. $v = trim( $v[0] );
  148. $u = explode( '=>', $v, 2 );
  149. // if $to is empty (which is also used as $from in bidtable),
  150. // strtr() could return a wrong result.
  151. if ( count( $u ) == 1 && $to !== '' && in_array( $v, $variants ) ) {
  152. $bidtable[$v] = $to;
  153. } elseif ( count( $u ) == 2 ) {
  154. $from = trim( $u[0] );
  155. $v = trim( $u[1] );
  156. // if $from is empty, strtr() could return a wrong result.
  157. if ( array_key_exists( $v, $unidtable )
  158. && !is_array( $unidtable[$v] )
  159. && $from !== ''
  160. && in_array( $v, $variants ) ) {
  161. $unidtable[$v] = [ $from => $to ];
  162. } elseif ( $from !== '' && in_array( $v, $variants ) ) {
  163. $unidtable[$v][$from] = $to;
  164. }
  165. }
  166. // syntax error, pass
  167. if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
  168. $bidtable = [];
  169. $unidtable = [];
  170. break;
  171. }
  172. }
  173. $this->mBidtable = $bidtable;
  174. $this->mUnidtable = $unidtable;
  175. }
  176. /**
  177. * @private
  178. *
  179. * @return string
  180. */
  181. function getRulesDesc() {
  182. $codesep = $this->mConverter->mDescCodeSep;
  183. $varsep = $this->mConverter->mDescVarSep;
  184. $text = '';
  185. foreach ( $this->mBidtable as $k => $v ) {
  186. $text .= $this->mConverter->mVariantNames[$k] . "$codesep$v$varsep";
  187. }
  188. foreach ( $this->mUnidtable as $k => $a ) {
  189. foreach ( $a as $from => $to ) {
  190. $text .= $from . '⇒' . $this->mConverter->mVariantNames[$k] .
  191. "$codesep$to$varsep";
  192. }
  193. }
  194. return $text;
  195. }
  196. /**
  197. * Parse rules conversion.
  198. * @private
  199. *
  200. * @param string $variant
  201. *
  202. * @return string
  203. */
  204. function getRuleConvertedStr( $variant ) {
  205. $bidtable = $this->mBidtable;
  206. $unidtable = $this->mUnidtable;
  207. if ( count( $bidtable ) + count( $unidtable ) == 0 ) {
  208. return $this->mRules;
  209. } else {
  210. // display current variant in bidirectional array
  211. $disp = $this->getTextInBidtable( $variant );
  212. // or display current variant in fallbacks
  213. if ( $disp === false ) {
  214. $disp = $this->getTextInBidtable(
  215. $this->mConverter->getVariantFallbacks( $variant ) );
  216. }
  217. // or display current variant in unidirectional array
  218. if ( $disp === false && array_key_exists( $variant, $unidtable ) ) {
  219. $disp = array_values( $unidtable[$variant] )[0];
  220. }
  221. // or display frist text under disable manual convert
  222. if ( $disp === false && $this->mConverter->mManualLevel[$variant] == 'disable' ) {
  223. if ( count( $bidtable ) > 0 ) {
  224. $disp = array_values( $bidtable )[0];
  225. } else {
  226. $disp = array_values( array_values( $unidtable )[0] )[0];
  227. }
  228. }
  229. return $disp;
  230. }
  231. }
  232. /**
  233. * Similar to getRuleConvertedStr(), but this prefers to use original
  234. * page title if $variant === $this->mConverter->mMainLanguageCode
  235. * and may return false in this case (so this title conversion rule
  236. * will be ignored and the original title is shown).
  237. *
  238. * @since 1.22
  239. * @param string $variant The variant code to display page title in
  240. * @return string|bool The converted title or false if just page name
  241. */
  242. function getRuleConvertedTitle( $variant ) {
  243. if ( $variant === $this->mConverter->mMainLanguageCode ) {
  244. // If a string targeting exactly this variant is set,
  245. // use it. Otherwise, just return false, so the real
  246. // page name can be shown (and because variant === main,
  247. // there'll be no further automatic conversion).
  248. $disp = $this->getTextInBidtable( $variant );
  249. if ( $disp ) {
  250. return $disp;
  251. }
  252. if ( array_key_exists( $variant, $this->mUnidtable ) ) {
  253. $disp = array_values( $this->mUnidtable[$variant] )[0];
  254. }
  255. // Assigned above or still false.
  256. return $disp;
  257. } else {
  258. return $this->getRuleConvertedStr( $variant );
  259. }
  260. }
  261. /**
  262. * Generate conversion table for all text.
  263. * @private
  264. */
  265. function generateConvTable() {
  266. // Special case optimisation
  267. if ( !$this->mBidtable && !$this->mUnidtable ) {
  268. $this->mConvTable = [];
  269. return;
  270. }
  271. $bidtable = $this->mBidtable;
  272. $unidtable = $this->mUnidtable;
  273. $manLevel = $this->mConverter->mManualLevel;
  274. $vmarked = [];
  275. foreach ( $this->mConverter->mVariants as $v ) {
  276. /* for bidirectional array
  277. fill in the missing variants, if any,
  278. with fallbacks */
  279. if ( !isset( $bidtable[$v] ) ) {
  280. $variantFallbacks =
  281. $this->mConverter->getVariantFallbacks( $v );
  282. $vf = $this->getTextInBidtable( $variantFallbacks );
  283. if ( $vf ) {
  284. $bidtable[$v] = $vf;
  285. }
  286. }
  287. if ( isset( $bidtable[$v] ) ) {
  288. foreach ( $vmarked as $vo ) {
  289. // use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
  290. // or -{H|zh:WordZh;zh-tw:WordTw}-
  291. // or -{-|zh:WordZh;zh-tw:WordTw}-
  292. // to introduce a custom mapping between
  293. // words WordZh and WordTw in the whole text
  294. if ( $manLevel[$v] == 'bidirectional' ) {
  295. $this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
  296. }
  297. if ( $manLevel[$vo] == 'bidirectional' ) {
  298. $this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
  299. }
  300. }
  301. $vmarked[] = $v;
  302. }
  303. /* for unidirectional array fill to convert tables */
  304. if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
  305. && isset( $unidtable[$v] )
  306. ) {
  307. if ( isset( $this->mConvTable[$v] ) ) {
  308. $this->mConvTable[$v] = $unidtable[$v] + $this->mConvTable[$v];
  309. } else {
  310. $this->mConvTable[$v] = $unidtable[$v];
  311. }
  312. }
  313. }
  314. }
  315. /**
  316. * Parse rules and flags.
  317. * @param string $variant Variant language code
  318. */
  319. public function parse( $variant = null ) {
  320. if ( !$variant ) {
  321. $variant = $this->mConverter->getPreferredVariant();
  322. }
  323. $this->parseFlags();
  324. $flags = $this->mFlags;
  325. // convert to specified variant
  326. // syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
  327. if ( $this->mVariantFlags ) {
  328. // check if current variant in flags
  329. if ( isset( $this->mVariantFlags[$variant] ) ) {
  330. // then convert <text to convert> to current language
  331. $this->mRules = $this->mConverter->autoConvert( $this->mRules,
  332. $variant );
  333. } else {
  334. // if current variant no in flags,
  335. // then we check its fallback variants.
  336. $variantFallbacks =
  337. $this->mConverter->getVariantFallbacks( $variant );
  338. if ( is_array( $variantFallbacks ) ) {
  339. foreach ( $variantFallbacks as $variantFallback ) {
  340. // if current variant's fallback exist in flags
  341. if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
  342. // then convert <text to convert> to fallback language
  343. $this->mRules =
  344. $this->mConverter->autoConvert( $this->mRules,
  345. $variantFallback );
  346. break;
  347. }
  348. }
  349. }
  350. }
  351. $this->mFlags = $flags = [ 'R' => true ];
  352. }
  353. if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
  354. // decode => HTML entities modified by Sanitizer::removeHTMLtags
  355. $this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
  356. $this->parseRules();
  357. }
  358. $rules = $this->mRules;
  359. if ( !$this->mBidtable && !$this->mUnidtable ) {
  360. if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
  361. // fill all variants if text in -{A/H/-|text}- is non-empty but without rules
  362. if ( $rules !== '' ) {
  363. foreach ( $this->mConverter->mVariants as $v ) {
  364. $this->mBidtable[$v] = $rules;
  365. }
  366. }
  367. } elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
  368. $this->mFlags = $flags = [ 'R' => true ];
  369. }
  370. }
  371. $this->mRuleDisplay = false;
  372. foreach ( $flags as $flag => $unused ) {
  373. switch ( $flag ) {
  374. case 'R':
  375. // if we don't do content convert, still strip the -{}- tags
  376. $this->mRuleDisplay = $rules;
  377. break;
  378. case 'N':
  379. // process N flag: output current variant name
  380. $ruleVar = trim( $rules );
  381. if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
  382. $this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
  383. } else {
  384. $this->mRuleDisplay = '';
  385. }
  386. break;
  387. case 'D':
  388. // process D flag: output rules description
  389. $this->mRuleDisplay = $this->getRulesDesc();
  390. break;
  391. case 'H':
  392. // process H,- flag or T only: output nothing
  393. $this->mRuleDisplay = '';
  394. break;
  395. case '-':
  396. $this->mRulesAction = 'remove';
  397. $this->mRuleDisplay = '';
  398. break;
  399. case '+':
  400. $this->mRulesAction = 'add';
  401. $this->mRuleDisplay = '';
  402. break;
  403. case 'S':
  404. $this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
  405. break;
  406. case 'T':
  407. $this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
  408. $this->mRuleDisplay = '';
  409. break;
  410. default:
  411. // ignore unknown flags (but see error case below)
  412. }
  413. }
  414. if ( $this->mRuleDisplay === false ) {
  415. $this->mRuleDisplay = '<span class="error">'
  416. . wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
  417. . '</span>';
  418. }
  419. $this->generateConvTable();
  420. }
  421. /**
  422. * Checks if there are conversion rules.
  423. * @return bool
  424. */
  425. public function hasRules() {
  426. return $this->mRules !== '';
  427. }
  428. /**
  429. * Get display text on markup -{...}-
  430. * @return string
  431. */
  432. public function getDisplay() {
  433. return $this->mRuleDisplay;
  434. }
  435. /**
  436. * Get converted title.
  437. * @return string
  438. */
  439. public function getTitle() {
  440. return $this->mRuleTitle;
  441. }
  442. /**
  443. * Return how deal with conversion rules.
  444. * @return string
  445. */
  446. public function getRulesAction() {
  447. return $this->mRulesAction;
  448. }
  449. /**
  450. * Get conversion table. (bidirectional and unidirectional
  451. * conversion table)
  452. * @return array
  453. */
  454. public function getConvTable() {
  455. return $this->mConvTable;
  456. }
  457. /**
  458. * Get conversion rules string.
  459. * @return string
  460. */
  461. public function getRules() {
  462. return $this->mRules;
  463. }
  464. /**
  465. * Get conversion flags.
  466. * @return array
  467. */
  468. public function getFlags() {
  469. return $this->mFlags;
  470. }
  471. }