gettext.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. <?php
  2. /*
  3. Copyright (c) 2003, 2009 Danilo Segan <danilo@kvota.net>.
  4. Copyright (c) 2005 Nico Kaiser <nico@siriux.net>
  5. This file is part of PHP-gettext.
  6. PHP-gettext is free software; you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation; either version 2 of the License, or
  9. (at your option) any later version.
  10. PHP-gettext is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with PHP-gettext; if not, write to the Free Software
  16. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17. */
  18. /**
  19. * Provides a simple gettext replacement that works independently from
  20. * the system's gettext abilities.
  21. * It can read MO files and use them for translating strings.
  22. * The files are passed to gettext_reader as a Stream (see streams.php)
  23. *
  24. * This version has the ability to cache all strings and translations to
  25. * speed up the string lookup.
  26. * While the cache is enabled by default, it can be switched off with the
  27. * second parameter in the constructor (e.g. whenusing very large MO files
  28. * that you don't want to keep in memory)
  29. */
  30. class gettext_reader
  31. {
  32. //public:
  33. public $error = 0; // public variable that holds error code (0 if no error)
  34. //private:
  35. public $BYTEORDER = 0; // 0: low endian, 1: big endian
  36. public $STREAM = null;
  37. public $short_circuit = false;
  38. public $enable_cache = false;
  39. public $originals = null; // offset of original table
  40. public $translations = null; // offset of translation table
  41. public $pluralheader = null; // cache header field for plural forms
  42. public $total = 0; // total string count
  43. public $table_originals = null; // table for original strings (offsets)
  44. public $table_translations = null; // table for translated strings (offsets)
  45. public $cache_translations = null; // original -> translation mapping
  46. /* Methods */
  47. /**
  48. * Reads a 32bit Integer from the Stream
  49. *
  50. * @access private
  51. * @return Integer from the Stream
  52. */
  53. public function readint()
  54. {
  55. if ($this->BYTEORDER == 0) {
  56. // low endian
  57. $input=unpack('V', $this->STREAM->read(4));
  58. return array_shift($input);
  59. } else {
  60. // big endian
  61. $input=unpack('N', $this->STREAM->read(4));
  62. return array_shift($input);
  63. }
  64. }
  65. public function read($bytes)
  66. {
  67. return $this->STREAM->read($bytes);
  68. }
  69. /**
  70. * Reads an array of Integers from the Stream
  71. *
  72. * @param int count How many elements should be read
  73. * @return Array of Integers
  74. */
  75. public function readintarray($count)
  76. {
  77. if ($this->BYTEORDER == 0) {
  78. // low endian
  79. return unpack('V'.$count, $this->STREAM->read(4 * $count));
  80. } else {
  81. // big endian
  82. return unpack('N'.$count, $this->STREAM->read(4 * $count));
  83. }
  84. }
  85. /**
  86. * Constructor
  87. *
  88. * @param object Reader the StreamReader object
  89. * @param boolean enable_cache Enable or disable caching of strings (default on)
  90. */
  91. public function gettext_reader($Reader, $enable_cache = true)
  92. {
  93. // If there isn't a StreamReader, turn on short circuit mode.
  94. if (! $Reader || isset($Reader->error)) {
  95. $this->short_circuit = true;
  96. return;
  97. }
  98. // Caching can be turned off
  99. $this->enable_cache = $enable_cache;
  100. $MAGIC1 = "\x95\x04\x12\xde";
  101. $MAGIC2 = "\xde\x12\x04\x95";
  102. $this->STREAM = $Reader;
  103. $magic = $this->read(4);
  104. if ($magic == $MAGIC1) {
  105. $this->BYTEORDER = 1;
  106. } elseif ($magic == $MAGIC2) {
  107. $this->BYTEORDER = 0;
  108. } else {
  109. $this->error = 1; // not MO file
  110. return false;
  111. }
  112. // FIXME: Do we care about revision? We should.
  113. $revision = $this->readint();
  114. $this->total = $this->readint();
  115. $this->originals = $this->readint();
  116. $this->translations = $this->readint();
  117. }
  118. /**
  119. * Loads the translation tables from the MO file into the cache
  120. * If caching is enabled, also loads all strings into a cache
  121. * to speed up translation lookups
  122. *
  123. * @access private
  124. */
  125. public function load_tables()
  126. {
  127. if (is_array($this->cache_translations) &&
  128. is_array($this->table_originals) &&
  129. is_array($this->table_translations)) {
  130. return;
  131. }
  132. /* get original and translations tables */
  133. if (!is_array($this->table_originals)) {
  134. $this->STREAM->seekto($this->originals);
  135. $this->table_originals = $this->readintarray($this->total * 2);
  136. }
  137. if (!is_array($this->table_translations)) {
  138. $this->STREAM->seekto($this->translations);
  139. $this->table_translations = $this->readintarray($this->total * 2);
  140. }
  141. if ($this->enable_cache) {
  142. $this->cache_translations = array();
  143. /* read all strings in the cache */
  144. for ($i = 0; $i < $this->total; $i++) {
  145. $this->STREAM->seekto($this->table_originals[$i * 2 + 2]);
  146. $original = $this->STREAM->read($this->table_originals[$i * 2 + 1]);
  147. $this->STREAM->seekto($this->table_translations[$i * 2 + 2]);
  148. $translation = $this->STREAM->read($this->table_translations[$i * 2 + 1]);
  149. $this->cache_translations[$original] = $translation;
  150. }
  151. }
  152. }
  153. /**
  154. * Returns a string from the "originals" table
  155. *
  156. * @access private
  157. * @param int num Offset number of original string
  158. * @return string Requested string if found, otherwise ''
  159. */
  160. public function get_original_string($num)
  161. {
  162. $length = $this->table_originals[$num * 2 + 1];
  163. $offset = $this->table_originals[$num * 2 + 2];
  164. if (! $length) {
  165. return '';
  166. }
  167. $this->STREAM->seekto($offset);
  168. $data = $this->STREAM->read($length);
  169. return (string)$data;
  170. }
  171. /**
  172. * Returns a string from the "translations" table
  173. *
  174. * @access private
  175. * @param int num Offset number of original string
  176. * @return string Requested string if found, otherwise ''
  177. */
  178. public function get_translation_string($num)
  179. {
  180. $length = $this->table_translations[$num * 2 + 1];
  181. $offset = $this->table_translations[$num * 2 + 2];
  182. if (! $length) {
  183. return '';
  184. }
  185. $this->STREAM->seekto($offset);
  186. $data = $this->STREAM->read($length);
  187. return (string)$data;
  188. }
  189. /**
  190. * Binary search for string
  191. *
  192. * @access private
  193. * @param string string
  194. * @param int start (internally used in recursive function)
  195. * @param int end (internally used in recursive function)
  196. * @return int string number (offset in originals table)
  197. */
  198. public function find_string($string, $start = -1, $end = -1)
  199. {
  200. if (($start == -1) or ($end == -1)) {
  201. // find_string is called with only one parameter, set start end end
  202. $start = 0;
  203. $end = $this->total;
  204. }
  205. if (abs($start - $end) <= 1) {
  206. // We're done, now we either found the string, or it doesn't exist
  207. $txt = $this->get_original_string($start);
  208. if ($string == $txt) {
  209. return $start;
  210. } else {
  211. return -1;
  212. }
  213. } elseif ($start > $end) {
  214. // start > end -> turn around and start over
  215. return $this->find_string($string, $end, $start);
  216. } else {
  217. // Divide table in two parts
  218. $half = (int)(($start + $end) / 2);
  219. $cmp = strcmp($string, $this->get_original_string($half));
  220. if ($cmp == 0) {
  221. // string is exactly in the middle => return it
  222. return $half;
  223. } elseif ($cmp < 0) {
  224. // The string is in the upper half
  225. return $this->find_string($string, $start, $half);
  226. } else {
  227. // The string is in the lower half
  228. return $this->find_string($string, $half, $end);
  229. }
  230. }
  231. }
  232. /**
  233. * Translates a string
  234. *
  235. * @access public
  236. * @param string string to be translated
  237. * @return string translated string (or original, if not found)
  238. */
  239. public function translate($string)
  240. {
  241. if ($this->short_circuit) {
  242. return $string;
  243. }
  244. $this->load_tables();
  245. if ($this->enable_cache) {
  246. // Caching enabled, get translated string from cache
  247. if (array_key_exists($string, $this->cache_translations)) {
  248. return $this->cache_translations[$string];
  249. } else {
  250. return $string;
  251. }
  252. } else {
  253. // Caching not enabled, try to find string
  254. $num = $this->find_string($string);
  255. if ($num == -1) {
  256. return $string;
  257. } else {
  258. return $this->get_translation_string($num);
  259. }
  260. }
  261. }
  262. /**
  263. * Sanitize plural form expression for use in PHP eval call.
  264. *
  265. * @access private
  266. * @return string sanitized plural form expression
  267. */
  268. public function sanitize_plural_expression($expr)
  269. {
  270. // Get rid of disallowed characters.
  271. $expr = preg_replace('@[^a-zA-Z0-9_:;\(\)\?\|\&=!<>+*/\%-]@', '', $expr);
  272. // Add parenthesis for tertiary '?' operator.
  273. $expr .= ';';
  274. $res = '';
  275. $p = 0;
  276. for ($i = 0; $i < strlen($expr); $i++) {
  277. $ch = $expr[$i];
  278. switch ($ch) {
  279. case '?':
  280. $res .= ' ? (';
  281. $p++;
  282. break;
  283. case ':':
  284. $res .= ') : (';
  285. break;
  286. case ';':
  287. $res .= str_repeat(')', $p) . ';';
  288. $p = 0;
  289. break;
  290. default:
  291. $res .= $ch;
  292. }
  293. }
  294. return $res;
  295. }
  296. /**
  297. * Parse full PO header and extract only plural forms line.
  298. *
  299. * @access private
  300. * @return string verbatim plural form header field
  301. */
  302. public function extract_plural_forms_header_from_po_header($header)
  303. {
  304. if (preg_match("/(^|\n)plural-forms: ([^\n]*)\n/i", $header, $regs)) {
  305. $expr = $regs[2];
  306. } else {
  307. $expr = "nplurals=2; plural=n == 1 ? 0 : 1;";
  308. }
  309. return $expr;
  310. }
  311. /**
  312. * Get possible plural forms from MO header
  313. *
  314. * @access private
  315. * @return string plural form header
  316. */
  317. public function get_plural_forms()
  318. {
  319. // lets assume message number 0 is header
  320. // this is true, right?
  321. $this->load_tables();
  322. // cache header field for plural forms
  323. if (! is_string($this->pluralheader)) {
  324. if ($this->enable_cache) {
  325. $header = $this->cache_translations[""];
  326. } else {
  327. $header = $this->get_translation_string(0);
  328. }
  329. $expr = $this->extract_plural_forms_header_from_po_header($header);
  330. $this->pluralheader = $this->sanitize_plural_expression($expr);
  331. }
  332. return $this->pluralheader;
  333. }
  334. /**
  335. * Detects which plural form to take
  336. *
  337. * @access private
  338. * @param n count
  339. * @return int array index of the right plural form
  340. */
  341. public function select_string($n)
  342. {
  343. if (!is_int($n)) {
  344. throw new InvalidArgumentException(
  345. "Select_string only accepts integers: " . $n
  346. );
  347. }
  348. $string = $this->get_plural_forms();
  349. $string = str_replace('nplurals', "\$total", $string);
  350. $string = str_replace("n", $n, $string);
  351. $string = str_replace('plural', "\$plural", $string);
  352. $total = 0;
  353. $plural = 0;
  354. eval("$string");
  355. if ($plural >= $total) {
  356. $plural = $total - 1;
  357. }
  358. return $plural;
  359. }
  360. /**
  361. * Plural version of gettext
  362. *
  363. * @access public
  364. * @param string single
  365. * @param string plural
  366. * @param string number
  367. * @return translated plural form
  368. */
  369. public function ngettext($single, $plural, $number)
  370. {
  371. if ($this->short_circuit) {
  372. if ($number != 1) {
  373. return $plural;
  374. } else {
  375. return $single;
  376. }
  377. }
  378. // find out the appropriate form
  379. $select = $this->select_string($number);
  380. // this should contains all strings separated by NULLs
  381. $key = $single . chr(0) . $plural;
  382. if ($this->enable_cache) {
  383. if (! array_key_exists($key, $this->cache_translations)) {
  384. return ($number != 1) ? $plural : $single;
  385. } else {
  386. $result = $this->cache_translations[$key];
  387. $list = explode(chr(0), $result);
  388. return $list[$select];
  389. }
  390. } else {
  391. $num = $this->find_string($key);
  392. if ($num == -1) {
  393. return ($number != 1) ? $plural : $single;
  394. } else {
  395. $result = $this->get_translation_string($num);
  396. $list = explode(chr(0), $result);
  397. return $list[$select];
  398. }
  399. }
  400. }
  401. public function pgettext($context, $msgid)
  402. {
  403. $key = $context . chr(4) . $msgid;
  404. $ret = $this->translate($key);
  405. if (strpos($ret, "\004") !== false) {
  406. return $msgid;
  407. } else {
  408. return $ret;
  409. }
  410. }
  411. public function npgettext($context, $singular, $plural, $number)
  412. {
  413. $key = $context . chr(4) . $singular;
  414. $ret = $this->ngettext($key, $plural, $number);
  415. if (strpos($ret, "\004") !== false) {
  416. return $singular;
  417. } else {
  418. return $ret;
  419. }
  420. }
  421. }