pofile.lisp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524
  1. ;; This software is Copyright (c) cage, 2012.
  2. ;; cage grants you the rights to distribute
  3. ;; and use this software as governed by the terms
  4. ;; of the Lisp Lesser GNU Public License
  5. ;; (http://opensource.franz.com/preamble.html),
  6. ;; known as the LLGPL
  7. (in-package :cl-i18n)
  8. (alexandria:define-constant +po-comment-line+ "#\\n|^#[^,].*\\n|\\n" :test 'string=)
  9. (defclass po-parsed-file (parsed-file) ())
  10. (defmethod initialize-instance :after ((object po-parsed-file) &key &allow-other-keys)
  11. (with-slots (comment-line) object
  12. (setf comment-line +po-comment-line+)))
  13. (define-parser-skeleton po po-parsed-file)
  14. (alexandria:define-constant +number+ "0|[1-9][0-9]+|[1-9]" :test 'string=)
  15. (alexandria:define-constant +and-op+ "&&" :test 'string=)
  16. (alexandria:define-constant +or-op+ "||" :test 'string=)
  17. (alexandria:define-constant +or-op-regex+ "\\|\\|" :test 'string=)
  18. (alexandria:define-constant +<+ "<" :test 'string=)
  19. (alexandria:define-constant +>+ ">" :test 'string=)
  20. (alexandria:define-constant +<=+ "<=" :test 'string=)
  21. (alexandria:define-constant +>=+ ">=" :test 'string=)
  22. (alexandria:define-constant +!=+ "!=" :test 'string=)
  23. (alexandria:define-constant +==+ "==" :test 'string=)
  24. (alexandria:define-constant +%+ "%" :test 'string=)
  25. (alexandria:define-constant +?+ "?" :test 'string=)
  26. (alexandria:define-constant +?-regex+ "\\?" :test 'string=)
  27. (alexandria:define-constant +colon+ ":" :test 'string=)
  28. (alexandria:define-constant +open-paren+ "(" :test 'string=)
  29. (alexandria:define-constant +close-paren+ ")" :test 'string=)
  30. (alexandria:define-constant +end-expression+ ";" :test 'string=)
  31. (alexandria:define-constant +open-paren-regex+ "\\(" :test 'string=)
  32. (alexandria:define-constant +close-paren-regex+ "\\)" :test 'string=)
  33. (alexandria:define-constant +var+ "n" :test 'string=)
  34. (alexandria:define-constant +escape-newline+ "\\" :test 'equalp)
  35. (alexandria:define-constant +escaped-string-delim+ "\"" :test 'string=)
  36. (alexandria:define-constant +escape-string-escape-char+ "\\" :test 'equalp)
  37. (alexandria:define-constant +escape-string-escaped-newline+ "n" :test 'equalp)
  38. (alexandria:define-constant +newline+ #\NewLine :test 'equalp)
  39. (alexandria:define-constant +agnostic-comment+ "#" :test 'equalp)
  40. (alexandria:define-constant +flag-line+ "#,[ ]*" :test 'equalp)
  41. (alexandria:define-constant +flag-fuzzy+ "fuzzy" :test 'equalp)
  42. (alexandria:define-constant +msgid+ "msgid" :test 'equalp)
  43. (alexandria:define-constant +msgstr+ "msgstr" :test 'equalp)
  44. (alexandria:define-constant +msgid-regexp+ "msgid[ ]+" :test 'equalp)
  45. (alexandria:define-constant +msgstr-regexp+ "msgstr[ ]+" :test 'equalp)
  46. (alexandria:define-constant +msgstr[]+ "msgstr\\[[0-9]\\]" :test 'equalp)
  47. (alexandria:define-constant +msgstr[]-regexp+ "msgstr\\[[0-9]\\]" :test 'equalp)
  48. (alexandria:define-constant +msgid-plural+ "msgid_plural" :test 'equalp)
  49. (alexandria:define-constant +plural-form-label+ "Plural-Forms:" :test 'equalp)
  50. (alexandria:define-constant +nplurals-label+ "nplurals=" :test 'equalp)
  51. (alexandria:define-constant +plural-expression-label+ "plural=" :test 'equalp)
  52. (defparameter *boolean-op* (list +and-op+ +or-op+))
  53. (defparameter *compar-op* (list +>+ +<+ +>=+ +<=+ +==+ +!=+))
  54. (defparameter *aritm-op* (list +%+))
  55. (defparameter *n* 0)
  56. (defun == (a b)
  57. (if (= a b)
  58. 1
  59. 0))
  60. (defun != (a b)
  61. (if (/= a b)
  62. 1
  63. 0))
  64. (defun % (a b)
  65. (mod a b))
  66. (defun && (a b)
  67. (if (and (> a 0)
  68. (> b 0))
  69. 1
  70. 0))
  71. (defun bool-or (a b)
  72. (if (or (> a 0)
  73. (> b 0))
  74. 1
  75. 0))
  76. (defun >* (a b)
  77. (if (> a b)
  78. 1
  79. 0))
  80. (defun >=* (a b)
  81. (if (>= a b)
  82. 1
  83. 0))
  84. (defun <* (a b)
  85. (if (< a b)
  86. 1
  87. 0))
  88. (defun <=* (a b)
  89. (if (<= a b)
  90. 1
  91. 0))
  92. (defun stack-if (&rest args)
  93. (let ((if-term (first args))
  94. (then (second args))
  95. (else (third args)))
  96. (cond
  97. ((numberp if-term)
  98. (if (/= 0 if-term)
  99. then
  100. else))
  101. (t
  102. (if if-term then else)))))
  103. (defun string->function (fun)
  104. (cond
  105. ((string= fun "||")
  106. #'bool-or)
  107. ((string= fun "?")
  108. #'stack-if)
  109. ((string= fun ">")
  110. #'>*)
  111. ((string= fun ">=")
  112. #'>=*)
  113. ((string= fun "<")
  114. #'<*)
  115. ((string= fun "<=")
  116. #'<=*)
  117. (t
  118. (symbol-function (intern fun :cl-i18n)))))
  119. (defun unescaped-char (char)
  120. (cond
  121. ((string= char +escape-string-escaped-newline+)
  122. (format nil "~%"))
  123. ((string= char #\newline)
  124. "")
  125. ((string= char #\")
  126. "\"")))
  127. (define-tokenizer (po-parsed-file +po-comment-line+ +open-paren-regex+ +close-paren-regex+ +number+ +and-op+ +or-op-regex+ +<+ +>+ +<=+ +>=+ +!=+ +==+ +%+ +?-regex+ +colon+ +var+ +end-expression+ +plural-expression-label+ +msgid-regexp+ +msgstr-regexp+ +flag-line+ +flag-fuzzy+ +msgstr[]-regexp+ +msgid-plural+)
  128. ((string= (char@) +escape-newline+)
  129. (multiple-increment 2)
  130. (next-token *file*))
  131. ((member (char@) *blank-space* :test #'string=)
  132. (increment-pointer *file*)
  133. (next-token *file*)))
  134. (define-is-stuff-p string= +and-op+ +or-op+ +<+ +>+ +<=+ +>=+ +!=+ +==+ +%+ +?+ +colon+ +open-paren+ +close-paren+ +var+ +end-expression+ +fuzzy-flag+)
  135. (define-is-stuff-p cl-ppcre:scan +msgid-regexp+ +msgstr-regexp+ +flag-line+ +msgstr[]-regexp+ +msgid-plural+)
  136. (defun is-number-p (str)
  137. (cl-ppcre:scan +number+ str))
  138. (defmacro parse-token ((var predicate msg &rest predicate-arg) &body body)
  139. `(let-noerr ((,var (next-token *file*)))
  140. (with-error (,predicate ,msg ,@predicate-arg) ,@body)))
  141. (defun parse-any-operator (oper-list &key (test #'string=))
  142. (parse-token (operator (lambda (v) (member v
  143. oper-list
  144. :test test))
  145. (format nil "Error: expected operator like ~{~a ~} got ~s instead." oper-list operator)
  146. operator)
  147. operator))
  148. (defmacro define-parse-operators (names test &rest operators-list)
  149. `(progn
  150. ,@(mapcar #'(lambda (name operator)
  151. `(defun ,(alexandria:format-symbol t "~:@(~a~)" name) ()
  152. (parse-any-operator ,operator :test ,test)))
  153. names operators-list)))
  154. (define-parse-operators (parse-comparision-operator parse-arithmetic-operator
  155. parse-boolean-operator)
  156. #'string=
  157. *compar-op* *aritm-op* *boolean-op*)
  158. (define-parse-operators (parse-open-parent parse-close-parent
  159. parse-if-symbol parse-then-symbol)
  160. #'string=
  161. (list +open-paren+) (list +close-paren+) (list +?+) (list +colon+))
  162. (define-parse-operators (parse-msgid parse-msgstr
  163. parse-msgid-plural
  164. parse-msgstr-plural)
  165. #'(lambda (a b) (cl-ppcre:scan b a))
  166. (list +msgid-regexp+) (list +msgstr-regexp+) (list +msgid-plural+)
  167. (list +msgstr[]-regexp+))
  168. (defun is-bool-op-p (str)
  169. (or (is-and-op-p str)
  170. (is-or-op-p str)))
  171. (defun is-binary-operator (op)
  172. (or
  173. (eq #'bool-or op)
  174. (eq #'&& op)
  175. (eq #'== op)
  176. (eq #'!= op)
  177. (eq #'% op)
  178. (eq #'>* op)
  179. (eq #'<* op)
  180. (eq #'>=* op)
  181. (eq #'<=* op)))
  182. (defun is-ternary-operator (op)
  183. (eq #'stack-if op))
  184. (defnocfun parse-msgid-group ()
  185. (let-noerr ((msgid (parse-msgid))
  186. (string (parse-escaped-string)))
  187. (values string msgid)))
  188. (defnocfun parse-msgid-plural-group ()
  189. (let-noerr ((msgid (parse-msgid-plural))
  190. (string (parse-escaped-string)))
  191. (values string msgid)))
  192. (defmacro with-line-mode (&body body)
  193. `(progn
  194. (setf (line-mode *file*) t)
  195. ,@body))
  196. (defnocfun parse-po-file ()
  197. (with-line-mode
  198. (let-noerr ((plural-function (parse-header))
  199. (entries (parse-entries)))
  200. (values entries plural-function *has-errors* *parsing-errors*))))
  201. (defnocfun parse-entries (&optional (res (make-hash-table :test 'equal)))
  202. (with-no-errors
  203. (if (peek-valid-stream)
  204. (let-noerr ((peek (peek-token *file*))
  205. (flag :untranslated))
  206. (when (is-flag-line-p peek)
  207. (next-token *file*)
  208. (setf flag (alexandria:make-keyword (format nil "~:@(~a~)" (next-token *file*)))))
  209. (when-debug
  210. (format t "flag ~s~%errors ~s~%" flag *parsing-errors*))
  211. (let-noerr ((msgid (parse-msgid-group)))
  212. (when-debug
  213. (format t "msgid ~s errors ~s~%" msgid *parsing-errors*))
  214. (multiple-value-bind (first-translation plural-forms)
  215. (parse-msgstr-group)
  216. (when-debug
  217. (format t "msgsrt ~s ~s~%errors ~s~%" first-translation plural-forms *parsing-errors*))
  218. (let ((translation (make-translation
  219. (if (not (null plural-forms))
  220. (first plural-forms)
  221. first-translation)
  222. flag
  223. first-translation
  224. (if (not (null plural-forms))
  225. (rest plural-forms)
  226. '()))))
  227. (setf (gethash msgid res) translation)
  228. (parse-entries res)))))
  229. res)))
  230. (defun parse-msgstr-group ()
  231. (let-noerr ((peek (peek-token *file*)))
  232. (with-no-errors
  233. (cond
  234. ((is-msgid-plural-p peek)
  235. (let-noerr ((plural (parse-msgid-plural-group))
  236. (plural-forms (parse-msgstr-plural-group)))
  237. (values plural plural-forms)))
  238. ((is-msgstr-regexp-p peek)
  239. (with-no-errors
  240. (parse-msgstr)
  241. (let-noerr ((string (parse-escaped-string)))
  242. (values string nil))))
  243. (t
  244. (setf *has-errors* t)
  245. (push "Junk found while parsing-for entries" *parsing-errors*))))))
  246. (defun parse-msgstr-plural-group (&optional (res '()))
  247. (with-no-errors
  248. (parse-msgstr-plural)
  249. (let-noerr ((string (parse-escaped-string)))
  250. (if (and (peek-valid-stream)
  251. (is-msgstr[]-regexp-p (peek-token *file*)))
  252. (progn
  253. (parse-msgstr-plural-group (push string res)))
  254. (reverse (push string res))))))
  255. (defnocfun parse-header ()
  256. (parse-msgid-group)
  257. (parse-msgstr)
  258. (let-noerr ((header (parse-escaped-string)))
  259. (when-debug
  260. (format t "header~%~s~%" header))
  261. (extract-plural-function header)))
  262. (defun extract-plural-function (header)
  263. (when-debug
  264. (format t "header~%~s~%" header))
  265. (with-po-file (:buffer (cl-ppcre:regex-replace-all "(?m)\\n" header " "))
  266. (with-no-errors
  267. (next-token *file* :hook-to-stringpos nil);; the plural expression starts here
  268. (when-debug
  269. (format t "plural-expr: ****~a***~%" (peek-token *file*)))
  270. (multiple-value-bind (fun stack)
  271. (parse-plural-expression)
  272. (when-debug
  273. (format t "stack (~%~{~s~%~})~% fun ~a 1 -> ~a~%" stack fun (funcall fun 1)))
  274. fun))))
  275. (defun parse-escaped-string (&optional (res "") (delimc nil))
  276. (if (peek-valid-stream)
  277. (handler-bind ((i18n-conditions:out-of-bounds
  278. #'(lambda(e)
  279. (declare (ignore e))
  280. (invoke-restart 'use-value ""))))
  281. (let-noerr ((char (char@1+)))
  282. (cond
  283. ((string= char +escape-string-escape-char+)
  284. (let-noerr ((char-esc (char@1+)))
  285. (parse-escaped-string
  286. (concatenate 'string res (unescaped-char char-esc)) delimc)))
  287. ((string= char +escaped-string-delim+)
  288. (parse-escaped-string res (not delimc)))
  289. ((string= char " ")
  290. (if delimc
  291. (parse-escaped-string (concatenate 'string res char) delimc)
  292. (parse-escaped-string res delimc)))
  293. ((string= char +newline+)
  294. (cond
  295. ((or
  296. (string= (char@) " ")
  297. (string= (char@) +escaped-string-delim+)
  298. (string= (char@) +newline+))
  299. (parse-escaped-string res delimc))
  300. (t
  301. res)))
  302. (t
  303. (parse-escaped-string
  304. (concatenate 'string res char) delimc)))))
  305. res))
  306. (defun parse-plural-expression ()
  307. (let-noerr* ((peek (peek-token *file*))
  308. (stack (if (is-number-p peek)
  309. (list (parse-integer (next-token *file*)))
  310. (parse-ternary-expression))))
  311. (values #'(lambda (n) (let ((*n* n))
  312. (execute-expression stack)))
  313. stack)))
  314. (defun parse-ternary-expression ()
  315. (let-noerr ((first-term (parse-boolean-expression)))
  316. (if (not (is-end-expression-p (peek-token *file*)))
  317. (let-noerr ((if-symbol (parse-if-symbol))
  318. (then-term (parse-expression)))
  319. (with-no-errors
  320. (parse-then-symbol)
  321. (let-noerr ((else-term (parse-expression)))
  322. (list (string->function if-symbol)
  323. first-term
  324. then-term
  325. else-term))))
  326. (list first-term))))
  327. (defun parse-expression ()
  328. (let-noerr ((peek (peek-token *file*)))
  329. (cond
  330. ((is-number-p peek)
  331. (parse-integer (next-token *file*)))
  332. (t
  333. (parse-ternary-expression)))))
  334. (defun parse-boolean-expression (&optional (stack '()))
  335. (if (peek-valid-stream)
  336. (with-no-errors
  337. (let ((peek (peek-token *file*)))
  338. (cond
  339. ((is-close-paren-p peek)
  340. (parse-close-parent)
  341. stack)
  342. ((is-open-paren-p peek)
  343. (parse-open-parent)
  344. (setf stack (parse-boolean-expression stack)) ;; parse the subexpression
  345. (setf stack (parse-boolean-expression stack)) ;; parse the close parent
  346. (setf stack (parse-boolean-expression stack))) ;; parse the rest of the expression
  347. ((is-var-p peek)
  348. (setf stack (parse-arithmetic-expression stack))
  349. (setf stack (parse-boolean-expression stack)))
  350. ((is-bool-op-p (peek-token *file*))
  351. (let-noerr ((boolean-op (parse-boolean-operator)))
  352. (setf stack (parse-boolean-expression stack))
  353. (push (string->function boolean-op) stack)))
  354. ((is-end-expression-p peek)
  355. stack)
  356. ((is-?-p peek)
  357. stack)
  358. ((is-colon-p peek)
  359. stack)
  360. (t
  361. (push (format nil "Error: ~s, ~s or ~s expected, got ~s instead." +var+ +open-paren+ +close-paren+ peek)
  362. *parsing-errors*)
  363. (setf *has-errors* t)))))
  364. stack))
  365. (defun parse-arithmetic-expression (&optional stack)
  366. (let ((local-stack '()))
  367. (parse-token (var (lambda (v) (string= v +var+))
  368. (format nil "Error: expected ~s got ~s instead." +var+ var)
  369. var)
  370. (let-noerr ((operator (parse-any-operator (concatenate 'list *aritm-op* *compar-op*))))
  371. (cond
  372. ((member operator *compar-op* :test #'string=)
  373. (let-noerr ((number (parse-number)))
  374. (push (parse-integer number) local-stack)
  375. (push (quote *n*) local-stack)
  376. (push (string->function operator) local-stack)))
  377. ((member operator *aritm-op* :test #'string=) ; %
  378. (let-noerr ((number1 (parse-number)) ; n%100==1 -> (== % n 100 1) -> (1 100 n % ==)
  379. (compare-op (parse-comparision-operator))
  380. (number2 (parse-number)))
  381. (with-no-errors
  382. (push (parse-integer number2) local-stack)
  383. (push (parse-integer number1) local-stack)
  384. (push (quote *n*) local-stack)
  385. (push (string->function operator) local-stack)
  386. (push (string->function compare-op) local-stack))))))
  387. (push local-stack stack))))
  388. (defun parse-number ()
  389. (parse-token (number (lambda (v) (cl-ppcre:scan +number+ v))
  390. (format nil "Error: Number expected got ~s instead." number)
  391. number)
  392. number))
  393. (defmacro pop-apply-binary-operator (stack operator)
  394. (alexandria:with-gensyms (op1 op2)
  395. `(let ((,op1 (pop ,stack))
  396. (,op2 (pop ,stack)))
  397. (push (funcall ,operator ,op1 ,op2) ,stack))))
  398. (defmacro pop-apply-ternary-operator (stack operator)
  399. (alexandria:with-gensyms (if-term then else)
  400. `(let ((,if-term (pop ,stack))
  401. (,then (pop ,stack))
  402. (,else (pop ,stack)))
  403. (push (funcall ,operator ,if-term ,then ,else) ,stack))))
  404. (defun execute-expression (stack)
  405. (let ((exec-stack '()))
  406. (macrolet ((pop-stack (s) (pop s)))
  407. (labels ((execute ()
  408. (if (> (length stack) 0)
  409. (progn
  410. (let ((elem (pop stack)))
  411. (cond
  412. ((is-binary-operator elem)
  413. (pop-apply-binary-operator exec-stack elem))
  414. ((is-ternary-operator elem)
  415. (pop-apply-ternary-operator exec-stack elem))
  416. ((null elem)
  417. (push elem exec-stack))
  418. ((listp elem)
  419. (push (execute-expression elem) stack))
  420. ((symbolp elem)
  421. (push (symbol-value elem) exec-stack))
  422. (t
  423. (push elem exec-stack))))
  424. (execute))
  425. (pop exec-stack))))
  426. (setf stack (reverse stack))
  427. (execute)))))