i18n-utils.lisp 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. ;; This software is Copyright (c) Leslie P. Polzer, 2011.
  2. ;; Leslie P. Polzer grants you the rights to distribute
  3. ;; and use this software as governed by the terms
  4. ;; of the Lisp Lesser GNU Public License
  5. ;; (http://opensource.franz.com/preamble.html),
  6. ;; known as the LLGPL
  7. (in-package :cl-i18n-utils)
  8. (defun similar-phrase (phrase dict &key (threshold 3))
  9. "Scan the translation table looking for the best matching string of \"phrase\""
  10. (let ((min (list nil 1000)))
  11. (maphash #'(lambda (k v)
  12. (let ((dist (levenshtein-distance phrase k))
  13. (trsl (cl-i18n:translated v)))
  14. (if (and
  15. (string/= trsl "")
  16. (< dist threshold)
  17. (< dist (second min)))
  18. (setf min (list trsl dist)))))
  19. dict)
  20. (first min)))
  21. (defun generate-i18n-file (source-filename localization-filename
  22. &key
  23. (fuzziness 3)
  24. (plural-function #'cl-i18n:english-plural-form)
  25. (prefix-re "#!\""))
  26. "Reads a Lisp source file, get all strings and generate the translation
  27. resource, or merge with it if the translation resource already exists.
  28. Untranslated strings that show levenshtein distance less than :fuzziness
  29. with a translated one get the translation of the latter; such translation
  30. are marked as \"fuzzy\" in the output file"
  31. (let* ((path-splitted (cl-ppcre:split cl-i18n:*directory-sep-regexp* localization-filename))
  32. (root (if (> (length path-splitted) 1)
  33. (reduce #'(lambda (a b) (concatenate 'string a cl-i18n:*directory-sep* b))
  34. (subseq path-splitted 0 (1- (length path-splitted))))
  35. "."))
  36. (output-filename (car (last path-splitted)))
  37. (cl-i18n:*translation-file-root* root))
  38. (multiple-value-bind (i18n-table readed-plural-function)
  39. (cl-i18n:init-translation-table output-filename
  40. :store-hashtable nil
  41. :store-plural-function nil)
  42. (when (null readed-plural-function)
  43. (setf readed-plural-function plural-function))
  44. (let ((new-strings (get-strings source-filename prefix-re)))
  45. (mapc #'(lambda (s)
  46. (when (not (gethash s i18n-table))
  47. (let ((similar (similar-phrase s i18n-table :threshold fuzziness)))
  48. (setf (gethash s i18n-table)
  49. (cl-i18n:make-translation (if (not (null similar))
  50. similar
  51. "")
  52. (if (not (null similar))
  53. cl-i18n:+fuzzy-flag+
  54. cl-i18n:+untranslated-flag+))))))
  55. new-strings)
  56. (cl-i18n:save-language nil
  57. localization-filename
  58. i18n-table
  59. readed-plural-function)))))
  60. (defun get-strings (filename &optional (prefix-re "#!\""))
  61. "Get all strings on the form 'prefix-re'\"foo\", and collect them uniquely in a list."
  62. (let ((cl-i18n:*extr-function-re* prefix-re))
  63. (cl-i18n:with-extract-parsed-file (:filename filename)
  64. (remove-duplicates (cl-i18n:parse-extract-parsed-file) :test #'string=))))
  65. (defun read-i18n-file (filename)
  66. "Reads the i18n file, if it exists, and put the strings into a hash table"
  67. (if (probe-file filename)
  68. (with-open-file (stream filename)
  69. (cl-i18n:translation-list->hash-table (read stream) (make-hash-table :test 'equal)))
  70. (make-hash-table :test 'equal)))
  71. (defun levenshtein-distance (string1 string2)
  72. "Compute the levenshtein distance (i. e. how much are similars) between two strings"
  73. (macrolet ((matrix-elt (mat i j)
  74. `(nth ,j (nth ,i ,mat))))
  75. (labels ((gen-matrix (l1 l2)
  76. (let ((mat (copy-tree (make-list (1+ l2) :initial-element (make-list (1+ l1) :initial-element -1)))))
  77. (loop for i from 0 below (1+ l1) do (setf (matrix-elt mat 0 i) i))
  78. (loop for i from 0 below (1+ l2) do (setf (matrix-elt mat i 0) i))
  79. mat)))
  80. (let* ((l1 (length string1))
  81. (l2 (length string2))
  82. (mat (gen-matrix l1 l2)))
  83. (loop for i from 0 below l2 do
  84. (loop for j from 0 below l1 do
  85. (if (char= (char string2 i) (char string1 j))
  86. (setf (matrix-elt mat (1+ i) (1+ j)) (matrix-elt mat i j))
  87. (setf (matrix-elt mat (1+ i) (1+ j))
  88. (min
  89. (1+ (matrix-elt mat i (1+ j))) ; a deletion
  90. (1+ (matrix-elt mat (1+ i) j)) ; an insertion
  91. (1+ (matrix-elt mat i j))))))) ; a substitution
  92. (values (matrix-elt mat l2 l1) mat)))))
  93. (defun gen-translation-file (path output &key
  94. (ext "lisp$")
  95. (prefix-re "#!\""))
  96. "Scan a directory for sources files and collect all translatable strings.
  97. The strings are merged with a translation file (if exists)"
  98. (mapc #'(lambda (f) (generate-i18n-file (namestring f) output :prefix-re prefix-re))
  99. (remove-if-not #'(lambda (p) (cl-ppcre:scan ext (file-namestring p)))
  100. (uiop/filesystem:directory-files path))))
  101. (defun convert-dictionary-format (old &key (plural-function 'cl-i18n:n/=1-plural-form))
  102. "Convert an 0.4 translation table file format to the new one"
  103. (with-open-file (istream old :direction :input :if-does-not-exist :error)
  104. (let ((old-format (read istream)))
  105. (format nil "~s~%(~%~a)~%"
  106. (symbol-name plural-function)
  107. (with-output-to-string (ostream)
  108. (loop
  109. for ct = 0 then (+ ct 3) while (< ct (length old-format)) do
  110. (progn
  111. (format ostream "~a ~s~%~a ~s~%~a ~s~%~a ~s~%~a ~s~%"
  112. cl-i18n:+id+ (nth ct old-format)
  113. cl-i18n:+translation+ (nth (+ 2 ct) old-format)
  114. cl-i18n:+plurals-form+ (nth ct old-format)
  115. cl-i18n:+status+ cl-i18n:+translated-flag+
  116. cl-i18n:+plurals+ '()))))))))
  117. (defun convert-save-dictionary (old new)
  118. "Convert an 0.4 translation table file format to the new one and save in a new file"
  119. (with-open-file (stream new :direction :output :if-exists :error)
  120. (princ (convert-dictionary-format old) stream)))