po.scm 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2019, 2021 Julien Lepiller <julien@lepiller.eu>
  3. ;;; Copyright © 2020 Ludovic Courtès <ludo@gnu.org>
  4. ;;;
  5. ;;; This file is part of GNU Guix.
  6. ;;;
  7. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  8. ;;; under the terms of the GNU General Public License as published by
  9. ;;; the Free Software Foundation; either version 3 of the License, or (at
  10. ;;; your option) any later version.
  11. ;;;
  12. ;;; GNU Guix is distributed in the hope that it will be useful, but
  13. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ;;; GNU General Public License for more details.
  16. ;;;
  17. ;;; You should have received a copy of the GNU General Public License
  18. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  19. (define-module (guix build po)
  20. #:use-module (ice-9 match)
  21. #:use-module (ice-9 peg)
  22. #:use-module (ice-9 regex)
  23. #:use-module (ice-9 textual-ports)
  24. #:use-module (ice-9 vlist)
  25. #:use-module (srfi srfi-1)
  26. #:export (read-po-file
  27. translate-cross-references))
  28. ;; A small parser for po files
  29. (define-peg-pattern po-file body (* (or entry whitespace)))
  30. (define-peg-pattern whitespace body (or " " "\t" "\n"))
  31. (define-peg-pattern comment-chr body (range #\space #\頋))
  32. (define-peg-pattern comment none (and "#" (* comment-chr) "\n"))
  33. (define-peg-pattern flags all (and (ignore "#, ") (* comment-chr) (ignore "\n")))
  34. (define-peg-pattern entry all
  35. (and (* (or flags comment (ignore (* whitespace))))
  36. (ignore "msgid ") msgid (ignore (* whitespace))
  37. (ignore "msgstr ") msgstr))
  38. (define-peg-pattern escape body (or "\\\\" "\\\"" "\\n"))
  39. (define-peg-pattern str-chr body (or " " "!" (and (ignore "\\") "\"")
  40. "\\n" (and (ignore "\\") "\\")
  41. (range #\# #\頋)))
  42. (define-peg-pattern msgid all content)
  43. (define-peg-pattern msgstr all content)
  44. (define-peg-pattern content body
  45. (and (ignore "\"") (* str-chr) (ignore "\"")
  46. (? (and (ignore (* whitespace)) content))))
  47. (define (interpret-newline-escape str)
  48. "Replace '\\n' sequences in STR with a newline character."
  49. (let loop ((str str)
  50. (result '()))
  51. (match (string-contains str "\\n")
  52. (#f (string-concatenate-reverse (cons str result)))
  53. (index
  54. (let ((prefix (string-take str index)))
  55. (loop (string-drop str (+ 2 index))
  56. (append (list "\n" prefix) result)))))))
  57. (define (parse-tree->assoc parse-tree)
  58. "Converts a po PARSE-TREE to an association list, where the key is the msgid
  59. and the value is the msgstr. The result only contains non fuzzy strings."
  60. (define (comments->flags comments)
  61. (match comments
  62. (('flags flags)
  63. (map (lambda (flag) (string->symbol (string-trim-both flag #\space)))
  64. (string-split flags #\,)))
  65. ((? list? comments)
  66. (fold
  67. (lambda (comment res)
  68. (match comment
  69. ((? string? _) res)
  70. (flags
  71. (append (comments->flags flags)
  72. res))))
  73. '()
  74. comments))))
  75. (match parse-tree
  76. (() '())
  77. ((entry . parse-tree)
  78. (match entry
  79. ((? string? entry)
  80. (parse-tree->assoc parse-tree))
  81. ;; empty msgid
  82. (('entry ('msgid ('msgstr msgstr)))
  83. (parse-tree->assoc parse-tree))
  84. ;; empty msgstr
  85. (('entry ('msgid msgid) 'msgstr)
  86. (parse-tree->assoc parse-tree))
  87. (('entry _ ('msgid msgid) 'msgstr)
  88. (parse-tree->assoc parse-tree))
  89. (('entry ('msgid msgid) ('msgstr msgstr))
  90. (acons (interpret-newline-escape msgid)
  91. (interpret-newline-escape msgstr)
  92. (parse-tree->assoc parse-tree)))
  93. (('entry ('msgid msgid) ('msgstr msgstr))
  94. (acons (interpret-newline-escape msgid)
  95. (interpret-newline-escape msgstr)
  96. (parse-tree->assoc parse-tree)))
  97. (('entry comments ('msgid msgid) ('msgstr msgstr))
  98. (if (member 'fuzzy (comments->flags comments))
  99. (parse-tree->assoc parse-tree)
  100. (acons (interpret-newline-escape msgid)
  101. (interpret-newline-escape msgstr)
  102. (parse-tree->assoc parse-tree))))))))
  103. (define (read-po-file port)
  104. "Read a .po file from PORT and return an alist of msgid and msgstr."
  105. (let ((tree (peg:tree (match-pattern
  106. po-file
  107. (get-string-all port)))))
  108. (parse-tree->assoc tree)))
  109. (define (canonicalize-whitespace str)
  110. "Change whitespace (newlines, etc.) in STR to @code{#\\space}."
  111. (string-map (lambda (chr)
  112. (if (char-set-contains? char-set:whitespace chr)
  113. #\space
  114. chr))
  115. str))
  116. (define xref-regexp
  117. ;; Texinfo cross-reference regexp.
  118. (make-regexp "@(px|x)?ref\\{([^,}]+)"))
  119. (define (translate-cross-references texi pofile)
  120. "Translate the cross-references that appear in @var{texi}, the initial
  121. translation of a Texinfo file, using the msgid/msgstr pairs from @var{pofile}."
  122. (define translations
  123. (call-with-input-file pofile read-po-file))
  124. (define content
  125. (call-with-input-file texi get-string-all))
  126. (define matches
  127. (list-matches xref-regexp content))
  128. (define translation-map
  129. (fold (match-lambda*
  130. (((msgid . str) result)
  131. (vhash-cons msgid str result)))
  132. vlist-null
  133. translations))
  134. (define translated
  135. ;; Iterate over MATCHES and replace cross-references with their
  136. ;; translation found in TRANSLATION-MAP. (We can't use
  137. ;; 'substitute*' because matches can span multiple lines.)
  138. (let loop ((matches matches)
  139. (offset 0)
  140. (result '()))
  141. (match matches
  142. (()
  143. (string-concatenate-reverse
  144. (cons (string-drop content offset) result)))
  145. ((head . tail)
  146. (let ((prefix (match:substring head 1))
  147. (ref (canonicalize-whitespace (match:substring head 2))))
  148. (define translated
  149. (string-append "@" (or prefix "")
  150. "ref{"
  151. (match (vhash-assoc ref translation-map)
  152. (#f ref)
  153. ((_ . str) str))))
  154. (loop tail
  155. (match:end head)
  156. (append (list translated
  157. (string-take
  158. (string-drop content offset)
  159. (- (match:start head) offset)))
  160. result)))))))
  161. (format (current-error-port)
  162. "translated ~a cross-references in '~a'~%"
  163. (length matches) texi)
  164. (call-with-output-file texi
  165. (lambda (port)
  166. (display translated port))))