po.el 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. ;;; po.el --- basic support of PO translation files -*- coding: latin-1; -*-
  2. ;; Copyright (C) 1995-1998, 2000-2012 Free Software Foundation, Inc.
  3. ;; Authors: François Pinard <pinard@iro.umontreal.ca>,
  4. ;; Greg McGary <gkm@magilla.cichlid.com>,
  5. ;; Bruno Haible <bruno@clisp.org>.
  6. ;; Keywords: i18n, files
  7. ;; This file is part of GNU Emacs.
  8. ;; GNU Emacs is free software: you can redistribute it and/or modify
  9. ;; it under the terms of the GNU General Public License as published by
  10. ;; the Free Software Foundation, either version 3 of the License, or
  11. ;; (at your option) any later version.
  12. ;; GNU Emacs is distributed in the hope that it will be useful,
  13. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ;; GNU General Public License for more details.
  16. ;; You should have received a copy of the GNU General Public License
  17. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  18. ;;; Commentary:
  19. ;; This package makes sure visiting PO files decodes them correctly,
  20. ;; according to the Charset= header in the PO file. For more support
  21. ;; for editing PO files, see po-mode.el.
  22. ;;; Code:
  23. (defconst po-content-type-charset-alist
  24. '(("ASCII" . undecided)
  25. ("ANSI_X3.4-1968" . undecided)
  26. ("US-ASCII" . undecided))
  27. "Alist of coding system versus GNU libc/libiconv canonical charset name.
  28. Contains canonical charset names that don't correspond to coding systems.")
  29. (defun po-find-charset (filename)
  30. "Return PO charset value for FILENAME.
  31. If FILENAME is a cons cell, its CDR is a buffer that already contains
  32. the PO file (but not yet decoded)."
  33. (let ((charset-regexp
  34. "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
  35. (buf (and (consp filename) (cdr filename)))
  36. (short-read nil))
  37. (when buf
  38. (set-buffer buf)
  39. (goto-char (point-min)))
  40. ;; Try the first 4096 bytes. In case we cannot find the charset value
  41. ;; within the first 4096 bytes (the PO file might start with a long
  42. ;; comment) try the next 4096 bytes repeatedly until we'll know for sure
  43. ;; we've checked the empty header entry entirely.
  44. (while (not (or short-read (re-search-forward "^msgid" nil t) buf))
  45. (save-excursion
  46. (goto-char (point-max))
  47. (let ((pair (insert-file-contents-literally filename nil
  48. (1- (point))
  49. (1- (+ (point) 4096)))))
  50. (setq short-read (< (nth 1 pair) 4096)))))
  51. (cond ((re-search-forward charset-regexp nil t) (match-string 1))
  52. ((or short-read buf) nil)
  53. ;; We've found the first msgid; maybe, only a part of the msgstr
  54. ;; value was loaded. Load the next 1024 bytes; if charset still
  55. ;; isn't available, give up.
  56. (t (save-excursion
  57. (goto-char (point-max))
  58. (insert-file-contents-literally filename nil
  59. (1- (point))
  60. (1- (+ (point) 1024))))
  61. (if (re-search-forward charset-regexp nil t)
  62. (match-string 1))))))
  63. (defun po-find-file-coding-system-guts (operation filename)
  64. "Return a (DECODING . ENCODING) pair for OPERATION on PO file FILENAME.
  65. Do so according to FILENAME's declared charset.
  66. FILENAME may be a cons (NAME . BUFFER). In that case, detect charset
  67. in BUFFER."
  68. (and
  69. (eq operation 'insert-file-contents)
  70. (or (if (consp filename) (buffer-live-p (cdr filename)))
  71. (file-exists-p filename))
  72. (with-temp-buffer
  73. (let* ((coding-system-for-read 'no-conversion)
  74. (charset (or (po-find-charset filename) "ascii"))
  75. assoc)
  76. (list (cond
  77. ((setq assoc
  78. (assoc-string charset
  79. po-content-type-charset-alist
  80. t))
  81. (cdr assoc))
  82. ((or (setq assoc (assoc-string charset coding-system-alist t))
  83. (setq assoc
  84. (assoc-string (subst-char-in-string ?_ ?-
  85. charset)
  86. coding-system-alist t)))
  87. (intern (car assoc)))
  88. ;; In principle we should also check the `mime-charset'
  89. ;; property of everything in the base coding system
  90. ;; list, but there should always be a coding system
  91. ;; corresponding to the MIME name.
  92. ((featurep 'code-pages)
  93. ;; Give up.
  94. 'raw-text)
  95. (t
  96. ;; Try again with code-pages loaded. Maybe it's best
  97. ;; to require it initially?
  98. (require 'code-pages nil t)
  99. (if (or
  100. (setq assoc (assoc-string charset coding-system-alist t))
  101. (setq assoc (assoc-string (subst-char-in-string
  102. ?_ ?- charset)
  103. coding-system-alist t)))
  104. (intern (car assoc))
  105. 'raw-text))))))))
  106. ;;;###autoload
  107. (defun po-find-file-coding-system (arg-list)
  108. "Return a (DECODING . ENCODING) pair, according to PO file's charset.
  109. Called through `file-coding-system-alist', before the file is visited for real."
  110. (po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list))))
  111. ;; This is for XEmacs.
  112. ;(defun po-find-file-coding-system (operation filename)
  113. ; "\
  114. ;Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
  115. ;Called through file-coding-system-alist, before the file is visited for real."
  116. ; (po-find-file-coding-system-guts operation filename))
  117. (provide 'po)
  118. ;;; po.el ends here