utf-7.el 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. ;;; utf-7.el --- utf-7 coding system
  2. ;; Copyright (C) 2003-2012 Free Software Foundation, Inc.
  3. ;; Author: Dave Love <fx@gnu.org>
  4. ;; Keywords: i18n, mail
  5. ;; This file is part of GNU Emacs.
  6. ;; GNU Emacs is free software: you can redistribute it and/or modify
  7. ;; it under the terms of the GNU General Public License as published by
  8. ;; the Free Software Foundation, either version 3 of the License, or
  9. ;; (at your option) any later version.
  10. ;; GNU Emacs is distributed in the hope that it will be useful,
  11. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. ;; GNU General Public License for more details.
  14. ;; You should have received a copy of the GNU General Public License
  15. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  16. ;;; Commentary:
  17. ;; Defines a coding system for UTF-7, defined in RFC 2152. Non-ASCII
  18. ;; segments are encoded as base64-encoded big endian UTF-16. Also
  19. ;; defines a variation required for IMAP (RFC 2060).
  20. ;; The encoding and decoding were originally taken from Jon K Hellan's
  21. ;; implementation in Gnus, but has been substantially re-done.
  22. ;; This probably needs more attention. In particular, it's not
  23. ;; completely consistent with iconv's behavior. It's arguable
  24. ;; whether the IMAP version should be a coding system since it's
  25. ;; apparently only used for IMAP mailbox names, so it's commented out.
  26. ;;; Code:
  (defun utf-7-decode (len imap)
    "Decode LEN bytes of UTF-7 at point, replacing them in the buffer.
IMAP non-nil means use the IMAP version, in which `&' is the shift
character and `,' takes the place of `/' in the base64 alphabet.
Point is preserved.  Return the size of the region once decoding is
finished."
    (save-excursion
      (save-restriction
        (narrow-to-region (point) (+ (point) len))
        (let ((plain-set (if imap "^&" "^+"))
              (b64-set (if imap "A-Za-z0-9+," "A-Za-z0-9+/")))
          (while (not (eobp))
            ;; Step over directly represented text up to the next shift char.
            (skip-chars-forward plain-set)
            (unless (eobp)
              ;; Move past the shift char; it is removed further down once
              ;; the shifted sequence has been decoded.
              (forward-char)
              (let* ((start (point))
                     (count (skip-chars-forward b64-set)))
                ;; A `-' closing the shifted sequence is absorbed.
                (when (eq (char-after) ?-)
                  (delete-char 1))
                ;; COUNT of zero is an encoded lone shift char: keep it asis.
                (unless (zerop count)
                  ;; Restore the `=' padding that the encoder stripped.
                  (insert-char ?= (mod (- count) 4))
                  (when imap
                    (subst-char-in-region start (point) ?, ?/))
                  (base64-decode-region start (point))
                  (decode-coding-region start (point) 'utf-16be)
                  ;; Drop the shift char that introduced this sequence.
                  (save-excursion
                    (goto-char start)
                    (delete-char -1)))))))
        (- (point-max) (point-min)))))
  54. ;;;###autoload
  (defun utf-7-post-read-conversion (len)
    "Decode LEN bytes of UTF-7 at point using the standard (non-IMAP) rules.
The work is done by `utf-7-decode', whose value is returned."
    (let ((use-imap nil))
      (utf-7-decode len use-imap)))
  57. ;;;###autoload
  (defun utf-7-imap-post-read-conversion (len)
    "Decode LEN bytes of UTF-7 at point using the IMAP rules.
The work is done by `utf-7-decode', whose value is returned."
    (let ((use-imap t))
      (utf-7-decode len use-imap)))
  (defun utf-7-encode (from to imap)
    "Encode the text between FROM and TO in the current buffer to UTF-7.
FROM may instead be a string, in which case that string is encoded
and TO is ignored.

IMAP non-nil means use the IMAP version: the shift character is `&'
rather than `+', `,' replaces `/' in the base64 alphabet, and a
shifted sequence at the very end of the text is still closed with `-'.

The encoded text is produced in a newly generated buffer, which is
left as the current buffer.  Return nil."
    (let* ((old-buf (current-buffer))
           (esc (if imap ?& ?+))
           ;; These are characters which can be encoded asis.
           (skip-chars (if imap
                           "\t\n\r\x20-\x25\x27-\x7e" ; rfc2060
                         ;; This includes the rfc2152 optional set.
                         ;; Perhaps it shouldn't (like iconv).
                         "\t\n\r -*,-[]-}"))
           (not-skip-chars (format "^%s%c" skip-chars esc)))
      ;; Work on a copy of the text; this buffer stays current on return.
      (set-buffer (generate-new-buffer " *temp*"))
      (if (stringp from)
          (insert from)
        (insert-buffer-substring old-buf from to))
      (goto-char (point-min))
      (while (not (eobp))
        (skip-chars-forward skip-chars)
        (if (eq esc (char-after))
            ;; A literal shift char is written as the shift char plus `-'.
            (progn (forward-char)
                   (insert ?-))
          (unless (eobp)
            (insert esc)
            (let ((p (point)))
              (skip-chars-forward not-skip-chars)
              (save-restriction
                ;; encode-coding-region doesn't preserve point
                (narrow-to-region p (point))
                (encode-coding-region p (point-max) 'utf-16be)
                ;; NO-LINE-BREAK must be non-nil: otherwise a newline is
                ;; inserted after every 76 characters of output, and the
                ;; trimming below would stop at the first such newline
                ;; and throw away the rest of a long run.
                (base64-encode-region p (point-max) t)
                (if imap
                    (subst-char-in-region p (point-max) ?/ ?,))
                (goto-char p)
                ;; As I read the RFC, this isn't correct, but it's
                ;; consistent with iconv, at least regarding `='.
                (skip-chars-forward "^= \t\n")
                (delete-region (point) (point-max))))
            ;; RFC2060 stipulates that all names MUST end in US-ASCII (i.e.
            ;; a name that ends with a Unicode octet MUST end with a "-").
            (if (or imap (not (eobp)))
                (insert ?-)))))
      nil))
  103. ;;;###autoload
  (defun utf-7-pre-write-conversion (from to)
    "Encode the text between FROM and TO to standard (non-IMAP) UTF-7.
The work is done by `utf-7-encode', whose value is returned."
    (let ((use-imap nil))
      (utf-7-encode from to use-imap)))
  106. ;;;###autoload
  (defun utf-7-imap-pre-write-conversion (from to)
    "Encode the text between FROM and TO to the IMAP version of UTF-7.
The work is done by `utf-7-encode', whose value is returned."
    (let ((use-imap t))
      (utf-7-encode from to use-imap)))
  109. (provide 'utf-7)
  110. ;;; utf-7.el ends here