rfc2231.el 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. ;;; rfc2231.el --- Functions for decoding rfc2231 headers
  2. ;; Copyright (C) 1998-2012 Free Software Foundation, Inc.
  3. ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
  4. ;; This file is part of GNU Emacs.
  5. ;; GNU Emacs is free software: you can redistribute it and/or modify
  6. ;; it under the terms of the GNU General Public License as published by
  7. ;; the Free Software Foundation, either version 3 of the License, or
  8. ;; (at your option) any later version.
  9. ;; GNU Emacs is distributed in the hope that it will be useful,
  10. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. ;; GNU General Public License for more details.
  13. ;; You should have received a copy of the GNU General Public License
  14. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  15. ;;; Commentary:
  16. ;;; Code:
  17. (eval-when-compile (require 'cl))
  18. (require 'ietf-drums)
  19. (require 'rfc2047)
  20. (autoload 'mm-encode-body "mm-bodies")
  21. (autoload 'mail-header-remove-whitespace "mail-parse")
  22. (autoload 'mail-header-remove-comments "mail-parse")
  23. (defun rfc2231-get-value (ct attribute)
  24. "Return the value of ATTRIBUTE from CT."
  25. (cdr (assq attribute (cdr ct))))
  26. (defun rfc2231-parse-qp-string (string)
  27. "Parse QP-encoded string using `rfc2231-parse-string'.
  28. N.B. This is in violation with RFC2047, but it seem to be in common use."
  29. (rfc2231-parse-string (rfc2047-decode-string string)))
  30. (defun rfc2231-parse-string (string &optional signal-error)
  31. "Parse STRING and return a list.
  32. The list will be on the form
  33. `(name (attribute . value) (attribute . value)...)'.
  34. If the optional SIGNAL-ERROR is non-nil, signal an error when this
  35. function fails in parsing of parameters. Otherwise, this function
  36. must never cause a Lisp error."
  37. (with-temp-buffer
  38. (let ((ttoken (ietf-drums-token-to-list ietf-drums-text-token))
  39. (stoken (ietf-drums-token-to-list ietf-drums-tspecials))
  40. (ntoken (ietf-drums-token-to-list "0-9"))
  41. c type attribute encoded number parameters value)
  42. (ietf-drums-init
  43. (condition-case nil
  44. (mail-header-remove-whitespace
  45. (mail-header-remove-comments string))
  46. ;; The most likely cause of an error is unbalanced parentheses
  47. ;; or double-quotes. If all parentheses and double-quotes are
  48. ;; quoted meaninglessly with backslashes, removing them might
  49. ;; make it parsable. Let's try...
  50. (error
  51. (let (mod)
  52. (when (and (string-match "\\\\\"" string)
  53. (not (string-match "\\`\"\\|[^\\]\"" string)))
  54. (setq string (mm-replace-in-string string "\\\\\"" "\"")
  55. mod t))
  56. (when (and (string-match "\\\\(" string)
  57. (string-match "\\\\)" string)
  58. (not (string-match "\\`(\\|[^\\][()]" string)))
  59. (setq string (mm-replace-in-string string "\\\\\\([()]\\)" "\\1")
  60. mod t))
  61. (or (and mod
  62. (ignore-errors
  63. (mail-header-remove-whitespace
  64. (mail-header-remove-comments string))))
  65. ;; Finally, attempt to extract only type.
  66. (if (string-match
  67. (concat "\\`[\t\n ]*\\([^" ietf-drums-tspecials "\t\n ]+"
  68. "\\(?:/[^" ietf-drums-tspecials
  69. "\t\n ]+\\)?\\)\\(?:[\t\n ;]\\|\\'\\)")
  70. string)
  71. (match-string 1 string)
  72. ""))))))
  73. (let ((table (copy-syntax-table ietf-drums-syntax-table)))
  74. (modify-syntax-entry ?\' "w" table)
  75. (modify-syntax-entry ?* " " table)
  76. (modify-syntax-entry ?\; " " table)
  77. (modify-syntax-entry ?= " " table)
  78. ;; The following isn't valid, but one should be liberal
  79. ;; in what one receives.
  80. (modify-syntax-entry ?\: "w" table)
  81. (set-syntax-table table))
  82. (setq c (char-after))
  83. (when (and (memq c ttoken)
  84. (not (memq c stoken))
  85. (setq type (ignore-errors
  86. (downcase
  87. (buffer-substring (point) (progn
  88. (forward-sexp 1)
  89. (point)))))))
  90. ;; Do the params
  91. (condition-case err
  92. (progn
  93. (while (not (eobp))
  94. (setq c (char-after))
  95. (unless (eq c ?\;)
  96. (error "Invalid header: %s" string))
  97. (forward-char 1)
  98. ;; If c in nil, then this is an invalid header, but
  99. ;; since elm generates invalid headers on this form,
  100. ;; we allow it.
  101. (when (setq c (char-after))
  102. (if (and (memq c ttoken)
  103. (not (memq c stoken)))
  104. (setq attribute
  105. (intern
  106. (downcase
  107. (buffer-substring
  108. (point) (progn (forward-sexp 1) (point))))))
  109. (error "Invalid header: %s" string))
  110. (setq c (char-after))
  111. (if (eq c ?*)
  112. (progn
  113. (forward-char 1)
  114. (setq c (char-after))
  115. (if (not (memq c ntoken))
  116. (setq encoded t
  117. number nil)
  118. (setq number
  119. (string-to-number
  120. (buffer-substring
  121. (point) (progn (forward-sexp 1) (point)))))
  122. (setq c (char-after))
  123. (when (eq c ?*)
  124. (setq encoded t)
  125. (forward-char 1)
  126. (setq c (char-after)))))
  127. (setq number nil
  128. encoded nil))
  129. (unless (eq c ?=)
  130. (error "Invalid header: %s" string))
  131. (forward-char 1)
  132. (setq c (char-after))
  133. (cond
  134. ((eq c ?\")
  135. (setq value (buffer-substring (1+ (point))
  136. (progn
  137. (forward-sexp 1)
  138. (1- (point)))))
  139. (when encoded
  140. (setq value (mapconcat (lambda (c) (format "%%%02x" c))
  141. value ""))))
  142. ((and (or (memq c ttoken)
  143. ;; EXTENSION: Support non-ascii chars.
  144. (> c ?\177))
  145. (not (memq c stoken)))
  146. (setq value
  147. (buffer-substring
  148. (point)
  149. (progn
  150. ;; Jump over asterisk, non-ASCII
  151. ;; and non-boundary characters.
  152. (while (and c
  153. (or (eq c ?*)
  154. (> c ?\177)
  155. (not (eq (char-syntax c) ? ))))
  156. (forward-char 1)
  157. (setq c (char-after)))
  158. (point)))))
  159. (t
  160. (error "Invalid header: %s" string)))
  161. (push (list attribute value number encoded)
  162. parameters))))
  163. (error
  164. (setq parameters nil)
  165. (when signal-error
  166. (signal (car err) (cdr err)))))
  167. ;; Now collect and concatenate continuation parameters.
  168. (let ((cparams nil)
  169. elem)
  170. (loop for (attribute value part encoded)
  171. in (sort parameters (lambda (e1 e2)
  172. (< (or (caddr e1) 0)
  173. (or (caddr e2) 0))))
  174. do (cond
  175. ;; First part.
  176. ((or (not (setq elem (assq attribute cparams)))
  177. (and (numberp part)
  178. (zerop part)))
  179. (push (list attribute value encoded) cparams))
  180. ;; Repetition of a part; do nothing.
  181. ((and elem
  182. (null number))
  183. )
  184. ;; Concatenate continuation parts.
  185. (t
  186. (setcar (cdr elem) (concat (cadr elem) value)))))
  187. ;; Finally decode encoded values.
  188. (cons type (mapcar
  189. (lambda (elem)
  190. (cons (car elem)
  191. (if (nth 2 elem)
  192. (rfc2231-decode-encoded-string (nth 1 elem))
  193. (nth 1 elem))))
  194. (nreverse cparams))))))))
  195. (defun rfc2231-decode-encoded-string (string)
  196. "Decode an RFC2231-encoded string.
  197. These look like:
  198. \"us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\",
  199. \"us-ascii''This%20is%20%2A%2A%2Afun%2A%2A%2A\",
  200. \"'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\",
  201. \"''This%20is%20%2A%2A%2Afun%2A%2A%2A\", or
  202. \"This is ***fun***\"."
  203. (string-match "\\`\\(?:\\([^']+\\)?'\\([^']+\\)?'\\)?\\(.+\\)" string)
  204. (let ((coding-system (mm-charset-to-coding-system (match-string 1 string)))
  205. ;;(language (match-string 2 string))
  206. (value (match-string 3 string)))
  207. (mm-with-unibyte-buffer
  208. (insert value)
  209. (goto-char (point-min))
  210. (while (re-search-forward "%\\([0-9A-Fa-f][0-9A-Fa-f]\\)" nil t)
  211. (insert
  212. (prog1
  213. (string-to-number (match-string 1) 16)
  214. (delete-region (match-beginning 0) (match-end 0)))))
  215. ;; Decode using the charset, if any.
  216. (if (memq coding-system '(nil ascii))
  217. (buffer-string)
  218. (mm-decode-coding-string (buffer-string) coding-system)))))
  219. (defun rfc2231-encode-string (param value)
  220. "Return and PARAM=VALUE string encoded according to RFC2231.
  221. Use `mml-insert-parameter' or `mml-insert-parameter-string' to insert
  222. the result of this function."
  223. (let ((control (ietf-drums-token-to-list ietf-drums-no-ws-ctl-token))
  224. (tspecial (ietf-drums-token-to-list ietf-drums-tspecials))
  225. (special (ietf-drums-token-to-list "*'%\n\t"))
  226. (ascii (ietf-drums-token-to-list ietf-drums-text-token))
  227. (num -1)
  228. ;; Don't make lines exceeding 76 column.
  229. (limit (- 74 (length param)))
  230. spacep encodep charsetp charset broken)
  231. (mm-with-multibyte-buffer
  232. (insert value)
  233. (goto-char (point-min))
  234. (while (not (eobp))
  235. (cond
  236. ((or (memq (following-char) control)
  237. (memq (following-char) tspecial)
  238. (memq (following-char) special))
  239. (setq encodep t))
  240. ((eq (following-char) ? )
  241. (setq spacep t))
  242. ((not (memq (following-char) ascii))
  243. (setq charsetp t)))
  244. (forward-char 1))
  245. (when charsetp
  246. (setq charset (mm-encode-body)))
  247. (mm-disable-multibyte)
  248. (cond
  249. ((or encodep charsetp
  250. (progn
  251. (end-of-line)
  252. (> (current-column) (if spacep (- limit 2) limit))))
  253. (setq limit (- limit 6))
  254. (goto-char (point-min))
  255. (insert (symbol-name (or charset 'us-ascii)) "''")
  256. (while (not (eobp))
  257. (if (or (not (memq (following-char) ascii))
  258. (memq (following-char) control)
  259. (memq (following-char) tspecial)
  260. (memq (following-char) special)
  261. (eq (following-char) ? ))
  262. (progn
  263. (when (>= (current-column) (1- limit))
  264. (insert ";\n")
  265. (setq broken t))
  266. (insert "%" (format "%02x" (following-char)))
  267. (delete-char 1))
  268. (when (> (current-column) limit)
  269. (insert ";\n")
  270. (setq broken t))
  271. (forward-char 1)))
  272. (goto-char (point-min))
  273. (if (not broken)
  274. (insert param "*=")
  275. (while (not (eobp))
  276. (insert (if (>= num 0) " " "")
  277. param "*" (format "%d" (incf num)) "*=")
  278. (forward-line 1))))
  279. (spacep
  280. (goto-char (point-min))
  281. (insert param "=\"")
  282. (goto-char (point-max))
  283. (insert "\""))
  284. (t
  285. (goto-char (point-min))
  286. (insert param "=")))
  287. (buffer-string))))
  288. (provide 'rfc2231)
  289. ;;; rfc2231.el ends here