deuglify.el 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. ;;; deuglify.el --- deuglify broken Outlook (Express) articles
  2. ;; Copyright (C) 2001-2012 Free Software Foundation, Inc.
  3. ;; Author: Raymond Scholz <rscholz@zonix.de>
  4. ;; Thomas Steffen
  5. ;; (unwrapping algorithm, based on an idea of Stefan Monnier)
  6. ;; Keywords: mail, news
  7. ;; This file is part of GNU Emacs.
  8. ;; GNU Emacs is free software: you can redistribute it and/or modify
  9. ;; it under the terms of the GNU General Public License as published by
  10. ;; the Free Software Foundation, either version 3 of the License, or
  11. ;; (at your option) any later version.
  12. ;; GNU Emacs is distributed in the hope that it will be useful,
  13. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ;; GNU General Public License for more details.
  16. ;; You should have received a copy of the GNU General Public License
  17. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  18. ;;; Commentary:
  19. ;; This file enables Gnus to repair broken citations produced by
  20. ;; common user agents like MS Outlook (Express). It may repair
  21. ;; articles of other user agents too.
  22. ;;
  23. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  24. ;;
  25. ;; Outlook sometimes wraps cited lines before sending a message as
  26. ;; seen in this example:
  27. ;;
  28. ;; Example #1
  29. ;; ----------
  30. ;;
  31. ;; John Doe wrote:
  32. ;;
  33. ;; > This sentence no verb. This sentence no verb. This sentence
  34. ;; no
  35. ;; > verb. This sentence no verb. This sentence no verb. This
  36. ;; > sentence no verb.
  37. ;;
  38. ;; The function `gnus-article-outlook-unwrap-lines' tries to recognize those
  39. ;; erroneously wrapped lines and will unwrap them. I.e. putting the
  40. ;; wrapped parts ("no" in this example) back where they belong (at the
  41. ;; end of the cited line above).
  42. ;;
  43. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  44. ;;
  45. ;; Note that some people not only use broken user agents but also
  46. ;; practice a bad citation style by omitting blank lines between the
  47. ;; cited text and their own text.
  48. ;:
  49. ;; Example #2
  50. ;; ----------
  51. ;;
  52. ;; John Doe wrote:
  53. ;;
  54. ;; > This sentence no verb. This sentence no verb. This sentence no
  55. ;; You forgot in all your sentences.
  56. ;; > verb. This sentence no verb. This sentence no verb. This
  57. ;; > sentence no verb.
  58. ;;
  59. ;; Unwrapping "You forgot in all your sentences." would be invalid as
  60. ;; this part wasn't intended to be cited text.
  61. ;; `gnus-article-outlook-unwrap-lines' will only unwrap lines if the resulting
  62. ;; citation line will be of a certain maximum length. You can control
  63. ;; this by adjusting `gnus-outlook-deuglify-unwrap-max'. Also
  64. ;; unwrapping will only be done if the line above the (possibly)
  65. ;; wrapped line has a minimum length of `gnus-outlook-deuglify-unwrap-min'.
  66. ;;
  67. ;; Furthermore no unwrapping will be undertaken if the last character
  68. ;; is one of the chars specified in
  69. ;; `gnus-outlook-deuglify-unwrap-stop-chars'. Setting this to ".?!"
  70. ;; inhibits unwrapping if the cited line ends with a full stop,
  71. ;; question mark or exclamation mark. Note that this variable
  72. ;; defaults to `nil', triggering a few false positives but generally
  73. ;; giving you better results.
  74. ;;
  75. ;; Unwrapping works on every level of citation. Thus you will be able
  76. ;; repair broken citations of broken user agents citing broken
  77. ;; citations of broken user agents citing broken citations...
  78. ;;
  79. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  80. ;;
  81. ;; Citations are commonly introduced with an attribution line
  82. ;; indicating who wrote the cited text. Outlook adds superfluous
  83. ;; information that can be found in the header of the message to this
  84. ;; line and often wraps it.
  85. ;;
  86. ;; If that weren't enough, lots of people write their own text above
  87. ;; the cited text and cite the complete original article below.
  88. ;;
  89. ;; Example #3
  90. ;; ----------
  91. ;;
  92. ;; Hey, John. There's no in all your sentences!
  93. ;;
  94. ;; John Doe <john.doe@some.domain> wrote in message
  95. ;; news:a87usw8$dklsssa$2@some.news.server...
  96. ;; > This sentence no verb. This sentence no verb. This sentence
  97. ;; no
  98. ;; > verb. This sentence no verb. This sentence no verb. This
  99. ;; > sentence no verb.
  100. ;; >
  101. ;; > Bye, John
  102. ;;
  103. ;; Repairing the attribution line will be done by function
  104. ;; `gnus-article-outlook-repair-attribution which calls other function that
  105. ;; try to recognize and repair broken attribution lines. See variable
  106. ;; `gnus-outlook-deuglify-attrib-cut-regexp' for stuff that should be
  107. ;; cut off from the beginning of an attribution line and variable
  108. ;; `gnus-outlook-deuglify-attrib-verb-regexp' for the verbs that are
  109. ;; required to be found in an attribution line. These function return
  110. ;; the point where the repaired attribution line starts.
  111. ;;
  112. ;; Rearranging the article so that the cited text appears above the
  113. ;; new text will be done by function
  114. ;; `gnus-article-outlook-rearrange-citation'. This function calls
  115. ;; `gnus-article-outlook-repair-attribution to find and repair an attribution
  116. ;; line.
  117. ;;
  118. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  119. ;;
  120. ;; Well, and that's what the message will look like after applying
  121. ;; deuglification:
  122. ;;
  123. ;; Example #3 (deuglified)
  124. ;; -----------------------
  125. ;;
  126. ;; John Doe <john.doe@some.domain> wrote:
  127. ;;
  128. ;; > This sentence no verb. This sentence no verb. This sentence no
  129. ;; > verb. This sentence no verb. This sentence no verb. This
  130. ;; > sentence no verb.
  131. ;; >
  132. ;; > Bye, John
  133. ;;
  134. ;; Hey, John. There's no in all your sentences!
  135. ;;
  136. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  137. ;;
  138. ;; Usage
  139. ;; -----
  140. ;;
  141. ;; Press `W k' in the Summary Buffer.
  142. ;;
  143. ;; Non recommended usage :-)
  144. ;; ---------------------
  145. ;;
  146. ;; To automatically invoke deuglification on every article you read,
  147. ;; put something like that in your .gnus:
  148. ;;
  149. ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-unwrap-lines)
  150. ;;
  151. ;; or _one_ of the following lines:
  152. ;;
  153. ;; ;; repair broken attribution lines
  154. ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-repair-attribution)
  155. ;;
  156. ;; ;; repair broken attribution lines and citations
  157. ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-rearrange-citation)
  158. ;;
  159. ;; Note that there always may be some false positives, so I suggest
  160. ;; using the manual invocation. After deuglification you may want to
  161. ;; refill the whole article using `W w'.
  162. ;;
  163. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  164. ;;
  165. ;; Limitations
  166. ;; -----------
  167. ;;
  168. ;; As I said before there may (or will) be a few false positives on
  169. ;; unwrapping cited lines with `gnus-article-outlook-unwrap-lines'.
  170. ;;
  171. ;; `gnus-article-outlook-repair-attribution will only fix the first
  172. ;; attribution line found in the article. Furthermore it fixed to
  173. ;; certain kinds of attributions. And there may be horribly many
  174. ;; false positives, vanishing lines and so on -- so don't trust your
  175. ;; eyes. Again I recommend manual invocation.
  176. ;;
  177. ;; `gnus-article-outlook-rearrange-citation' carries all the limitations of
  178. ;; `gnus-article-outlook-repair-attribution.
  179. ;;
  180. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  181. ;;
  182. ;; See ChangeLog for other changes.
  183. ;;
  184. ;; Revision 1.5 2002/01/27 14:39:17 rscholz
  185. ;; * New variable `gnus-outlook-deuglify-no-wrap-chars' to inhibit
  186. ;; unwrapping if one these chars is first in the possibly wrapped line.
  187. ;; * Improved rearranging of the article.
  188. ;; * New function `gnus-outlook-repair-attribution-block' for repairing
  189. ;; those big "Original Message (following some headers)" attributions.
  190. ;;
  191. ;; Revision 1.4 2002/01/03 14:05:00 rscholz
  192. ;; Renamed `gnus-outlook-deuglify-article' to
  193. ;; `gnus-article-outlook-deuglify-article'.
  194. ;; Made it easier to deuglify the article while being in Gnus' Article
  195. ;; Edit Mode. (suggested by Phil Nitschke)
  196. ;;
  197. ;;
  198. ;; Revision 1.3 2002/01/02 23:35:54 rscholz
  199. ;; Fix a bug that caused succeeding long attribution lines to be
  200. ;; unwrapped. Minor doc fixes and regular expression tuning.
  201. ;;
  202. ;; Revision 1.2 2001/12/30 20:14:34 rscholz
  203. ;; Clean up source.
  204. ;;
  205. ;; Revision 1.1 2001/12/30 20:13:32 rscholz
  206. ;; Initial revision
  207. ;;
  208. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  209. ;;; Code:
  210. (require 'gnus-art)
  211. (require 'gnus-sum)
  212. (defconst gnus-outlook-deuglify-version "1.5 Gnus version"
  213. "Version of gnus-outlook-deuglify.")
  214. ;;; User Customizable Variables:
  215. (defgroup gnus-outlook-deuglify nil
  216. "Deuglify articles generated by broken user agents like MS Outlook (Express)."
  217. :version "22.1"
  218. :group 'gnus)
  219. (defcustom gnus-outlook-deuglify-unwrap-min 45
  220. "Minimum length of the cited line above the (possibly) wrapped line."
  221. :version "22.1"
  222. :type 'integer
  223. :group 'gnus-outlook-deuglify)
  224. (defcustom gnus-outlook-deuglify-unwrap-max 95
  225. "Maximum length of the cited line after unwrapping."
  226. :version "22.1"
  227. :type 'integer
  228. :group 'gnus-outlook-deuglify)
  229. (defcustom gnus-outlook-deuglify-cite-marks ">|#%"
  230. "Characters that indicate cited lines."
  231. :version "22.1"
  232. :type 'string
  233. :group 'gnus-outlook-deuglify)
  234. (defcustom gnus-outlook-deuglify-unwrap-stop-chars nil ;; ".?!" or nil
  235. "Characters that inhibit unwrapping if they are the last one on the cited line above the possible wrapped line."
  236. :version "22.1"
  237. :type '(radio (const :format "None " nil)
  238. (string :value ".?!"))
  239. :group 'gnus-outlook-deuglify)
  240. (defcustom gnus-outlook-deuglify-no-wrap-chars "`"
  241. "Characters that inhibit unwrapping if they are the first one in the possibly wrapped line."
  242. :version "22.1"
  243. :type 'string
  244. :group 'gnus-outlook-deuglify)
  245. (defcustom gnus-outlook-deuglify-attrib-cut-regexp
  246. "\\(On \\|Am \\)?\\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\),[^,]+, "
  247. "Regular expression matching the beginning of an attribution line that should be cut off."
  248. :version "22.1"
  249. :type 'string
  250. :group 'gnus-outlook-deuglify)
  251. (defcustom gnus-outlook-deuglify-attrib-verb-regexp
  252. "wrote\\|writes\\|says\\|schrieb\\|schreibt\\|meinte\\|skrev\\|a écrit\\|schreef\\|escribió"
  253. "Regular expression matching the verb used in an attribution line."
  254. :version "22.1"
  255. :type 'string
  256. :group 'gnus-outlook-deuglify)
  257. (defcustom gnus-outlook-deuglify-attrib-end-regexp
  258. ": *\\|\\.\\.\\."
  259. "Regular expression matching the end of an attribution line."
  260. :version "22.1"
  261. :type 'string
  262. :group 'gnus-outlook-deuglify)
  263. (defcustom gnus-outlook-display-hook nil
  264. "A hook called after an deuglified article has been prepared.
  265. It is run after `gnus-article-prepare-hook'."
  266. :version "22.1"
  267. :type 'hook
  268. :group 'gnus-outlook-deuglify)
  269. ;; Functions
  270. (defun gnus-outlook-display-article-buffer ()
  271. "Redisplay current buffer or article buffer."
  272. (with-current-buffer (or gnus-article-buffer (current-buffer))
  273. ;; "Emulate" `gnus-article-prepare-display' without calling
  274. ;; it. Calling `gnus-article-prepare-display' on an already
  275. ;; prepared article removes all MIME parts. I'm unsure whether
  276. ;; this is a bug or not.
  277. (gnus-article-highlight t)
  278. (gnus-treat-article nil)
  279. (gnus-run-hooks 'gnus-article-prepare-hook
  280. 'gnus-outlook-display-hook)))
  281. ;;;###autoload
  282. (defun gnus-article-outlook-unwrap-lines (&optional nodisplay)
  283. "Unwrap lines that appear to be wrapped citation lines.
  284. You can control what lines will be unwrapped by frobbing
  285. `gnus-outlook-deuglify-unwrap-min' and `gnus-outlook-deuglify-unwrap-max',
  286. indicating the minimum and maximum length of an unwrapped citation line. If
  287. NODISPLAY is non-nil, don't redisplay the article buffer."
  288. (interactive "P")
  289. (let ((case-fold-search nil)
  290. (inhibit-read-only t)
  291. (cite-marks gnus-outlook-deuglify-cite-marks)
  292. (no-wrap gnus-outlook-deuglify-no-wrap-chars)
  293. (stop-chars gnus-outlook-deuglify-unwrap-stop-chars))
  294. (gnus-with-article-buffer
  295. (article-goto-body)
  296. (while (re-search-forward
  297. (concat
  298. "^\\([ \t" cite-marks "]*\\)"
  299. "\\([" cite-marks "].*[^\n " stop-chars "]\\)[ \t]?\n"
  300. "\\1\\([^\n " cite-marks no-wrap "]+.*\\)$")
  301. nil t)
  302. (let ((len12 (- (match-end 2) (match-beginning 1)))
  303. (len3 (- (match-end 3) (match-beginning 3))))
  304. (when (and (> len12 gnus-outlook-deuglify-unwrap-min)
  305. (< (+ len12 len3) gnus-outlook-deuglify-unwrap-max))
  306. (replace-match "\\1\\2 \\3")
  307. (goto-char (match-beginning 0)))))))
  308. (unless nodisplay (gnus-outlook-display-article-buffer)))
  309. (defun gnus-outlook-rearrange-article (attr-start)
  310. "Put the text from ATTR-START to the end of buffer at the top of the article buffer."
  311. ;; FIXME: 1. (*) text/plain ( ) text/html
  312. (let ((inhibit-read-only t)
  313. (cite-marks gnus-outlook-deuglify-cite-marks))
  314. (gnus-with-article-buffer
  315. (article-goto-body)
  316. ;; article does not start with attribution
  317. (unless (= (point) attr-start)
  318. (gnus-kill-all-overlays)
  319. (let ((cur (point))
  320. ;; before signature or end of buffer
  321. (to (if (gnus-article-search-signature)
  322. (point)
  323. (point-max))))
  324. ;; handle the case where the full quote is below the
  325. ;; signature
  326. (when (< to attr-start)
  327. (setq to (point-max)))
  328. (save-excursion
  329. (narrow-to-region attr-start to)
  330. (goto-char attr-start)
  331. (forward-line)
  332. (unless (looking-at ">")
  333. (message-indent-citation (point) (point-max) 'yank-only)
  334. (goto-char (point-max))
  335. (newline)
  336. (setq to (point-max)))
  337. (widen))
  338. (transpose-regions cur attr-start attr-start to))))))
  339. ;; John Doe <john.doe@some.domain> wrote in message
  340. ;; news:a87usw8$dklsssa$2@some.news.server...
  341. (defun gnus-outlook-repair-attribution-outlook ()
  342. "Repair a broken attribution line (Outlook)."
  343. (let ((case-fold-search nil)
  344. (inhibit-read-only t)
  345. (cite-marks gnus-outlook-deuglify-cite-marks))
  346. (gnus-with-article-buffer
  347. (article-goto-body)
  348. (when (re-search-forward
  349. (concat "^\\([^" cite-marks "].+\\)"
  350. "\\(" gnus-outlook-deuglify-attrib-verb-regexp "\\)"
  351. "\\(.*\n?[^\n" cite-marks "].*\\)?"
  352. "\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$")
  353. nil t)
  354. (gnus-kill-all-overlays)
  355. (replace-match "\\1\\2\\4")
  356. (match-beginning 0)))))
  357. ;; ----- Original Message -----
  358. ;; From: "John Doe" <john.doe@some.domain>
  359. ;; To: "Doe Foundation" <info@doefnd.org>
  360. ;; Sent: Monday, November 19, 2001 12:13 PM
  361. ;; Subject: More Doenuts
  362. (defun gnus-outlook-repair-attribution-block ()
  363. "Repair a big broken attribution block."
  364. (let ((case-fold-search nil)
  365. (inhibit-read-only t)
  366. (cite-marks gnus-outlook-deuglify-cite-marks))
  367. (gnus-with-article-buffer
  368. (article-goto-body)
  369. (when (re-search-forward
  370. (concat "^[" cite-marks " \t]*--* ?[^-]+ [^-]+ ?--*\\s *\n"
  371. "[^\n:]+:[ \t]*\\([^\n]+\\)\n"
  372. "\\([^\n:]+:[ \t]*[^\n]+\n\\)+")
  373. nil t)
  374. (gnus-kill-all-overlays)
  375. (replace-match "\\1 wrote:\n")
  376. (match-beginning 0)))))
  377. ;; On Wed, 16 Jan 2002 23:23:30 +0100, John Doe <john.doe@some.domain> wrote:
  378. (defun gnus-outlook-repair-attribution-other ()
  379. "Repair a broken attribution line (other user agents than Outlook)."
  380. (let ((case-fold-search nil)
  381. (inhibit-read-only t)
  382. (cite-marks gnus-outlook-deuglify-cite-marks))
  383. (gnus-with-article-buffer
  384. (article-goto-body)
  385. (when (re-search-forward
  386. (concat "^\\("gnus-outlook-deuglify-attrib-cut-regexp"\\)?"
  387. "\\([^" cite-marks "].+\\)\n\\([^\n" cite-marks "].*\\)?"
  388. "\\(" gnus-outlook-deuglify-attrib-verb-regexp "\\).*"
  389. "\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$")
  390. nil t)
  391. (gnus-kill-all-overlays)
  392. (replace-match "\\4 \\5\\6\\7")
  393. (match-beginning 0)))))
  394. ;;;###autoload
  395. (defun gnus-article-outlook-repair-attribution (&optional nodisplay)
  396. "Repair a broken attribution line.
  397. If NODISPLAY is non-nil, don't redisplay the article buffer."
  398. (interactive "P")
  399. (let ((attrib-start
  400. (or
  401. (gnus-outlook-repair-attribution-other)
  402. (gnus-outlook-repair-attribution-block)
  403. (gnus-outlook-repair-attribution-outlook))))
  404. (unless nodisplay (gnus-outlook-display-article-buffer))
  405. attrib-start))
  406. (defun gnus-article-outlook-rearrange-citation (&optional nodisplay)
  407. "Repair broken citations.
  408. If NODISPLAY is non-nil, don't redisplay the article buffer."
  409. (interactive "P")
  410. (let ((attrib-start (gnus-article-outlook-repair-attribution 'nodisplay)))
  411. ;; rearrange citations if an attribution line has been recognized
  412. (if attrib-start
  413. (gnus-outlook-rearrange-article attrib-start)))
  414. (unless nodisplay (gnus-outlook-display-article-buffer)))
  415. ;;;###autoload
  416. (defun gnus-outlook-deuglify-article (&optional nodisplay)
  417. "Full deuglify of broken Outlook (Express) articles.
  418. Treat dumbquotes, unwrap lines, repair attribution and rearrange citation. If
  419. NODISPLAY is non-nil, don't redisplay the article buffer."
  420. (interactive "P")
  421. ;; apply treatment of dumb quotes
  422. (gnus-article-treat-dumbquotes)
  423. ;; repair wrapped cited lines
  424. (gnus-article-outlook-unwrap-lines 'nodisplay)
  425. ;; repair attribution line and rearrange citation.
  426. (gnus-article-outlook-rearrange-citation 'nodisplay)
  427. (unless nodisplay (gnus-outlook-display-article-buffer)))
  428. ;;;###autoload
  429. (defun gnus-article-outlook-deuglify-article ()
  430. "Deuglify broken Outlook (Express) articles and redisplay."
  431. (interactive)
  432. (gnus-outlook-deuglify-article nil))
  433. (provide 'deuglify)
  434. ;; Local Variables:
  435. ;; coding: iso-8859-1
  436. ;; End:
  437. ;;; deuglify.el ends here