tildify.el 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. ;;; tildify.el --- adding hard spaces into texts -*- lexical-binding: t -*-
  2. ;; Copyright (C) 1997-2015 Free Software Foundation, Inc.
  3. ;; Author: Milan Zamazal <pdm@zamazal.org>
  4. ;; Michal Nazarewicz <mina86@mina86.com>
  5. ;; Version: 4.6.1
  6. ;; Keywords: text, TeX, SGML, wp
  7. ;; This file is part of GNU Emacs.
  8. ;; GNU Emacs is free software: you can redistribute it and/or modify
  9. ;; it under the terms of the GNU General Public License as published by
  10. ;; the Free Software Foundation, either version 3 of the License, or
  11. ;; (at your option) any later version.
  12. ;; GNU Emacs is distributed in the hope that it will be useful,
  13. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ;; GNU General Public License for more details.
  16. ;; You should have received a copy of the GNU General Public License
  17. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  18. ;;; Commentary:
  19. ;; This package can be typically used for adding forgotten tildes in TeX
  20. ;; sources or adding `&nbsp;' sequences in SGML (e.g. HTML) texts.
  21. ;;
  22. ;; For example, the Czech orthography requires avoiding one letter
  23. ;; prepositions at line endings. So they should be connected with the
  24. ;; following words by a tilde. Some users forget to do this all the
  25. ;; time. The purpose of this program is to check the text and suggest
  26. ;; adding of missing tildes on some places. It works in a similar
  27. ;; manner to `query-replace-regexp'.
  28. ;;
  29. ;; The functionality of this program is actually performing query
  30. ;; replace on certain regions, but for historical reasons explained
  31. ;; above it is called `tildify'.
  32. ;;
  33. ;; The default variable settings are suited for Czech, so do not try to
  34. ;; understand them if you are not familiar with Czech grammar and spelling.
  35. ;;
  36. ;; The algorithm was inspired by Petr Olšák's program `vlna'. Abilities of
  37. ;; `tildify.el' are a little limited; if you have improvement suggestions, let
  38. ;; me know.
  39. ;;; Code:
  40. ;;; *** User configuration variables ***
  41. (defgroup tildify nil
  42. "Add hard spaces or other text fragments to text buffers."
  43. :version "21.1"
  44. :group 'wp)
  45. (defcustom tildify-pattern
  46. "\\(?:[,:;(][ \t]*[a]\\|\\<[AIKOSUVZikosuvz]\\)\\([ \t]+\\|[ \t]*\n[ \t]*\\)\\(?:\\w\\|[([{\\]\\|<[a-zA-Z]\\)"
  47. "A pattern specifying where to insert hard spaces.
  48. `tildify-buffer' function will replace first capturing group of the regexp with
  49. a hard space (as defined by `tildify-space-string' variable). (Hint: \\(…\\)
  50. non-capturing groups can be used for grouping prior to the part of the regexp
  51. matching the white space). The pattern is matched case-sensitive regardless of
  52. the value of `case-fold-search' setting."
  53. :version "25.1"
  54. :group 'tildify
  55. :type 'string
  56. :safe t)
  57. (defcustom tildify-pattern-alist ()
  58. "Alist specifying where to insert hard spaces.
  59. Each alist item is of the form (MAJOR-MODE REGEXP NUMBER) or
  60. \(MAJOR-MODE . SYMBOL).
  61. MAJOR-MODE defines major mode, for which the item applies. It can be either:
  62. - a symbol equal to the major mode of the buffer to be fixed
  63. - t for default item, this applies to all major modes not defined in another
  64. alist item
  65. REGEXP is a regular expression matching the part of a text, where a hard space
  66. is missing. The regexp is always case sensitive, regardless of the current
  67. `case-fold-search' setting.
  68. NUMBER defines the number of the REGEXP subexpression which should be replaced
  69. by the hard space character.
  70. The form (MAJOR-MODE . SYMBOL) defines alias item for MAJOR-MODE. For this
  71. mode, the item for the mode SYMBOL is looked up in the alist instead."
  72. :group 'tildify
  73. :type '(repeat (cons :tag "Entry for major mode"
  74. (choice (const :tag "Default" t)
  75. (symbol :tag "Major mode"))
  76. (choice (list :tag "Regexp"
  77. regexp
  78. (integer :tag "Group "))
  79. (symbol :tag "Like other")))))
  80. (make-obsolete-variable 'tildify-pattern-alist 'tildify-pattern "25.1")
  81. (defcustom tildify-space-string "\u00A0"
  82. "Representation of a hard (a.k.a. no-break) space in current major mode.
  83. Used by `tildify-buffer' in places where space is required but line
  84. cannot be broken. For example \"~\" for TeX or \"&#160;\" for SGML,
  85. HTML and XML modes. A no-break space Unicode character (\"\\u00A0\")
  86. might be used for other modes if compatible encoding is used.
  87. If nil, current major mode has no way to represent a hard space."
  88. :version "25.1"
  89. :group 'tildify
  90. :type '(choice (const :tag "Space character (no hard-space representation)"
  91. " ")
  92. (const :tag "No-break space (U+00A0)" "\u00A0")
  93. (string :tag "Custom string"))
  94. :safe t)
  95. (defcustom tildify-string-alist ()
  96. "Alist specifying what is a hard space in the current major mode.
  97. Each alist item is of the form (MAJOR-MODE . STRING) or
  98. \(MAJOR-MODE . SYMBOL).
  99. MAJOR-MODE defines major mode, for which the item applies. It can be either:
  100. - a symbol equal to the major mode of the buffer to be fixed
  101. - t for default item, this applies to all major modes not defined in another
  102. alist item
  103. STRING defines the hard space, which is inserted at places defined by
  104. `tildify-pattern'. For example it can be \"~\" for TeX or \"&nbsp;\" for SGML.
  105. The form (MAJOR-MODE . SYMBOL) defines alias item for MAJOR-MODE. For this
  106. mode, the item for the mode SYMBOL is looked up in the alist instead."
  107. :group 'tildify
  108. :type '(repeat (cons :tag "Entry for major mode"
  109. (choice (const :tag "Default" t)
  110. (symbol :tag "Major mode"))
  111. (choice (const :tag "No-break space (U+00A0)" "\u00A0")
  112. (string :tag "String ")
  113. (symbol :tag "Like other")))))
  114. (make-obsolete-variable 'tildify-string-alist
  115. 'tildify-space-string "25.1")
  116. (defcustom tildify-foreach-region-function
  117. 'tildify--deprecated-ignore-evironments
  118. "A function calling a callback on portions of the buffer to tildify.
  119. The function is called from `tildify-buffer' function with three arguments: FUNC
  120. BEG END. FUNC is a callback accepting two arguments -- REG-BEG REG-END --
  121. specifying a portion of buffer to operate on.
  122. The BEG and END arguments may be used to limit portion of the buffer being
  123. scanned, but the `tildify-foreach-region-function' is not required to make use
  124. of them. IT must, however, terminate as soon as FUNC returns nil.
  125. For example, if `tildify-buffer' function should operate on the whole buffer,
  126. a simple pass through function could be used:
  127. (setq-local tildify-foreach-region-function
  128. (lambda (cb beg end) (funcall cb beg end)))
  129. or better still:
  130. (setq-local tildify-foreach-region-function \\='funcall)
  131. See `tildify-foreach-ignore-environments' function for other ways to use the
  132. variable."
  133. :version "25.1"
  134. :group 'tildify
  135. :type 'function)
  136. (defcustom tildify-ignored-environments-alist ()
  137. "Alist specifying ignored structured text environments.
  138. Parts of text defined in this alist are skipped without performing hard space
  139. insertion on them. These setting allow skipping text parts like verbatim or
  140. math environments in TeX or preformatted text in SGML.
  141. Each list element is of the form
  142. (MAJOR-MODE (BEG-REGEX . END-REGEX) (BEG-REGEX . END-REGEX) ... )
  143. MAJOR-MODE defines major mode, for which the item applies. It can be either:
  144. - a symbol equal to the major mode of the buffer to be fixed
  145. - t for default item, this applies to all major modes not defined in another
  146. alist item
  147. See `tildify-foreach-ignore-environments' function for description of BEG-REGEX
  148. and END-REGEX."
  149. :group 'tildify
  150. :type '(repeat
  151. (cons :tag "Entry for major mode"
  152. (choice (const :tag "Default" t)
  153. (symbol :tag "Major mode"))
  154. (choice
  155. (const :tag "None")
  156. (repeat
  157. :tag "Environments"
  158. (cons :tag "Regexp pair"
  159. (regexp :tag "Open ")
  160. (choice :tag "Close"
  161. (regexp :tag "Regexp")
  162. (list :tag "Regexp and groups (concatenated)"
  163. (choice (regexp :tag "Regexp")
  164. (integer :tag "Group "))))))
  165. (symbol :tag "Like other")))))
  166. (make-obsolete-variable 'tildify-ignored-environments-alist
  167. 'tildify-foreach-region-function "25.1")
  168. ;;; *** Interactive functions ***
  169. ;;;###autoload
  170. (defun tildify-region (beg end &optional dont-ask)
  171. "Add hard spaces in the region between BEG and END.
  172. See variables `tildify-pattern', `tildify-space-string', and
  173. `tildify-ignored-environments-alist' for information about configuration
  174. parameters.
  175. This function performs no refilling of the changed text.
  176. If DONT-ASK is set, or called interactively with prefix argument, user
  177. won't be prompted for confirmation of each substitution."
  178. (interactive "*rP")
  179. (let (case-fold-search (count 0) (ask (not dont-ask)))
  180. (tildify--foreach-region
  181. (lambda (beg end)
  182. (let ((aux (tildify-tildify beg end ask)))
  183. (setq count (+ count (car aux)))
  184. (if (not (eq (cdr aux) 'force))
  185. (cdr aux)
  186. (setq ask nil)
  187. t)))
  188. beg end)
  189. (message "%d spaces replaced." count)))
  190. ;;;###autoload
  191. (defun tildify-buffer (&optional dont-ask)
  192. "Add hard spaces in the current buffer.
  193. See variables `tildify-pattern', `tildify-space-string', and
  194. `tildify-ignored-environments-alist' for information about configuration
  195. parameters.
  196. This function performs no refilling of the changed text.
  197. If DONT-ASK is set, or called interactively with prefix argument, user
  198. won't be prompted for confirmation of each substitution."
  199. (interactive "*P")
  200. (tildify-region (point-min) (point-max) dont-ask))
  201. ;;; *** Auxiliary functions ***
  202. (defun tildify--pick-alist-entry (mode-alist &optional mode)
  203. "Return alist item for the MODE-ALIST in the current major MODE."
  204. (let ((alist (cdr (or (assoc (or mode major-mode) mode-alist)
  205. (assoc t mode-alist)))))
  206. (if (and alist
  207. (symbolp alist))
  208. (tildify--pick-alist-entry mode-alist alist)
  209. alist)))
  210. (make-obsolete 'tildify--pick-alist-entry
  211. "it should not be used in new code." "25.1")
  212. (defun tildify--deprecated-ignore-evironments (callback beg end)
  213. "Call CALLBACK on regions between BEG and END.
  214. Call CALLBACK on each region outside of environment to ignore. Stop scanning
  215. the region as soon as CALLBACK returns nil. Environments to ignore are
  216. defined by deprecated `tildify-ignored-environments-alist'. CALLBACK may be
  217. called on portions of the buffer outside of [BEG END)."
  218. (let ((pairs (tildify--pick-alist-entry tildify-ignored-environments-alist)))
  219. (if pairs
  220. (tildify-foreach-ignore-environments pairs callback beg end)
  221. (funcall callback beg end))))
  222. (make-obsolete 'tildify--deprecated-ignore-evironments
  223. "it should not be used in new code." "25.1")
  224. (defun tildify-foreach-ignore-environments (pairs callback _beg end)
  225. "Outside of environments defined by PAIRS call CALLBACK.
  226. PAIRS is a list of (BEG-REGEX . END-REGEX) cons. BEG-REGEX is a regexp matching
  227. beginning of a text part to be skipped. END-REGEX defines end of the
  228. corresponding text part and can be either:
  229. - a regexp matching the end of the skipped text part
  230. - a list of regexps and numbers, which will compose the ending regexp by
  231. concatenating themselves, while replacing the numbers with corresponding
  232. subexpressions of BEG-REGEX (this is used to solve cases like
  233. \\\\verb<character> in TeX).
  234. CALLBACK is a function accepting two arguments -- REG-BEG and REG-END -- that
  235. will be called for portions of the buffer outside of the environments defined by
  236. PAIRS regexes.
  237. The function will return as soon as CALLBACK returns nil or point goes past END.
  238. CALLBACK may be called on portions of the buffer outside of [BEG END); in fact
  239. BEG argument is ignored.
  240. This function is meant to be used to set `tildify-foreach-region-function'
  241. variable. For example, for an XML file one might use:
  242. (setq-local tildify-foreach-region-function
  243. (apply-partially \\='tildify-foreach-ignore-environments
  244. \\='((\"<! *--\" . \"-- *>\") (\"<\" . \">\"))))"
  245. (let ((beg-re (concat "\\(?:" (mapconcat 'car pairs "\\)\\|\\(?:") "\\)"))
  246. p end-re)
  247. (save-excursion
  248. (save-restriction
  249. (widen)
  250. (goto-char (point-min))
  251. (while (and (< (setq p (point)) end)
  252. (if (setq end-re (tildify--find-env beg-re pairs))
  253. (and (funcall callback p (match-beginning 0))
  254. (< (point) end)
  255. (re-search-forward end-re nil t))
  256. (funcall callback p end)
  257. nil)))))))
  258. (defun tildify--foreach-region (callback beg end)
  259. "Call CALLBACK on portions of the buffer between BEG and END.
  260. Which portions to call CALLBACK on is determined by
  261. `tildify-foreach-region-function' variable. This function merely makes sure
  262. CALLBACK is not called with portions of the buffer outside of [BEG END)."
  263. (let ((func (lambda (reg-beg reg-end)
  264. (setq reg-beg (max reg-beg beg) reg-end (min reg-end end))
  265. (and (or (>= reg-beg reg-end)
  266. (funcall callback reg-beg reg-end))
  267. (< reg-end end)))))
  268. (funcall tildify-foreach-region-function func beg end)))
  269. (defun tildify--find-env (regexp pairs)
  270. "Find environment using REGEXP.
  271. Return regexp for the end of the environment found in PAIRS or nil if
  272. no environment was found."
  273. ;; Find environment
  274. (when (re-search-forward regexp nil t)
  275. (save-match-data
  276. (let ((match (match-string 0)))
  277. (while (not (eq (string-match (caar pairs) match) 0))
  278. (setq pairs (cdr pairs)))
  279. (let ((expression (cdar pairs)))
  280. (if (stringp expression)
  281. expression
  282. (mapconcat
  283. (lambda (expr)
  284. (if (stringp expr)
  285. expr
  286. (regexp-quote (match-string expr match))))
  287. expression
  288. "")))))))
  289. (defun tildify-tildify (beg end ask)
  290. "Add tilde characters in the region between BEG and END.
  291. This function does not do any further checking except of for comments and
  292. macros.
  293. If ASK is nil, perform replace without asking user for confirmation.
  294. Returns (count . response) cons where count is number of string
  295. replacements done and response is one of symbols: t (all right), nil
  296. (quit), force (replace without further questions)."
  297. (save-excursion
  298. (goto-char beg)
  299. (let ((regexp tildify-pattern)
  300. (match-number 1)
  301. (tilde (or (tildify--pick-alist-entry tildify-string-alist)
  302. tildify-space-string))
  303. (end-marker (copy-marker end))
  304. answer
  305. bad-answer
  306. replace
  307. quit
  308. (message-log-max nil)
  309. (count 0))
  310. ;; For the time being, tildify-pattern-alist overwrites tildify-pattern
  311. (let ((alist (tildify--pick-alist-entry tildify-pattern-alist)))
  312. (when alist
  313. (setq regexp (car alist) match-number (cadr alist))))
  314. (while (and (not quit)
  315. (re-search-forward regexp (marker-position end-marker) t))
  316. (when (or (not ask)
  317. (progn
  318. (goto-char (match-beginning match-number))
  319. (setq bad-answer t)
  320. (while bad-answer
  321. (setq bad-answer nil)
  322. (message "Replace? (yn!q) ")
  323. (setq answer (read-event)))
  324. (cond
  325. ((or (eq answer ?y) (eq answer ? ) (eq answer 'space))
  326. (setq replace t))
  327. ((eq answer ?n)
  328. (setq replace nil))
  329. ((eq answer ?!)
  330. (setq replace t
  331. ask nil))
  332. ((eq answer ?q)
  333. (setq replace nil
  334. quit t))
  335. (t
  336. (message "Press y, n, !, or q.")
  337. (setq bad-answer t)))
  338. replace))
  339. (replace-match tilde t t nil match-number)
  340. (setq count (1+ count))))
  341. ;; Return value
  342. (cons count (cond (quit nil)
  343. ((not ask) 'force)
  344. (t t))))))
  345. ;;; *** Tildify Mode ***
  346. (defcustom tildify-space-pattern "[,:;(][ \t]*[a]\\|\\<[AIKOSUVWZikosuvwz]"
  347. "Pattern specifying whether to insert a hard space at point.
  348. If the pattern matches `looking-back', a hard space needs to be inserted instead
  349. of a space at point. The regexp is always case sensitive, regardless of the
  350. current `case-fold-search' setting."
  351. :version "25.1"
  352. :group 'tildify
  353. :type 'string)
  354. (defcustom tildify-space-predicates '(tildify-space-region-predicate)
  355. "A list of predicate functions for `tildify-space' function."
  356. :version "25.1"
  357. :group 'tildify
  358. :type '(repeat 'function))
  359. (defcustom tildify-double-space-undos t
  360. "Weather `tildify-space' should undo hard space when space is typed again."
  361. :version "25.1"
  362. :group 'tildify
  363. :type 'boolean)
  364. ;;;###autoload
  365. (defun tildify-space ()
  366. "Convert space before point into a hard space if the context is right.
  367. If
  368. * character before point is a space character,
  369. * character before that has \"w\" character syntax (i.e. it's a word
  370. constituent),
  371. * `tildify-space-pattern' matches when `looking-back' (no more than 10
  372. characters) from before the space character, and
  373. * all predicates in `tildify-space-predicates' return non-nil,
  374. replace the space character with value of `tildify-space-string' and
  375. return t.
  376. Otherwise, if
  377. * `tildify-double-space-undos' variable is non-nil,
  378. * character before point is a space character, and
  379. * text before that is a hard space as defined by
  380. `tildify-space-string' variable,
  381. remove the hard space and leave only the space character.
  382. This function is meant to be used as a `post-self-insert-hook'."
  383. (interactive)
  384. (let* ((p (point)) (p-1 (1- p)) (n (- p (point-min)))
  385. (l (length tildify-space-string)) (l+1 (1+ l))
  386. case-fold-search)
  387. (when (and (> n 2) (eq (preceding-char) ?\s))
  388. (cond
  389. ((and (eq (char-syntax (char-before p-1)) ?w)
  390. (save-excursion
  391. (goto-char p-1)
  392. (looking-back tildify-space-pattern (max (point-min) (- p 10))))
  393. (run-hook-with-args-until-failure 'tildify-space-predicates))
  394. (delete-char -1)
  395. (insert tildify-space-string)
  396. t)
  397. ((and tildify-double-space-undos
  398. (> n l+1)
  399. (string-equal tildify-space-string
  400. (buffer-substring (- p l+1) p-1)))
  401. (goto-char p-1)
  402. (delete-char (- l))
  403. (goto-char (1+ (point)))
  404. nil)))))
  405. (defun tildify-space-region-predicate ()
  406. "Check whether character before point should be tildified.
  407. Based on `tildify-foreach-region-function', check whether character before,
  408. which is assumed to be a space character, should be replaced with a hard space."
  409. (catch 'found
  410. (tildify--foreach-region (lambda (_b _e) (throw 'found t)) (1- (point)) (point))))
  411. ;;;###autoload
  412. (define-minor-mode tildify-mode
  413. "Adds electric behaviour to space character.
  414. When space is inserted into a buffer in a position where hard space is required
  415. instead (determined by `tildify-space-pattern' and `tildify-space-predicates'),
  416. that space character is replaced by a hard space specified by
  417. `tildify-space-string'. Converting of the space is done by `tildify-space'.
  418. When `tildify-mode' is enabled, if `tildify-string-alist' specifies a hard space
  419. representation for current major mode, the `tildify-space-string' buffer-local
  420. variable will be set to the representation."
  421. nil " ~" nil
  422. (when tildify-mode
  423. (let ((space (tildify--pick-alist-entry tildify-string-alist)))
  424. (if (not (string-equal " " (or space tildify-space-string)))
  425. (when space
  426. (setq tildify-space-string space))
  427. (message (eval-when-compile
  428. (concat "Hard space is a single space character, tildify-"
  429. "mode won't have any effect, disabling.")))
  430. (setq tildify-mode nil))))
  431. (if tildify-mode
  432. (add-hook 'post-self-insert-hook 'tildify-space nil t)
  433. (remove-hook 'post-self-insert-hook 'tildify-space t)))
  434. ;;; *** Announce ***
  435. (provide 'tildify)
  436. ;;; tildify.el ends here