cc-awk.el 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155
  1. ;;; cc-awk.el --- AWK specific code within cc-mode.
  2. ;; Copyright (C) 1988, 1994, 1996, 2000-2015 Free Software Foundation,
  3. ;; Inc.
  4. ;; Author: Alan Mackenzie <acm@muc.de> (originally based on awk-mode.el)
  5. ;; Maintainer: emacs-devel@gnu.org
  6. ;; Keywords: AWK, cc-mode, unix, languages
  7. ;; Package: cc-mode
  8. ;; This file is part of GNU Emacs.
  9. ;; GNU Emacs is free software: you can redistribute it and/or modify
  10. ;; it under the terms of the GNU General Public License as published by
  11. ;; the Free Software Foundation, either version 3 of the License, or
  12. ;; (at your option) any later version.
  13. ;; GNU Emacs is distributed in the hope that it will be useful,
  14. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. ;; GNU General Public License for more details.
  17. ;; You should have received a copy of the GNU General Public License
  18. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  19. ;;; Commentary:
  20. ;; This file contains (most of) the adaptations to cc-mode required for the
  21. ;; integration of AWK Mode.
  22. ;; It is organized thusly, the sections being separated by page breaks:
  23. ;; 1. The AWK Mode syntax table.
  24. ;; 2. Regular expressions for analyzing AWK code.
  25. ;; 3. Indentation calculation stuff ("c-awk-NL-prop text-property").
  26. ;; 4. Syntax-table property/font-locking stuff, including the
  27. ;; font-lock-keywords setting.
  28. ;; 5. The AWK Mode before/after-change-functions.
  29. ;; 6. AWK Mode specific versions of commands like beginning-of-defun.
  30. ;; The AWK Mode keymap, abbreviation table, and the mode function itself are
  31. ;; in cc-mode.el.
  32. ;;; Code:
  33. (eval-when-compile
  34. (let ((load-path
  35. (if (and (boundp 'byte-compile-dest-file)
  36. (stringp byte-compile-dest-file))
  37. (cons (file-name-directory byte-compile-dest-file) load-path)
  38. load-path)))
  39. (load "cc-bytecomp" nil t)))
  40. (cc-require 'cc-defs)
  41. ;; Silence the byte compiler.
  42. (cc-bytecomp-defvar font-lock-mode) ; Checked with boundp before use.
  43. (cc-bytecomp-defvar c-new-BEG)
  44. (cc-bytecomp-defvar c-new-END)
  45. ;; Some functions in cc-engine that are used below. There's a cyclic
  46. ;; dependency so it can't be required here. (Perhaps some functions
  47. ;; could be moved to cc-engine to avoid it.)
  48. (cc-bytecomp-defun c-backward-token-1)
  49. (cc-bytecomp-defun c-beginning-of-statement-1)
  50. (cc-bytecomp-defun c-backward-sws)
  51. (cc-bytecomp-defun c-forward-sws)
  (defvar awk-mode-syntax-table
    (let ((table (make-syntax-table)))
      ;; Backslash is the escape character.
      (modify-syntax-entry ?\\ "\\" table)
      ;; A comment runs from # to the end of the line; \r and \f end one too.
      (modify-syntax-entry ?\# "< " table)
      (mapc (lambda (eol) (modify-syntax-entry eol "> " table))
            '(?\n ?\r ?\f))
      ;; Operator characters are plain punctuation.  In particular, / can
      ;; delimit regexps or be a division operator.  By default we assume that
      ;; it is a division sign, and fix the regexp operator cases with
      ;; `font-lock-syntactic-keywords'.  (ACM 2002/4/27.)
      (mapc (lambda (op) (modify-syntax-entry op "." table))
            '(?/ ?* ?+ ?- ?= ?% ?< ?> ?& ?| ?\'))
      ;; Underscore is a symbol constituent.
      (modify-syntax-entry ?_ "_" table)
      table)
    "Syntax table in use in AWK Mode buffers.")
  76. ;; This section defines regular expressions used in the analysis of AWK code.
  77. ;; N.B. In the following regexps, an EOL is either \n OR \r. This is because
  78. ;; Emacs has in the past used \r to mark hidden lines in some fashion (and
  79. ;; maybe still does).
  ;; Any character pair introduced by a backslash.  An escaped newline (\n or
  ;; \r) counts, as does a lone backslash at the very end of the text.
  (defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)")

  ;; Any character pair introduced by a backslash, EXCEPT an escaped newline.
  (defconst c-awk-non-eol-esc-pair-re "\\\\\\(.\\|\\'\\)")

  ;; An AWK comment up to, but not including, its terminating NL (if any).
  ;; Note that the "enclosing" (elisp) regexp must ensure the # is real.
  (defconst c-awk-comment-without-nl "#.*")

  ;; A newline (\n or \r), or the end of the buffer.
  (defconst c-awk-nl-or-eob "\\(\n\\|\r\\|\\'\\)")
  89. ;; "Space" regular expressions.
  (eval-and-compile
    ;; An escaped newline.
    (defconst c-awk-escaped-nl "\\\\[\n\r]")

    ;; A possibly empty sequence of escaped newlines.  Used in
    ;; awk-font-lock-keywords.
    (defconst c-awk-escaped-nls* (concat "\\(" c-awk-escaped-nl "\\)*"))

    ;; A possibly empty sequence of escaped newlines with optional interspersed
    ;; spaces and tabs.  Used in awk-font-lock-keywords.
    ;;
    ;; An earlier formulation,
    ;;   (concat "\\(" c-awk-escaped-nls* "\\|" "[ \t]+" "\\)*")
    ;; was very slow: its runtime was doubling with each additional space :-(
    ;; Hence the flat alternation below.
    (defconst c-awk-escaped-nls*-with-space*
      (concat "\\(" c-awk-escaped-nl "\\|" "[ \t]" "\\)*")))

  ;; Matches (the tail of) a line containing at most either a comment or an
  ;; escaped EOL.
  (defconst c-awk-blank-or-comment-line-re "[ \t]*\\(#\\|\\\\?$\\)")
  110. ;; REGEXPS FOR "HARMLESS" STRINGS/LINES.
  ;; An underline NOT followed by " (an underline at the end of the text
  ;; qualifies as well).
  (defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")

  ;; Any character not significant in the state machine applying syntax-table
  ;; properties to "s and /s.
  (defconst c-awk-harmless-char-re "[^_#/\"{}();\\\\\n\r]")

  ;; A (possibly empty) sequence of characters insignificant in the state
  ;; machine applying syntax-table properties to "s and /s.
  (defconst c-awk-harmless-string*-re
    (concat "\\("
            c-awk-harmless-char-re
            "\\|" c-awk-esc-pair-re
            "\\|" c-awk-harmless-_
            "\\)*"))

  ;; The (possibly empty) sequence of "insignificant" chars at point.
  (defconst c-awk-harmless-string*-here-re
    (concat "\\=" c-awk-harmless-string*-re))

  ;; Any character but a _, #, /, ", \, or newline.  N.B. _" starts a
  ;; localization string in gawk 3.1
  (defconst c-awk-harmless-line-char-re "[^_#/\"\\\\\n\r]")

  ;; A (possibly empty) sequence of chars without unescaped /, ", \, #, or
  ;; newlines.
  (defconst c-awk-harmless-line-string*-re
    (concat "\\("
            c-awk-harmless-line-char-re
            "\\|" c-awk-esc-pair-re
            "\\|" c-awk-harmless-_
            "\\)*"))

  ;; (The tail of) an AWK "logical" line not containing an unescaped " or /.
  ;; "logical" means "possibly containing escaped newlines".  A comment is
  ;; matched as part of the line even if it contains a " or a /.  The end of
  ;; buffer is also an end of line.
  (defconst c-awk-harmless-line-re
    (concat c-awk-harmless-line-string*-re
            "\\(" c-awk-comment-without-nl "\\)?"
            c-awk-nl-or-eob))

  ;; A sequence of (at least one) "harmless-line" at point.
  (defconst c-awk-harmless-lines+-here-re
    (concat "\\=\\(" c-awk-harmless-line-re "\\)+"))
  140. ;; REGEXPS FOR AWK STRINGS.
  ;; Any character which can appear unescaped in a string.
  (defconst c-awk-string-ch-re "[^\"\\\n\r]")

  ;; The inside of an AWK string (i.e. without the enclosing quotes).
  (defconst c-awk-string-innards-re
    (concat "\\(" c-awk-string-ch-re "\\|" c-awk-esc-pair-re "\\)*"))

  ;; An AWK string at point up to, but not including, any terminator.
  ;; A gawk 3.1+ string may look like _"localizable string".
  (defconst c-awk-string-without-end-here-re
    (concat "\\=_?\"" c-awk-string-innards-re))

  ;; An opening ", the string's innards, and then either the closing " or,
  ;; when the string is left open, the end of the line or of the buffer.
  (defconst c-awk-possibly-open-string-re
    (concat "\"" c-awk-string-innards-re "\\(\"\\|$\\|\\'\\)"))
  153. ;; REGEXPS FOR AWK REGEXPS.
  ;; Any AWK regexp character which doesn't require special analysis.
  (defconst c-awk-regexp-normal-re "[^[/\\\n\r]")

  ;; A (possibly empty) sequence of escaped newlines.
  (defconst c-awk-escaped-newlines*-re "\\(\\\\[\n\r]\\)*")

  ;; NOTE: In what follows, "[asdf]" in a regexp will be called a "character
  ;; list", and "[:alpha:]" inside a character list will be known as a
  ;; "character class".  The terms for these things vary between regexp
  ;; descriptions.

  ;; A character class spec (e.g. [:alpha:]).
  (defconst c-awk-regexp-char-class-re "\\[:[a-z]+:\\]")

  ;; A regexp char list, up to (but not including) EOL if the ] is missing.
  (defconst c-awk-regexp-char-list-re
    (concat "\\[" c-awk-escaped-newlines*-re
            "^?" c-awk-escaped-newlines*-re
            "]?"
            "\\(" c-awk-esc-pair-re
            "\\|" c-awk-regexp-char-class-re
            "\\|" "[^]\n\r]"
            "\\)*"
            "\\(]\\|$\\)"))

  ;; The inside of an AWK regexp (i.e. without the enclosing /s).
  (defconst c-awk-regexp-innards-re
    (concat "\\(" c-awk-esc-pair-re
            "\\|" c-awk-regexp-char-list-re
            "\\|" c-awk-regexp-normal-re
            "\\)*"))

  ;; An AWK regexp up to, but not including, any terminating /.
  (defconst c-awk-regexp-without-end-re
    (concat "/" c-awk-regexp-innards-re))
  178. ;; REGEXPS used for scanning an AWK buffer in order to decide IF A '/' IS A
  179. ;; REGEXP OPENER OR A DIVISION SIGN. By "state" in the following is meant
  180. ;; whether a '/' at the current position would be a regexp opener or a
  181. ;; division sign.
  ;; A "neutral" char(pair).  Doesn't change the "state" of a subsequent /.
  ;; This is space/tab, close brace, an auto-increment/decrement operator or an
  ;; escaped character.  Or one of the (invalid) characters @ or `.  But NOT an
  ;; end of line (unless escaped).
  ;;
  ;; Former value, changed 2003/6/7:
  ;;   "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+"
  (defconst c-awk-neutral-re
    "\\([}@` \t]\\|\\+\\+\\|--\\|\\\\\\(.\\|[\n\r]\\)\\)")

  ;; A (possibly empty) string of neutral characters (or character pairs).
  (defconst c-awk-neutrals*-re
    (concat "\\(" c-awk-neutral-re "\\)*"))

  ;; One or more chars, each a constituent of a variable or number, or a ket
  ;; (i.e. closing bracKET), round or square.  Assume that all characters \x80
  ;; to \xff are "letters".
  (defconst c-awk-var-num-ket-re "[]\)0-9a-zA-Z_$.\x80-\xff]+")

  ;; Will match a piece of AWK buffer ending in / which is a division sign, in
  ;; a context where an immediate / would be a regexp bracket.  It follows a
  ;; variable or number (with optional intervening "neutral" characters).  This
  ;; will only work when there won't be a preceding " or / before the sought /
  ;; to foul things up.
  (defconst c-awk-div-sign-re
    (concat c-awk-var-num-ket-re c-awk-neutrals*-re "/"))

  ;; An opening BRAcket (of any sort), or any operator character apart from
  ;; +,-,/,*,%.  For the purpose at hand (detecting a / which is a regexp
  ;; bracket) these arith ops are unnecessary and a pain, because of "++" and
  ;; "--".
  (defconst c-awk-non-arith-op-bra-re "[[\({&=:!><,?;'~|]")

  ;; Will match a piece of AWK buffer ending in / which is an opening regexp
  ;; bracket, in a context where an immediate / would be a division sign.  This
  ;; will only work when there won't be a preceding " or / before the sought /
  ;; to foul things up.
  (defconst c-awk-regexp-sign-re
    (concat c-awk-non-arith-op-bra-re c-awk-neutrals*-re "/"))

  ;; All AWK keywords which can precede expressions (including /regexp/).
  (defconst c-awk-pre-exp-alphanum-kwd-re
    (concat "\\(^\\|\\=\\|[^_\n\r]\\)\\<"
            (regexp-opt '("print" "return" "case") t)
            "\\>\\([^_\n\r]\\|$\\)"))
  ;; A piece of AWK buffer ending in <kwd> /, where <kwd> is a keyword which
  ;; can precede an expression.  Spaces, tabs and escaped newlines may
  ;; separate the keyword from the /.
  (defconst c-awk-kwd-regexp-sign-re
    (concat c-awk-pre-exp-alphanum-kwd-re
            c-awk-escaped-nls*-with-space*
            "/"))
  225. ;; REGEXPS USED FOR FINDING THE POSITION OF A "virtual semicolon"
  ;; Any character other than #, /, ", \, a newline (\n or \r), a space or a
  ;; tab.
  (defconst c-awk-_-harmless-nonws-char-re "[^#/\"\\\\\n\r \t]")

  ;; A (possibly empty) sequence of items, each being optional spaces, tabs
  ;; and escaped newlines followed by one of: a character matched by
  ;; c-awk-_-harmless-nonws-char-re, an escaped pair which isn't an escaped
  ;; newline, or a (possibly unterminated) string.
  (defconst c-awk-non-/-syn-ws*-re
    (concat "\\("
            c-awk-escaped-nls*-with-space*
            "\\("
            c-awk-_-harmless-nonws-char-re
            "\\|" c-awk-non-eol-esc-pair-re
            "\\|" c-awk-possibly-open-string-re
            "\\)"
            "\\)*"))

  ;; Optional whitespace followed by "/".
  (defconst c-awk-space*-/-re
    (concat c-awk-escaped-nls*-with-space* "/"))

  ;; Optional whitespace followed by a "/" with string syntax (a matched
  ;; regexp delimiter).
  (defconst c-awk-space*-regexp-/-re
    (concat c-awk-escaped-nls*-with-space* "\\s\""))

  ;; Optional whitespace followed by a "/" with string fence syntax (an
  ;; unmatched regexp delimiter).
  (defconst c-awk-space*-unclosed-regexp-/-re
    (concat c-awk-escaped-nls*-with-space* "\\s\|"))
  245. ;; ACM, 2002/5/29:
  246. ;;
  247. ;; The next section of code is about determining whether or not an AWK
  248. ;; statement is complete or not. We use this to indent the following line.
  249. ;; The determination is pretty straightforward in C, where a statement ends
  250. ;; with either a ; or a }. Only "while" really gives any trouble there, since
  251. ;; it might be the end of a do-while. In AWK, on the other hand, semicolons
  252. ;; are rarely used, and EOLs _usually_ act as "virtual semicolons". In
  253. ;; addition, we have the complexity of escaped EOLs. The core of this
  254. ;; analysis is in the middle of the function
  255. ;; c-awk-calculate-NL-prop-prev-line, about 130 lines lower down.
  256. ;;
  257. ;; To avoid continually repeating this expensive analysis, we "cache" its
  258. ;; result in a text-property, c-awk-NL-prop, whose value for a line is set on
  259. ;; the EOL (if any) which terminates that line. Should the property be
  260. ;; required for the very last line (which has no EOL), it is calculated as
  261. ;; required but not cached. The c-awk-NL-prop property should be thought of
  262. ;; as only really valid immediately after a buffer change, not a permanently
  263. ;; set property. (By contrast, the syntax-table text properties (set by an
  264. ;; after-change function) must be constantly updated for the mode to work
  265. ;; properly).
  266. ;;
  267. ;; This text property is also used for "syntactic whitespace" movement, this
  268. ;; being where the distinction between the values '$' and '}' is significant.
  269. ;;
  270. ;; The valid values for c-awk-NL-prop are:
  271. ;;
  272. ;; nil The property is not currently set for this line.
  273. ;; '#' There is NO statement on this line (at most a comment), and no open
  274. ;; statement from a previous line which could have been completed on this
  275. ;; line.
  276. ;; '{' There is an unfinished statement on this (or a previous) line which
  277. ;; doesn't require \s to continue onto another line, e.g. the line ends
  278. ;; with {, or the && operator, or "if (condition)". Note that even if the
  279. ;; newline is redundantly escaped, it remains a '{' line.
  280. ;; '\' There is an escaped newline at the end of this line and this '\' is
  281. ;; essential to the syntax of the program. (i.e. if it had been a
  282. ;; frivolous \, it would have been ignored and the line been given one of
  283. ;; the other property values.)
  284. ;; '$' A non-empty statement is terminated on the line by an EOL (a "virtual
  285. ;; semicolon"). This might be a content-free line terminating a statement
  286. ;; from the preceding (continued) line (which has property \).
  287. ;; '}' A statement, being the last thing (aside from ws/comments) is
  288. ;; explicitly terminated on this line by a closing brace (or sometimes a
  289. ;; semicolon).
  290. ;;
  291. ;; This set of values has been chosen so that the property's value on a line
  292. ;; is completely determined by the contents of the line and the property on
  293. ;; the previous line, EXCEPT for where a "while" might be the closing
  294. ;; statement of a do-while.
  (defun c-awk-after-if-for-while-condition-p (&optional do-lim)
    ;; Return non-nil when point is just after the ) in
    ;; "if/for/while (<condition>)".
    ;;
    ;; The ) closing a do .... while (<condition>) doesn't count, since the
    ;; purpose of this routine is essentially to decide whether to indent the
    ;; next line.
    ;;
    ;; DO-LIM sets a limit on how far back we search for the "do" of a possible
    ;; do-while.
    ;;
    ;; This function might do hidden buffer changes.
    (when (eq (char-before) ?\))
      (save-excursion
        (let ((open-paren (c-safe (scan-lists (point) -1 0))))
          (when open-paren
            (goto-char open-paren)        ; back over "(...)"
            (c-backward-token-1)          ; BOB isn't a problem.
            (cond
             ((looking-at "\\(if\\|for\\)\\>\\([^_]\\|$\\)")
              t)
             ;; A `while' only counts when it isn't the tail of a do-while.
             ((looking-at "while\\>\\([^_]\\|$\\)")
              (not (eq (c-beginning-of-statement-1 do-lim)
                       'beginning)))))))))
  (defun c-awk-after-function-decl-param-list ()
    ;; Return non-nil when point is just after the ) in "function foo (bar)".
    ;; The keyword may also be spelled "func".
    ;;
    ;; This function might do hidden buffer changes.
    (when (eq (char-before) ?\))
      (save-excursion
        (let ((open-paren (c-safe (scan-lists (point) -1 0))))
          (when open-paren
            (goto-char open-paren)        ; back over "(...)"
            (c-backward-token-1)          ; BOB isn't a problem
            ;; We should now be on the function's name ...
            (when (looking-at "[_a-zA-Z][_a-zA-Z0-9]*\\>")
              ;; ... which must itself be preceded by the keyword.
              (c-backward-token-1)
              (looking-at "func\\(tion\\)?\\>")))))))
  330. ;; 2002/11/8: FIXME! Check c-backward-token-1/2 for success (0 return code).
  (defun c-awk-after-continue-token ()
    ;; Return non-nil when point is just after a token which lets the statement
    ;; run on to the next line without a backslash: one of , { ? : && || do
    ;; else.
    ;;
    ;; This function might do hidden buffer changes.
    (save-excursion
      (c-backward-token-1)        ; FIXME 2002/10/27.  What if this fails?
      ;; c-backward-token-1 doesn't step over both characters of && or ||, so
      ;; take one more step back when we are on a & or | :-(
      (when (and (looking-at "[&|]") (not (bobp)))
        (backward-char))
      (looking-at "[,{?:]\\|&&\\|||\\|do\\>\\|else\\>")))
  (defun c-awk-after-rbrace-or-statement-semicolon ()
    ;; Return non-nil when point is just after a } or a ; which closes a
    ;; statement.  Be careful about ;s in for loop control bits.  They don't
    ;; count!
    ;;
    ;; This function might do hidden buffer changes.
    (let ((prev-char (char-before)))
      (cond
       ((eq prev-char ?\}) t)
       ((eq prev-char ?\;)
        (save-excursion
          (let ((encl-open (c-safe (scan-lists (point) -1 1))))
            ;; With no enclosing bracket at all, the answer is nil.
            (when encl-open
              (goto-char encl-open)       ; go back to containing (
              (not (and (looking-at "(")
                        (c-backward-token-1) ; BOB isn't a problem
                        (looking-at "for\\>"))))))))))
  (defun c-awk-back-to-contentful-text-or-NL-prop ()
    ;; Move back to just after whichever of these is found first: (i) an EOL
    ;; which has the c-awk-NL-prop text-property set; (ii) non-ws text;
    ;; (iii) BOB.  Return the value of c-awk-NL-prop in case (i), otherwise nil.
    ;; Calling functions can best distinguish cases (ii) and (iii) with (bolp).
    ;;
    ;; Note that an escaped eol counts as whitespace here.
    ;;
    ;; Kludge: If c-backward-syntactic-ws gets stuck at a BOL, it is likely
    ;; that the previous line contains an unterminated string (without \).  In
    ;; this case, assume that the previous line's c-awk-NL-prop is a $.
    ;;
    ;; POINT MUST BE AT THE START OF A LINE when calling this function.  This
    ;; is to ensure that the various backward-comment functions will work
    ;; properly.
    ;;
    ;; This function might do hidden buffer changes.
    (let (nl-prop
          prev-bol     ; BOL of the previous line; the backward search limit.
          ws-start)    ; starting pos for a backward-syntactic-ws call.
      ;; We are at a BOL here.  Go back one line each iteration.
      (while
          (and
           (not (bobp))
           (null (setq nl-prop
                       (c-get-char-property (1- (point)) 'c-awk-NL-prop)))
           (progn
             (setq prev-bol (c-point 'bopl))
             (setq ws-start (point))
             ;; N.B. the following function will not go back past an EOL if
             ;; there is an open string (without \) on the previous line.
             ;; If we find such, set the c-awk-NL-prop on it, too
             ;; (2004/3/29).
             (c-backward-syntactic-ws prev-bol)
             (cond
              ((/= (point) ws-start) t)
              (t
               (setq nl-prop ?\$)
               (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop)
               nil)))
           ;; If we had a backslash at EOL, c-backward-syntactic-ws will
           ;; have gone backwards over it.  Check the backslash was "real".
           (progn
             (when (looking-at "[ \t]*\\\\+$")
               (end-of-line)
               (if (search-backward-regexp
                    "\\(^\\|[^\\]\\)\\(\\\\\\\\\\)*\\\\$" ; ODD number of \s at EOL :-)
                    prev-bol t)
                   (progn                 ; escaped EOL.
                     (end-of-line)
                     (backward-char)
                     (c-backward-syntactic-ws prev-bol))
                 (end-of-line)))          ; The \ at eol is a fake.
             (bolp))))
      nl-prop))
  (defun c-awk-calculate-NL-prop-prev-line (&optional do-lim)
    ;; Calculate and set the value of the c-awk-NL-prop on the immediately
    ;; preceding EOL.  This may also involve doing the same for several
    ;; preceding EOLs.
    ;;
    ;; NOTE that if the property was already set, we return it without
    ;; recalculation.  (This is by accident rather than design.)
    ;;
    ;; Return the property which got set (or was already set) on the previous
    ;; line.  Return nil if we hit BOB.
    ;;
    ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
    ;;
    ;; This function might do hidden buffer changes.
    (save-excursion
      (save-match-data
        (beginning-of-line)
        (let* ((start-bol (point))
               (nl-prop (c-awk-back-to-contentful-text-or-NL-prop)))
          ;; We are either (1) at a BOL (with nl-prop containing the previous
          ;; line's c-awk-NL-prop) or (2) after contentful text on a line.  At
          ;; the BOB counts as case (1), so we test next for bolp rather than
          ;; non-nil nl-prop.
          (unless (bolp)
            (setq nl-prop
                  (cond
                   ;; Incomplete statement which doesn't require escaped EOL?
                   ((or (c-awk-after-if-for-while-condition-p do-lim)
                        (c-awk-after-function-decl-param-list)
                        (c-awk-after-continue-token))
                    ?\{)
                   ;; Escaped EOL (where there's also something to continue)?
                   ((and (looking-at "[ \t]*\\\\$")
                         (not (c-awk-after-rbrace-or-statement-semicolon)))
                    ?\\)
                   ;; A statement was completed on this line.  How?
                   ((memq (char-before) '(?\; ?\}))
                    ?\})                  ; Real ; or }
                   (t
                    ?\$)))                ; A virtual semicolon.
            (end-of-line)
            (c-put-char-property (point) 'c-awk-NL-prop nl-prop)
            (forward-line))
          ;; We are now at a (possibly empty) sequence of content-free lines.
          ;; Set c-awk-NL-prop on each of these lines' EOL, deriving each value
          ;; from the one on the line before it.
          (while (< (point) start-bol)    ; one content-free line each iteration.
            (cond
             ((memq nl-prop '(?\} ?\$ nil))
              (setq nl-prop ?\#))
             ((and (eq nl-prop ?\\)
                   (not (looking-at "[ \t]*\\\\$")))
              (setq nl-prop ?\$))
             ;; ?\# (empty line) and ?\{ (open stmt) don't change.
             )
            (forward-line)
            (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop))
          nl-prop))))
  (defun c-awk-get-NL-prop-prev-line (&optional do-lim)
    ;; Return the c-awk-NL-prop text-property of the previous line, calculating
    ;; it first if it isn't set yet.  Return nil if we're already at BOB.
    ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
    ;;
    ;; This function might do hidden buffer changes.
    (unless (bobp)
      (or (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop)
          (c-awk-calculate-NL-prop-prev-line do-lim))))
  (defun c-awk-get-NL-prop-cur-line (&optional do-lim)
    ;; Get the c-awk-NL-prop text-property from the current line, calculating it
    ;; if necessary.  (As a special case, the property doesn't get set on an
    ;; empty line at EOB (there's no position to set the property on), but the
    ;; function returns the property value an EOL would have got.)
    ;;
    ;; When the current line is the last one and has no EOL, an artificial
    ;; newline is inserted for the duration of the calculation.  It is removed
    ;; again even if the calculation exits non-locally (an error or a quit), so
    ;; that the buffer text is never left altered.
    ;;
    ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
    ;;
    ;; This function might do hidden buffer changes.
    (save-excursion
      (end-of-line)               ; Necessary for the following test to work.
      (if (/= (forward-line) 1)
          ;; There was a following line; we are now at its BOL.
          (c-awk-get-NL-prop-prev-line do-lim)
        ;; We were on the last line: an artificial eol is needed for comment
        ;; detection.
        (let ((nl-pos (point)))
          (insert-char ?\n 1)
          (unwind-protect
              (c-awk-get-NL-prop-prev-line do-lim)
            ;; Delete by position rather than relative to point, since point
            ;; is not guaranteed to be where we left it after a non-local exit.
            (delete-region nl-pos (1+ nl-pos)))))))
  (defsubst c-awk-prev-line-incomplete-p (&optional do-lim)
    ;; Return non-nil when the previous line ends in an incomplete statement,
    ;; i.e. its c-awk-NL-prop is ?\\ or ?\{.
    ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
    ;;
    ;; This function might do hidden buffer changes.
    (let ((prev-prop (c-awk-get-NL-prop-prev-line do-lim)))
      (memq prev-prop '(?\\ ?\{))))
  (defsubst c-awk-cur-line-incomplete-p (&optional do-lim)
    ;; Return non-nil when the current line ends in an incomplete statement,
    ;; i.e. its c-awk-NL-prop is ?\\ or ?\{.
    ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
    ;;
    ;; This function might do hidden buffer changes.
    (let ((cur-prop (c-awk-get-NL-prop-cur-line do-lim)))
      (memq cur-prop '(?\\ ?\{))))
  497. ;; NOTES ON "VIRTUAL SEMICOLONS"
  498. ;;
  499. ;; A "virtual semicolon" is what terminates a statement when there is no ;
  500. ;; or } to do the job. Like point, it is considered to lie _between_ two
  501. ;; characters. As from mid-March 2004, it is considered to lie just after
  502. ;; the last non-syntactic-whitespace character on the line; (previously, it
  503. ;; was considered an attribute of the EOL on the line). A real semicolon
  504. ;; never counts as a virtual one.
  (defun c-awk-at-vsemi-p (&optional pos)
    ;; Return non-nil when there is a virtual semicolon at POS (or POINT).
    (save-excursion
      (let* (eol-prop
             (target (progn (when pos (goto-char pos))
                            (point)))
             (line-beg (c-point 'bol))
             (line-end (c-point 'eol)))
        (c-awk-beginning-of-logical-line)
        ;; Next `while' goes round one logical line (ending in, e.g. "\\") per
        ;; iteration.  Such a line is rare, and can only be an open string
        ;; ending in an escaped \.
        (while
            (progn
              ;; Next `while' goes over a division sign or /regexp/ per
              ;; iteration.
              (while
                  (and (< (point) line-end)
                       (progn
                         (search-forward-regexp c-awk-non-/-syn-ws*-re line-end)
                         (looking-at c-awk-space*-/-re)))
                (cond
                 ;; /regexp/
                 ((looking-at c-awk-space*-regexp-/-re)
                  (forward-sexp))
                 ;; Unclosed /regexp
                 ((looking-at c-awk-space*-unclosed-regexp-/-re)
                  (condition-case nil
                      (progn
                        (forward-sexp)
                        (backward-char))  ; Move to end of (logical) line.
                    (error (end-of-line)))) ; Happens at EOB.
                 ;; division sign
                 (t
                  (c-forward-syntactic-ws)
                  (forward-char))))
              (< (point) line-beg))
          (forward-line))
        ;; Only if the scan stopped exactly at the asked-for position can
        ;; there be a vsemi there.  Step over continuation lines followed by
        ;; blank or comment-only lines before looking at the property.
        (when (eq (point) target)
          (while (and (eq (setq eol-prop (c-awk-get-NL-prop-cur-line)) ?\\)
                      (eq (forward-line) 0)
                      (looking-at c-awk-blank-or-comment-line-re)))
          (eq eol-prop ?\$)))))
  (defun c-awk-vsemi-status-unknown-p ()
    ;; Return non-nil when we are unsure whether there is a virtual semicolon
    ;; on the current line, i.e. when no c-awk-NL-prop is set on its EOL.
    ;;
    ;; DO NOT under any circumstances attempt to calculate this; that would
    ;; defeat the (admittedly kludgy) purpose of this function, which is to
    ;; prevent an infinite recursion in c-beginning-of-statement-1 when point
    ;; starts at a `while' token.
    (null (c-get-char-property (c-point 'eol) 'c-awk-NL-prop)))
  551. (defun c-awk-clear-NL-props (beg end)
  552. ;; This function is run from before-change-hooks. It clears the
  553. ;; c-awk-NL-prop text property from beg to the end of the buffer (The END
  554. ;; parameter is ignored). This ensures that the indentation engine will
  555. ;; never use stale values for this property.
  556. ;;
  557. ;; This function might do hidden buffer changes.
  558. (save-restriction
  559. (widen)
  560. (c-clear-char-properties beg (point-max) 'c-awk-NL-prop)))
  561. (defun c-awk-unstick-NL-prop ()
  562. ;; Ensure that the text property c-awk-NL-prop is "non-sticky". Without
  563. ;; this, a new newline inserted after an old newline (e.g. by C-j) would
  564. ;; inherit any c-awk-NL-prop from the old newline. This would be a Bad
  565. ;; Thing. This function's action is required by c-put-char-property.
  566. (if (and (boundp 'text-property-default-nonsticky) ; doesn't exist in XEmacs
  567. (not (assoc 'c-awk-NL-prop text-property-default-nonsticky)))
  568. (setq text-property-default-nonsticky
  569. (cons '(c-awk-NL-prop . t) text-property-default-nonsticky))))
  570. ;; The following is purely a diagnostic command, to be commented out of the
  571. ;; final release. ACM, 2002/6/1
  572. ;; (defun NL-props ()
  573. ;; (interactive)
  574. ;; (let (pl-prop cl-prop)
  575. ;; (message "Prev-line: %s Cur-line: %s"
  576. ;; (if (setq pl-prop (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop))
  577. ;; (char-to-string pl-prop)
  578. ;; "nil")
  579. ;; (if (setq cl-prop (c-get-char-property (c-point 'eol) 'c-awk-NL-prop))
  580. ;; (char-to-string cl-prop)
  581. ;; "nil"))))
  582. ;(define-key awk-mode-map [?\C-c ?\r] 'NL-props) ; commented out, 2002/8/31
  583. ;for now. In the byte compiled version, this causes things to crash because
  584. ;awk-mode-map isn't yet defined. :-(
  585. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  586. ;; The following section of the code is to do with font-locking. The biggest
  587. ;; problem for font-locking is deciding whether a / is a regular expression
  588. ;; delimiter or a division sign - determining precisely where strings and
  589. ;; regular expressions start and stop is also troublesome. This is the
  590. ;; purpose of the function c-awk-set-syntax-table-properties and the myriad
  591. ;; elisp regular expressions it uses.
  592. ;;
  593. ;; Because AWK is a line oriented language, I felt the normal cc-mode strategy
  594. ;; for font-locking unterminated strings (i.e. font-locking the buffer up to
  595. ;; the next string delimiter as a string) was inappropriate. Instead,
  596. ;; unbalanced string/regexp delimiters are given the warning font, being
  597. ;; refonted with the string font as soon as the matching delimiter is entered.
  598. ;;
  599. ;; This requires the region processed by the current font-lock after-change
  600. ;; function to have access to the start of the string/regexp, which may be
  601. ;; several lines back. The elisp "advice" feature is used on these functions
  602. ;; to allow this.
  603. (defun c-awk-beginning-of-logical-line (&optional pos)
  604. ;; Go back to the start of the (apparent) current line (or the start of the
  605. ;; line containing POS), returning the buffer position of that point. I.e.,
  606. ;; go back to the last line which doesn't have an escaped EOL before it.
  607. ;;
  608. ;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
  609. ;; comment, string or regexp. IT MAY WELL BE that this function should not be
  610. ;; executed on a narrowed buffer.
  611. ;;
  612. ;; This function might do hidden buffer changes.
  613. (if pos (goto-char pos))
  614. (forward-line 0)
  615. (while (and (> (point) (point-min))
  616. (eq (char-before (1- (point))) ?\\))
  617. (forward-line -1))
  618. (point))
  619. (defun c-awk-beyond-logical-line (&optional pos)
  620. ;; Return the position just beyond the (apparent) current logical line, or the
  621. ;; one containing POS. This is usually the beginning of the next line which
  622. ;; doesn't follow an escaped EOL. At EOB, this will be EOB.
  623. ;;
  624. ;; Point is unchanged.
  625. ;;
  626. ;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
  627. ;; comment, string or regexp. IT MAY WELL BE that this function should not be
  628. ;; executed on a narrowed buffer.
  629. (save-excursion
  630. (if pos (goto-char pos))
  631. (end-of-line)
  632. (while (and (< (point) (point-max))
  633. (eq (char-before) ?\\))
  634. (end-of-line 2))
  635. (if (< (point) (point-max))
  636. (1+ (point))
  637. (point))))
  638. ;; ACM, 2002/02/15: The idea of the next function is to put the "Error font"
  639. ;; on strings/regexps which are missing their closing delimiter.
  640. ;; 2002/4/28. The default syntax for / has been changed from "string" to
  641. ;; "punctuation", to reduce hassle when this character appears within a string
  642. ;; or comment.
  643. (defun c-awk-set-string-regexp-syntax-table-properties (beg end)
  644. ;; BEG and END bracket a (possibly unterminated) string or regexp. The
  645. ;; opening delimiter is after BEG, and the closing delimiter, IF ANY, is AFTER
  646. ;; END. Set the appropriate syntax-table properties on the delimiters and
  647. ;; contents of this string/regex.
  648. ;;
  649. ;; "String" here can also mean a gawk 3.1 "localizable" string which starts
  650. ;; with _". In this case, we step over the _ and ignore it; It will get it's
  651. ;; font from an entry in awk-font-lock-keywords.
  652. ;;
  653. ;; If the closing delimiter is missing (i.e., there is an EOL there) set the
  654. ;; STRING-FENCE property on the opening " or / and closing EOL.
  655. ;;
  656. ;; This function does hidden buffer changes.
  657. (if (eq (char-after beg) ?_) (setq beg (1+ beg)))
  658. ;; First put the properties on the delimiters.
  659. (cond ((eq end (point-max)) ; string/regexp terminated by EOB
  660. (c-put-char-property beg 'syntax-table '(15))) ; (15) = "string fence"
  661. ((/= (char-after beg) (char-after end)) ; missing end delimiter
  662. (c-put-char-property beg 'syntax-table '(15))
  663. (c-put-char-property end 'syntax-table '(15)))
  664. ((eq (char-after beg) ?/) ; Properly bracketed regexp
  665. (c-put-char-property beg 'syntax-table '(7)) ; (7) = "string"
  666. (c-put-char-property end 'syntax-table '(7)))
  667. (t)) ; Properly bracketed string: Nothing to do.
  668. ;; Now change the properties of any escaped "s in the string to punctuation.
  669. (save-excursion
  670. (goto-char (1+ beg))
  671. (or (eobp)
  672. (while (search-forward "\"" end t)
  673. (c-put-char-property (1- (point)) 'syntax-table '(1))))))
  674. (defun c-awk-syntax-tablify-string ()
  675. ;; Point is at the opening " or _" of a string. Set the syntax-table
  676. ;; properties on this string, leaving point just after the string.
  677. ;;
  678. ;; The result is nil if a / immediately after the string would be a regexp
  679. ;; opener, t if it would be a division sign.
  680. ;;
  681. ;; This function does hidden buffer changes.
  682. (search-forward-regexp c-awk-string-without-end-here-re nil t) ; a (possibly unterminated) string
  683. (c-awk-set-string-regexp-syntax-table-properties
  684. (match-beginning 0) (match-end 0))
  685. (cond ((looking-at "\"")
  686. (forward-char)
  687. t) ; In AWK, ("15" / 5) gives 3 ;-)
  688. ((looking-at "[\n\r]") ; Unterminated string with EOL.
  689. (forward-char)
  690. nil) ; / on next line would start a regexp
  691. (t nil))) ; Unterminated string at EOB
  692. (defun c-awk-syntax-tablify-/ (anchor anchor-state-/div)
  693. ;; Point is at a /. Determine whether this is a division sign or a regexp
  694. ;; opener, and if the latter, apply syntax-table properties to the entire
  695. ;; regexp. Point is left immediately after the division sign or regexp, as
  696. ;; the case may be.
  697. ;;
  698. ;; ANCHOR-STATE-/DIV identifies whether a / at ANCHOR would have been a
  699. ;; division sign (value t) or a regexp opener (value nil). The idea is that
  700. ;; we analyze the line from ANCHOR up till point to determine what the / at
  701. ;; point is.
  702. ;;
  703. ;; The result is what ANCHOR-STATE-/DIV (see above) is where point is left.
  704. ;;
  705. ;; This function does hidden buffer changes.
  706. (let ((/point (point)))
  707. (goto-char anchor)
  708. ;; Analyze the line to find out what the / is.
  709. (if (if anchor-state-/div
  710. (not (search-forward-regexp c-awk-regexp-sign-re (1+ /point) t))
  711. (and (not (search-forward-regexp c-awk-kwd-regexp-sign-re (1+ /point) t))
  712. (search-forward-regexp c-awk-div-sign-re (1+ /point) t)))
  713. ;; A division sign.
  714. (progn (goto-char (1+ /point)) nil)
  715. ;; A regexp opener
  716. ;; Jump over the regexp innards, setting the match data.
  717. (goto-char /point)
  718. (search-forward-regexp c-awk-regexp-without-end-re)
  719. (c-awk-set-string-regexp-syntax-table-properties
  720. (match-beginning 0) (match-end 0))
  721. (cond ((looking-at "/") ; Terminating /
  722. (forward-char)
  723. t)
  724. ((looking-at "[\n\r]") ; Incomplete regexp terminated by EOL
  725. (forward-char)
  726. nil) ; / on next line would start another regexp
  727. (t nil))))) ; Unterminated regexp at EOB
  728. (defun c-awk-set-syntax-table-properties (lim)
  729. ;; Scan the buffer text between point and LIM, setting (and clearing) the
  730. ;; syntax-table property where necessary.
  731. ;;
  732. ;; This function is designed to be called as the FUNCTION in a MATCHER in
  733. ;; font-lock-syntactic-keywords, and it always returns NIL (to inhibit
  734. ;; repeated calls from font-lock: See elisp info page "Search-based
  735. ;; Fontification"). It also gets called, with a bit of glue, from
  736. ;; after-change-functions when font-lock isn't active. Point is left
  737. ;; "undefined" after this function exits. THE BUFFER SHOULD HAVE BEEN
  738. ;; WIDENED, AND ANY PRECIOUS MATCH-DATA SAVED BEFORE CALLING THIS ROUTINE.
  739. ;;
  740. ;; We need to set/clear the syntax-table property on:
  741. ;; (i) / - It is set to "string" on a / which is the opening or closing
  742. ;; delimiter of the properly terminated regexp (and left unset on a
  743. ;; division sign).
  744. ;; (ii) the opener of an unterminated string/regexp, we set the property
  745. ;; "generic string delimiter" on both the opening " or / and the end of the
  746. ;; line where the closing delimiter is missing.
  747. ;; (iii) "s inside strings/regexps (these will all be escaped "s). They are
  748. ;; given the property "punctuation". This will later allow other routines
  749. ;; to use the regexp "\\S\"*" to skip over the string innards.
  750. ;; (iv) Inside a comment, all syntax-table properties are cleared.
  751. ;;
  752. ;; This function does hidden buffer changes.
  753. (let (anchor
  754. (anchor-state-/div nil)) ; t means a following / would be a div sign.
  755. (c-awk-beginning-of-logical-line) ; ACM 2002/7/21. This is probably redundant.
  756. (c-clear-char-properties (point) lim 'syntax-table)
  757. ;; Once round the next loop for each string, regexp, or div sign
  758. (while (progn
  759. ;; Skip any "harmless" lines before the next tricky one.
  760. (if (search-forward-regexp c-awk-harmless-lines+-here-re nil t)
  761. (setq anchor-state-/div nil))
  762. (< (point) lim))
  763. (setq anchor (point))
  764. (search-forward-regexp c-awk-harmless-string*-here-re nil t)
  765. ;; We are now looking at either a " or a / or a brace/paren/semicolon.
  766. ;; Do our thing on the string, regexp or division sign or update
  767. ;; our state.
  768. (setq anchor-state-/div
  769. (cond
  770. ((looking-at "_?\"")
  771. (c-awk-syntax-tablify-string))
  772. ((eq (char-after) ?/)
  773. (c-awk-syntax-tablify-/ anchor anchor-state-/div))
  774. ((memq (char-after) '(?{ ?} ?\( ?\;))
  775. (forward-char)
  776. nil)
  777. (t ; ?\)
  778. (forward-char)
  779. t))))
  780. nil))
  781. ;; ACM, 2002/07/21: Thoughts: We need an AWK Mode after-change function to set
  782. ;; the syntax-table properties even when font-lock isn't enabled, for the
  783. ;; subsequent use of movement functions, etc. However, it seems that if font
  784. ;; lock _is_ enabled, we can always leave it to do the job.
  785. (defvar c-awk-old-ByLL 0)
  786. (make-variable-buffer-local 'c-awk-old-Byll)
  787. ;; Just beyond logical line following the region which is about to be changed.
  788. ;; Set in c-awk-record-region-clear-NL and used in c-awk-after-change.
  789. (defun c-awk-record-region-clear-NL (beg end)
  790. ;; This function is called exclusively from the before-change-functions hook.
  791. ;; It does two things: Finds the end of the (logical) line on which END lies,
  792. ;; and clears c-awk-NL-prop text properties from this point onwards. BEG is
  793. ;; ignored.
  794. ;;
  795. ;; On entry, the buffer will have been widened and match-data will have been
  796. ;; saved; point is undefined on both entry and exit; the return value is
  797. ;; ignored.
  798. ;;
  799. ;; This function does hidden buffer changes.
  800. (c-save-buffer-state ()
  801. (setq c-awk-old-ByLL (c-awk-beyond-logical-line end))
  802. (c-save-buffer-state nil
  803. (c-awk-clear-NL-props end (point-max)))))
  804. (defun c-awk-end-of-change-region (beg end old-len)
  805. ;; Find the end of the region which needs to be font-locked after a change.
  806. ;; This is the end of the logical line on which the change happened, either
  807. ;; as it was before the change, or as it is now, whichever is later.
  808. ;; N.B. point is left undefined.
  809. (max (+ (- c-awk-old-ByLL old-len) (- end beg))
  810. (c-awk-beyond-logical-line end)))
  811. ;; ACM 2002/5/25. When font-locking is invoked by a buffer change, the region
  812. ;; specified by the font-lock after-change function must be expanded to
  813. ;; include ALL of any string or regexp within the region. The simplest way to
  814. ;; do this in practice is to use the beginning/end-of-logical-line functions.
  815. ;; Don't overlook the possibility of the buffer change being the "recapturing"
  816. ;; of a previously escaped newline.
  817. ;; ACM 2008-02-05:
  818. (defun c-awk-extend-and-syntax-tablify-region (beg end old-len)
  819. ;; Expand the region (BEG END) as needed to (c-new-BEG c-new-END) then put
  820. ;; `syntax-table' properties on this region.
  821. ;;
  822. ;; This function is called from an after-change function, BEG END and
  823. ;; OLD-LEN being the standard parameters.
  824. ;;
  825. ;; Point is undefined both before and after this function call, the buffer
  826. ;; has been widened, and match-data saved. The return value is ignored.
  827. ;;
  828. ;; It prepares the buffer for font
  829. ;; locking, hence must get called before `font-lock-after-change-function'.
  830. ;;
  831. ;; This function is the AWK value of `c-before-font-lock-function'.
  832. ;; It does hidden buffer changes.
  833. (c-save-buffer-state ()
  834. (setq c-new-END (c-awk-end-of-change-region beg end old-len))
  835. (setq c-new-BEG (c-awk-beginning-of-logical-line beg))
  836. (goto-char c-new-BEG)
  837. (c-awk-set-syntax-table-properties c-new-END)))
  838. ;; Awk regexps written with help from Peter Galbraith
  839. ;; <galbraith@mixing.qc.dfo.ca>.
  840. ;; Take GNU Emacs's 'words out of the following regexp-opts. They don't work
  841. ;; in XEmacs 21.4.4. acm 2002/9/19.
  842. (defconst awk-font-lock-keywords
  843. (eval-when-compile
  844. (list
  845. ;; Function names.
  846. '("^\\s *\\(func\\(tion\\)?\\)\\>\\s *\\(\\sw+\\)?"
  847. (1 font-lock-keyword-face) (3 font-lock-function-name-face nil t))
  848. ;;
  849. ;; Variable names.
  850. (cons
  851. (concat "\\<"
  852. (regexp-opt
  853. '("ARGC" "ARGIND" "ARGV" "BINMODE" "CONVFMT" "ENVIRON"
  854. "ERRNO" "FIELDWIDTHS" "FILENAME" "FNR" "FS" "IGNORECASE"
  855. "LINT" "NF" "NR" "OFMT" "OFS" "ORS" "PROCINFO" "RLENGTH"
  856. "RS" "RSTART" "RT" "SUBSEP" "TEXTDOMAIN") t) "\\>")
  857. 'font-lock-variable-name-face)
  858. ;; Special file names. (acm, 2002/7/22)
  859. ;; The following regexp was created by first evaluating this in GNU Emacs 21.1:
  860. ;; (regexp-opt '("/dev/stdin" "/dev/stdout" "/dev/stderr" "/dev/fd/n" "/dev/pid"
  861. ;; "/dev/ppid" "/dev/pgrpid" "/dev/user") 'words)
  862. ;; , removing the "?:" from each "\\(?:" (for backward compatibility with older Emacsen)
  863. ;; , replacing the "n" in "dev/fd/n" with "[0-9]+"
  864. ;; , removing the unwanted \\< at the beginning, and finally filling out the
  865. ;; regexp so that a " must come before, and either a " or heuristic stuff after.
  866. ;; The surrounding quotes are fontified along with the filename, since, semantically,
  867. ;; they are an indivisible unit.
  868. '("\\(\"/dev/\\(fd/[0-9]+\\|p\\(\\(\\(gr\\)?p\\)?id\\)\\|\
  869. std\\(err\\|in\\|out\\)\\|user\\)\\)\\>\
  870. \\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)"
  871. (1 font-lock-variable-name-face t)
  872. (8 font-lock-variable-name-face t t))
  873. ;; Do the same (almost) with
  874. ;; (regexp-opt '("/inet/tcp/lport/rhost/rport" "/inet/udp/lport/rhost/rport"
  875. ;; "/inet/raw/lport/rhost/rport") 'words)
  876. ;; This cannot be combined with the above pattern, because the match number
  877. ;; for the (optional) closing \" would then exceed 9.
  878. '("\\(\"/inet/\\(\\(raw\\|\\(tc\\|ud\\)p\\)/lport/rhost/rport\\)\\)\\>\
  879. \\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)"
  880. (1 font-lock-variable-name-face t)
  881. (6 font-lock-variable-name-face t t))
  882. ;; Keywords.
  883. (concat "\\<"
  884. (regexp-opt
  885. '("BEGIN" "END" "break" "case" "continue" "default" "delete"
  886. "do" "else" "exit" "for" "getline" "if" "in" "next"
  887. "nextfile" "return" "switch" "while")
  888. t) "\\>")
  889. ;; Builtins.
  890. `(eval . (list
  891. ,(concat
  892. "\\<"
  893. (regexp-opt
  894. '("adump" "and" "asort" "atan2" "bindtextdomain" "close"
  895. "compl" "cos" "dcgettext" "exp" "extension" "fflush"
  896. "gensub" "gsub" "index" "int" "length" "log" "lshift"
  897. "match" "mktime" "or" "print" "printf" "rand" "rshift"
  898. "sin" "split" "sprintf" "sqrt" "srand" "stopme"
  899. "strftime" "strtonum" "sub" "substr" "system"
  900. "systime" "tolower" "toupper" "xor") t)
  901. "\\>")
  902. 0 c-preprocessor-face-name))
  903. ;; gawk debugging keywords. (acm, 2002/7/21)
  904. ;; (Removed, 2003/6/6. These functions are now fontified as built-ins)
  905. ;; (list (concat "\\<" (regexp-opt '("adump" "stopme") t) "\\>")
  906. ;; 0 'font-lock-warning-face)
  907. ;; User defined functions with an apparent spurious space before the
  908. ;; opening parenthesis. acm, 2002/5/30.
  909. `(,(concat "\\(\\w\\|_\\)" c-awk-escaped-nls* "\\s "
  910. c-awk-escaped-nls*-with-space* "(")
  911. (0 'font-lock-warning-face))
  912. ;; Space after \ in what looks like an escaped newline. 2002/5/31
  913. '("\\\\\\s +$" 0 font-lock-warning-face t)
  914. ;; Unbalanced string (") or regexp (/) delimiters. 2002/02/16.
  915. '("\\s|" 0 font-lock-warning-face t nil)
  916. ;; gawk 3.1 localizable strings ( _"translate me!"). 2002/5/21
  917. '("\\(_\\)\\s|" 1 font-lock-warning-face)
  918. '("\\(_\\)\\s\"" 1 font-lock-string-face) ; FIXME! not for XEmacs. 2002/10/6
  919. ))
  920. "Default expressions to highlight in AWK mode.")
  921. ;; ACM 2002/9/29. Movement functions, e.g. for C-M-a and C-M-e
  922. ;; The following three regexps differ from those earlier on in cc-awk.el in
  923. ;; that they assume the syntax-table properties have been set. They are thus
  924. ;; not useful for code which sets these properties.
  925. (defconst c-awk-terminated-regexp-or-string-here-re "\\=\\s\"\\S\"*\\s\"")
  926. ;; Matches a terminated string/regexp.
  927. (defconst c-awk-unterminated-regexp-or-string-here-re "\\=\\s|\\S|*$")
  928. ;; Matches an unterminated string/regexp, NOT including the eol at the end.
  929. (defconst c-awk-harmless-pattern-characters*
  930. (concat "\\([^{;#/\"\\\\\n\r]\\|" c-awk-esc-pair-re "\\)*"))
  931. ;; Matches any "harmless" character in a pattern or an escaped character pair.
  932. (defun c-awk-at-statement-end-p ()
  933. ;; Point is not inside a comment or string. Is it AT the end of a
  934. ;; statement? This means immediately after the last non-ws character of the
  935. ;; statement. The caller is responsible for widening the buffer, if
  936. ;; appropriate.
  937. (and (not (bobp))
  938. (save-excursion
  939. (backward-char)
  940. (or (looking-at "[};]")
  941. (and (memq (c-awk-get-NL-prop-cur-line) '(?\$ ?\\))
  942. (looking-at
  943. (eval-when-compile
  944. (concat "[^ \t\n\r\\]" c-awk-escaped-nls*-with-space*
  945. "[#\n\r]"))))))))
  946. (defun c-awk-beginning-of-defun (&optional arg)
  947. "Move backward to the beginning of an AWK \"defun\". With ARG, do it that
  948. many times. Negative arg -N means move forward to Nth following beginning of
  949. defun. Returns t unless search stops due to beginning or end of buffer.
  950. By a \"defun\" is meant either a pattern-action pair or a function. The start
  951. of a defun is recognized as code starting at column zero which is neither a
  952. closing brace nor a comment nor a continuation of the previous line. Unlike
  953. in some other modes, having an opening brace at column 0 is neither necessary
  954. nor helpful.
  955. Note that this function might do hidden buffer changes. See the
  956. comment at the start of cc-engine.el for more info."
  957. (interactive "p")
  958. (or arg (setq arg 1))
  959. (save-match-data
  960. (c-save-buffer-state ; ensures the buffer is writable.
  961. nil
  962. (let ((found t)) ; Has the most recent regexp search found b-of-defun?
  963. (if (>= arg 0)
  964. ;; Go back one defun each time round the following loop. (For +ve arg)
  965. (while (and found (> arg 0) (not (eq (point) (point-min))))
  966. ;; Go back one "candidate" each time round the next loop until one
  967. ;; is genuinely a beginning-of-defun.
  968. (while (and (setq found (search-backward-regexp
  969. "^[^#} \t\n\r]" (point-min) 'stop-at-limit))
  970. (not (memq (c-awk-get-NL-prop-prev-line) '(?\$ ?\} ?\#)))))
  971. (setq arg (1- arg)))
  972. ;; The same for a -ve arg.
  973. (if (not (eq (point) (point-max))) (forward-char 1))
  974. (while (and found (< arg 0) (not (eq (point) (point-max)))) ; The same for -ve arg.
  975. (while (and (setq found (search-forward-regexp
  976. "^[^#} \t\n\r]" (point-max) 'stop-at-limit))
  977. (not (memq (c-awk-get-NL-prop-prev-line) '(?\$ ?\} ?\#)))))
  978. (setq arg (1+ arg)))
  979. (if found (goto-char (match-beginning 0))))
  980. (eq arg 0)))))
  981. (defun c-awk-forward-awk-pattern ()
  982. ;; Point is at the start of an AWK pattern (which may be null) or function
  983. ;; declaration. Move to the pattern's end, and past any trailing space or
  984. ;; comment. Typically, we stop at the { which denotes the corresponding AWK
  985. ;; action/function body. Otherwise we stop at the EOL (or ;) marking the
  986. ;; absence of an explicit action.
  987. ;;
  988. ;; This function might do hidden buffer changes.
  989. (while
  990. (progn
  991. (search-forward-regexp c-awk-harmless-pattern-characters*)
  992. (if (looking-at "#") (end-of-line))
  993. (cond
  994. ((eobp) nil)
  995. ((looking-at "[{;]") nil) ; We've finished!
  996. ((eolp)
  997. (if (c-awk-cur-line-incomplete-p)
  998. (forward-line) ; returns non-nil
  999. nil))
  1000. ((search-forward-regexp c-awk-terminated-regexp-or-string-here-re nil t))
  1001. ((search-forward-regexp c-awk-unterminated-regexp-or-string-here-re nil t))
  1002. ((looking-at "/") (forward-char) t))))) ; division sign.
  1003. (defun c-awk-end-of-defun1 ()
  1004. ;; point is at the start of a "defun". Move to its end. Return end position.
  1005. ;;
  1006. ;; This function might do hidden buffer changes.
  1007. (c-awk-forward-awk-pattern)
  1008. (cond
  1009. ((looking-at "{") (goto-char (scan-sexps (point) 1)))
  1010. ((looking-at ";") (forward-char))
  1011. ((eolp))
  1012. (t (error "c-awk-end-of-defun1: Failure of c-awk-forward-awk-pattern")))
  1013. (point))
  1014. (defun c-awk-beginning-of-defun-p ()
  1015. ;; Are we already at the beginning of a defun? (i.e. at code in column 0
  1016. ;; which isn't a }, and isn't a continuation line of any sort.
  1017. ;;
  1018. ;; This function might do hidden buffer changes.
  1019. (and (looking-at "^[^#} \t\n\r]")
  1020. (not (c-awk-prev-line-incomplete-p))))
  1021. (defun c-awk-end-of-defun (&optional arg)
  1022. "Move forward to next end of defun. With argument, do it that many times.
  1023. Negative argument -N means move back to Nth preceding end of defun.
  1024. An end of a defun occurs right after the closing brace that matches the
  1025. opening brace at its start, or immediately after the AWK pattern when there is
  1026. no explicit action; see function `c-awk-beginning-of-defun'.
  1027. Note that this function might do hidden buffer changes. See the
  1028. comment at the start of cc-engine.el for more info."
  1029. (interactive "p")
  1030. (or arg (setq arg 1))
  1031. (save-match-data
  1032. (c-save-buffer-state
  1033. nil
  1034. (let ((start-point (point)) end-point)
  1035. ;; Strategy: (For +ve ARG): If we're not already at a beginning-of-defun,
  1036. ;; move backwards to one.
  1037. ;; Repeat [(i) move forward to end-of-current-defun (see below);
  1038. ;; (ii) If this isn't it, move forward to beginning-of-defun].
  1039. ;; We start counting ARG only when step (i) has passed the original point.
  1040. (when (> arg 0)
  1041. ;; Try to move back to a beginning-of-defun, if not already at one.
  1042. (if (not (c-awk-beginning-of-defun-p))
  1043. (when (not (c-awk-beginning-of-defun 1)) ; No bo-defun before point.
  1044. (goto-char start-point)
  1045. (c-awk-beginning-of-defun -1))) ; if this fails, we're at EOB, tough!
  1046. ;; Now count forward, one defun at a time
  1047. (while (and (not (eobp))
  1048. (c-awk-end-of-defun1)
  1049. (if (> (point) start-point) (setq arg (1- arg)) t)
  1050. (> arg 0)
  1051. (c-awk-beginning-of-defun -1))))
  1052. (when (< arg 0)
  1053. (setq end-point start-point)
  1054. (while (and (not (bobp))
  1055. (c-awk-beginning-of-defun 1)
  1056. (if (< (setq end-point (if (bobp) (point)
  1057. (save-excursion (c-awk-end-of-defun1))))
  1058. start-point)
  1059. (setq arg (1+ arg)) t)
  1060. (< arg 0)))
  1061. (goto-char (min start-point end-point)))))))
  1062. (cc-provide 'cc-awk) ; Changed from 'awk-mode, ACM 2002/5/21
  1063. ;; Local Variables:
  1064. ;; indent-tabs-mode: t
  1065. ;; tab-width: 8
  1066. ;; End:
;;; cc-awk.el ends here