ledger-regex.el 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. ;;; ledger-regex.el --- Helper code for use with the "ledger" command-line tool
  2. ;; Copyright (C) 2003-2016 John Wiegley (johnw AT gnu DOT org)
  3. ;; This file is not part of GNU Emacs.
  4. ;; This is free software; you can redistribute it and/or modify it under
  5. ;; the terms of the GNU General Public License as published by the Free
  6. ;; Software Foundation; either version 2, or (at your option) any later
  7. ;; version.
  8. ;;
  9. ;; This is distributed in the hope that it will be useful, but WITHOUT
  10. ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11. ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  12. ;; for more details.
  13. ;;
  14. ;; You should have received a copy of the GNU General Public License
  15. ;; along with GNU Emacs; see the file COPYING. If not, write to the
  16. ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. ;; MA 02110-1301 USA.
  18. (require 'rx)
  19. (eval-when-compile
  20. (require 'cl))
  21. (defconst ledger-amount-regex
  22. (concat "\\( \\|\t\\| \t\\)[ \t]*-?"
  23. "\\([A-Z$€£₹_(]+ *\\)?"
  24. ;; We either match just a number after the commodity with no
  25. ;; decimal or thousand separators or a number with thousand
  26. ;; separators. If we have a decimal part starting with `,'
  27. ;; or `.', because the match is non-greedy, it must leave at
  28. ;; least one of those symbols for the following capture
  29. ;; group, which then finishes the decimal part.
  30. "\\(-?\\(?:[0-9]+\\|[0-9,.]+?\\)\\)"
  31. "\\([,.][0-9)]+\\)?"
  32. "\\( *[[:word:]€£₹_\"]+\\)?"
  33. "\\([ \t]*[@={]@?[^\n;]+?\\)?"
  34. "\\([ \t]+;.+?\\|[ \t]*\\)?$"))
  35. (defconst ledger-amount-decimal-comma-regex
  36. "-?[1-9][0-9.]*[,]?[0-9]*")
  37. (defconst ledger-amount-decimal-period-regex
  38. "-?[1-9][0-9,]*[.]?[0-9]*")
  39. (defconst ledger-other-entries-regex
  40. "\\(^[~=A-Za-z].+\\)+")
  41. (defconst ledger-comment-regex
  42. "^[;#|\\*%].*\\|[ \t]+;.*")
  43. (defconst ledger-multiline-comment-start-regex
  44. "^!comment$")
  45. (defconst ledger-multiline-comment-end-regex
  46. "^!end_comment$")
  47. (defconst ledger-multiline-comment-regex
  48. "^!comment\n\\(.*\n\\)*?!end_comment$")
  49. (defconst ledger-payee-any-status-regex
  50. "^[0-9]+[-/][-/.=0-9]+\\(\\s-+\\*\\)?\\(\\s-+(.*?)\\)?\\s-+\\(.+?\\)\\s-*\\(;\\|$\\)")
  51. (defconst ledger-payee-pending-regex
  52. "^[0-9]+[-/][-/.=0-9]+\\s-\\!\\s-+\\(([^)]+)\\s-+\\)?\\([^*].+?\\)\\s-*\\(;\\|$\\)")
  53. (defconst ledger-payee-cleared-regex
  54. "^[0-9]+[-/][-/.=0-9]+\\s-\\*\\s-+\\(([^)]+)\\s-+\\)?\\([^*].+?\\)\\s-*\\(;\\|$\\)")
  55. (defconst ledger-payee-uncleared-regex
  56. "^[0-9]+[-/][-/.=0-9]+\\s-+\\(([^)]+)\\s-+\\)?\\([^*].+?\\)\\s-*\\(;\\|$\\)")
  57. (defconst ledger-init-string-regex
  58. "^--.+?\\($\\|[ ]\\)")
  59. (defconst ledger-account-any-status-regex
  60. "^[ \t]+\\([*!]\\s-+\\)?\\([[(]?.+?\\)\\(\t\\|\n\\| [ \t]\\)")
  61. (defun ledger-account-any-status-with-seed-regex (seed)
  62. (concat "^[ \t]+\\([*!]\\s-+\\)?\\([[(]?" seed ".+?\\)\\(\t\\|\n\\| [ \t]\\)"))
  63. (defconst ledger-account-pending-regex
  64. "\\(^[ \t]+\\)\\(!\\s-*.*?\\)\\( \\|\t\\|$\\)")
  65. (defconst ledger-account-cleared-regex
  66. "\\(^[ \t]+\\)\\(*\\s-*.*?\\)\\( \\|\t\\|$\\)")
  67. (defmacro ledger-define-regexp (name regex docs &rest args)
  68. "Simplify the creation of a Ledger regex and helper functions."
  69. (let ((defs
  70. (list
  71. `(defconst
  72. ,(intern (concat "ledger-" (symbol-name name) "-regexp"))
  73. ,(eval regex))))
  74. (addend 0) last-group)
  75. (if (null args)
  76. (progn
  77. (nconc
  78. defs
  79. (list
  80. `(defconst
  81. ,(intern
  82. (concat "ledger-regex-" (symbol-name name) "-group"))
  83. 1)))
  84. (nconc
  85. defs
  86. (list
  87. `(defconst
  88. ,(intern (concat "ledger-regex-" (symbol-name name)
  89. "-group--count"))
  90. 1)))
  91. (nconc
  92. defs
  93. (list
  94. `(defmacro
  95. ,(intern (concat "ledger-regex-" (symbol-name name)))
  96. (&optional string)
  97. ,(format "Return the match string for the %s" name)
  98. (match-string
  99. ,(intern (concat "ledger-regex-" (symbol-name name)
  100. "-group"))
  101. string)))))
  102. (dolist (arg args)
  103. (let (var grouping target)
  104. (if (symbolp arg)
  105. (setq var arg target arg)
  106. (assert (listp arg))
  107. (if (= 2 (length arg))
  108. (setq var (car arg)
  109. target (cadr arg))
  110. (setq var (car arg)
  111. grouping (cadr arg)
  112. target (caddr arg))))
  113. (if (and last-group
  114. (not (eq last-group (or grouping target))))
  115. (incf addend
  116. (symbol-value
  117. (intern-soft (concat "ledger-regex-"
  118. (symbol-name last-group)
  119. "-group--count")))))
  120. (nconc
  121. defs
  122. (list
  123. `(defconst
  124. ,(intern (concat "ledger-regex-" (symbol-name name)
  125. "-group-" (symbol-name var)))
  126. ,(+ addend
  127. (symbol-value
  128. (intern-soft
  129. (if grouping
  130. (concat "ledger-regex-" (symbol-name grouping)
  131. "-group-" (symbol-name target))
  132. (concat "ledger-regex-" (symbol-name target)
  133. "-group"))))))))
  134. (nconc
  135. defs
  136. (list
  137. `(defmacro
  138. ,(intern (concat "ledger-regex-" (symbol-name name)
  139. "-" (symbol-name var)))
  140. (&optional string)
  141. ,(format "Return the sub-group match for the %s %s."
  142. name var)
  143. (match-string
  144. ,(intern (concat "ledger-regex-" (symbol-name name)
  145. "-group-" (symbol-name var)))
  146. string))))
  147. (setq last-group (or grouping target))))
  148. (nconc defs
  149. (list
  150. `(defconst ,(intern (concat "ledger-regex-" (symbol-name name)
  151. "-group--count"))
  152. ,(length args)))))
  153. (cons 'progn defs)))
  154. (put 'ledger-define-regexp 'lisp-indent-function 1)
  155. (ledger-define-regexp iso-date
  156. ( let ((sep '(or ?- ?/)))
  157. (rx (group
  158. (and (? (and (group (= 4 num)))
  159. (eval sep))
  160. (group (and num (? num)))
  161. (eval sep)
  162. (group (and num (? num)))))))
  163. "Match a single date, in its 'written' form.")
  164. (ledger-define-regexp full-date
  165. (macroexpand
  166. `(rx (and (regexp ,ledger-iso-date-regexp)
  167. (? (and ?= (regexp ,ledger-iso-date-regexp))))))
  168. "Match a compound date, of the form ACTUAL=EFFECTIVE"
  169. (actual iso-date)
  170. (effective iso-date))
  171. (ledger-define-regexp state
  172. (rx (group (any ?! ?*)))
  173. "Match a transaction or posting's \"state\" character.")
  174. (ledger-define-regexp code
  175. (rx (and ?\( (group (+? (not (any ?\))))) ?\)))
  176. "Match the transaction code.")
  177. (ledger-define-regexp long-space
  178. (rx (and (*? blank)
  179. (or (and ? (or ? ?\t)) ?\t)))
  180. "Match a \"long space\".")
  181. (ledger-define-regexp note
  182. (rx (group (+ nonl)))
  183. "")
  184. (ledger-define-regexp end-note
  185. (macroexpand
  186. `(rx (and (regexp ,ledger-long-space-regexp) ?\;
  187. (regexp ,ledger-note-regexp))))
  188. "")
  189. (ledger-define-regexp full-note
  190. (macroexpand
  191. `(rx (and line-start (+ blank)
  192. ?\; (regexp ,ledger-note-regexp))))
  193. "")
  194. (ledger-define-regexp xact-line
  195. (macroexpand
  196. `(rx (and line-start
  197. (regexp ,ledger-full-date-regexp)
  198. (? (and (+ blank) (regexp ,ledger-state-regexp)))
  199. (? (and (+ blank) (regexp ,ledger-code-regexp)))
  200. (+ blank) (+? nonl)
  201. (? (regexp ,ledger-end-note-regexp))
  202. line-end)))
  203. "Match a transaction's first line (and optional notes)."
  204. (actual-date full-date actual)
  205. (effective-date full-date effective)
  206. state
  207. code
  208. (note end-note))
  209. (ledger-define-regexp recurring-line
  210. (macroexpand
  211. `(rx (and line-start
  212. (regexp "\\[.+/.+/.+\\]")
  213. (? (and (+ blank) (regexp ,ledger-state-regexp)))
  214. (? (and (+ blank) (regexp ,ledger-code-regexp)))
  215. (+ blank) (+? nonl)
  216. (? (regexp ,ledger-end-note-regexp))
  217. line-end)))
  218. "Match a transaction's first line (and optional notes)."
  219. (actual-date full-date actual)
  220. (effective-date full-date effective)
  221. state
  222. code
  223. (note end-note))
  224. (ledger-define-regexp account
  225. (rx (group (and (not (any blank ?\[ ?\( ?: ?\;)) (*? nonl))))
  226. "")
  227. (ledger-define-regexp account-kind
  228. (rx (group (? (any ?\[ ?\())))
  229. "")
  230. (ledger-define-regexp full-account
  231. (macroexpand
  232. `(rx (and (regexp ,ledger-account-kind-regexp)
  233. (regexp ,ledger-account-regexp)
  234. (? (any ?\] ?\))))))
  235. ""
  236. (kind account-kind)
  237. (name account))
  238. (ledger-define-regexp commodity
  239. (rx (group
  240. (or (and ?\" (+ (not (any ?\"))) ?\")
  241. (not (any blank ?\n
  242. digit
  243. ?- ?\[ ?\]
  244. ?. ?, ?\; ?+ ?* ?/ ?^ ?? ?: ?& ?| ?! ?=
  245. ?\< ?\> ?\{ ?\} ?\( ?\) ?@)))))
  246. "")
  247. (ledger-define-regexp amount
  248. (rx (group
  249. (and (? ?-)
  250. (and (+ digit)
  251. (*? (and (any ?. ?,) (+ digit))))
  252. (? (and (any ?. ?,) (+ digit))))))
  253. "")
  254. (ledger-define-regexp commoditized-amount
  255. (macroexpand
  256. `(rx (group
  257. (or (and (regexp ,ledger-commodity-regexp)
  258. (*? blank)
  259. (regexp ,ledger-amount-regexp))
  260. (and (regexp ,ledger-amount-regexp)
  261. (*? blank)
  262. (regexp ,ledger-commodity-regexp))))))
  263. "")
  264. (ledger-define-regexp commodity-annotations
  265. (macroexpand
  266. `(rx (* (+ blank)
  267. (or (and ?\{ (regexp ,ledger-commoditized-amount-regexp) ?\})
  268. (and ?\[ (regexp ,ledger-iso-date-regexp) ?\])
  269. (and ?\( (not (any ?\))) ?\))))))
  270. "")
  271. (ledger-define-regexp cost
  272. (macroexpand
  273. `(rx (and (or "@" "@@") (+ blank)
  274. (regexp ,ledger-commoditized-amount-regexp))))
  275. "")
  276. (ledger-define-regexp balance-assertion
  277. (macroexpand
  278. `(rx (and ?= (+ blank)
  279. (regexp ,ledger-commoditized-amount-regexp))))
  280. "")
  281. (ledger-define-regexp full-amount
  282. (macroexpand `(rx (group (+? (not (any ?\;))))))
  283. "")
  284. (ledger-define-regexp post-line
  285. (macroexpand
  286. `(rx (and line-start (+ blank)
  287. (? (and (regexp ,ledger-state-regexp) (* blank)))
  288. (regexp ,ledger-full-account-regexp)
  289. (? (and (regexp ,ledger-long-space-regexp)
  290. (regexp ,ledger-full-amount-regexp)))
  291. (? (regexp ,ledger-end-note-regexp))
  292. line-end)))
  293. ""
  294. state
  295. (account-kind full-account kind)
  296. (account full-account name)
  297. (amount full-amount)
  298. (note end-note))
  299. (defconst ledger-iterate-regex
  300. (concat "\\(\\(?:Y\\|year\\)\\s-+\\([0-9]+\\)\\|" ;; Catches a Y/year directive
  301. ledger-iso-date-regexp
  302. "\\([ *!]+\\)" ;; mark
  303. "\\((.*)\\)?" ;; code
  304. "\\([[:word:] ]+\\)" ;; desc
  305. "\\)"))
  306. (defconst ledger-xact-start-regex
  307. (concat "^" ledger-iso-date-regexp ;; subexp 1
  308. "\\(=" ledger-iso-date-regexp "\\)?"
  309. ))
  310. (defconst ledger-xact-after-date-regex
  311. (concat "\\([ \t]+[*!]\\)?" ;; mark, subexp 1
  312. "\\([ \t]+(.*?)\\)?" ;; code, subexp 2
  313. "\\([ \t]+[^;\n]+\\)" ;; desc, subexp 3
  314. "\\(;[^\n]*\\)?" ;; comment, subexp 4
  315. ))
  316. (defconst ledger-posting-regex
  317. (concat "^[ \t]+ ?" ;; initial white space
  318. "\\([*!]\\)? ?" ;; state, subexpr 1
  319. "\\([[:print:]]+\\([ \t][ \t]\\)\\)" ;; account, subexpr 2
  320. "\\([^;\n]*\\)" ;; amount, subexpr 4
  321. "\\(.*\\)" ;; comment, subexpr 5
  322. ))
  323. (defconst ledger-directive-start-regex
  324. "[=~;#%|\\*[A-Za-z]")
  325. (provide 'ledger-regex)