parser.lisp 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. ;; This software is Copyright (c) cage, 2012.
  2. ;; cage grants you the rights to distribute
  3. ;; and use this software as governed by the terms
  4. ;; of the Lisp Lesser GNU Public License
  5. ;; (http://opensource.franz.com/preamble.html),
  6. ;; known as the LLGPL
  7. (in-package :cl-i18n)
  8. (alexandria:define-constant +peek-length-tokenizer-on-error+ 6 :test 'equal)
  9. (defparameter *file* "")
  10. (defparameter *string-pos* 0)
  11. (defparameter *has-errors* nil)
  12. (defparameter *parsing-errors* '())
  13. (defparameter *blank-space* '(#\space #\newline))
  14. (defclass parsed-file (buffered-input-file)
  15. ((comment-line
  16. :initform nil
  17. :initarg :comment-line
  18. :accessor comment-line)))
  19. (defgeneric peek-token (object &optional test))
  20. (defgeneric peek-token-suppress-errors (object &optional test))
  21. (defgeneric parse-comment-line (object))
  22. (defgeneric is-comment-line-p (object line))
  23. (defgeneric next-token (object &key hook-to-stringpos
  24. return-first-match
  25. predicate-sort-tokens
  26. no-more-token-error))
  27. (defgeneric next-token-simple (object &key no-more-token-error))
  28. (defmacro with-error ((predicate msg &rest arg-predicate) &body body)
  29. `(if (apply ,predicate (list ,@arg-predicate))
  30. (progn ,@body)
  31. (progn
  32. (setf *has-errors* t)
  33. (push ,msg *parsing-errors*)
  34. nil)))
  35. (defmacro with-no-errors (&body body)
  36. `(when (not *has-errors*)
  37. ,@body))
  38. (defmacro with-no-errors* (&body forms)
  39. (when forms
  40. `(with-no-errors
  41. ,(first forms)
  42. (with-no-errors* ,@(rest forms)))))
  43. (defmacro let-noerr (forms &body body)
  44. (if (not (null forms))
  45. `(with-no-errors
  46. (let (,(first forms))
  47. (let-noerr ,(rest forms) ,@body)))
  48. `(progn ,@body)))
  49. (defmacro let-noerr* (forms &body body)
  50. (if (not (null forms))
  51. `(with-no-errors
  52. (let* (,(first forms))
  53. (let-noerr ,(rest forms) ,@body)))
  54. `(progn ,@body)))
  55. (defmacro with-valid-stream (&body body)
  56. `(with-error (#'peek-valid-stream "Attempt to read an empty stream")
  57. ,@body))
  58. (defmethod peek-token ((object parsed-file) &optional (test #'identity))
  59. (with-valid-stream
  60. (multiple-value-bind (token start-token)
  61. (next-token object)
  62. (prog1
  63. (funcall test token)
  64. (seek *file* start-token)))))
  65. (defmethod peek-token-suppress-errors ((object parsed-file) &optional (test #'identity))
  66. (with-no-errors
  67. (multiple-value-bind (token start-token)
  68. (next-token object)
  69. (prog1
  70. (funcall test token)
  71. (if token
  72. (seek *file* start-token))))))
  73. (defmethod parse-comment-line ((object parsed-file))
  74. (let-noerr ((peek (peek-token *file*)))
  75. (when (is-comment-line-p object peek)
  76. (next-token *file*)
  77. (parse-comment-line object))))
  78. (defmethod is-comment-line-p ((object parsed-file) line)
  79. (if (comment-line object)
  80. (scan (comment-line object) line)
  81. nil))
  82. (defmacro define-parser-skeleton (name classname &rest other-vars)
  83. (let ((macro-name (alexandria:format-symbol t "~:@(with-~a-file~)" name))
  84. (other-v other-vars))
  85. `(defmacro ,macro-name ((&key (buffer (make-buffer 2)) (filename nil)) &rest body)
  86. `(let ((*file* (make-instance ',',classname :buffer ,buffer :filename ,filename))
  87. (*parsing-errors* '())
  88. (*has-errors* nil)
  89. ,@',other-v)
  90. (unwind-protect
  91. (progn ,@body)
  92. (close-file *file*))))))
  93. (defmacro define-parser-skeleton* (package name classname &rest other-vars)
  94. "does not close the stream"
  95. (let ((macro-name (alexandria:format-symbol package "~:@(with-~a-file~)" name))
  96. (other-v other-vars))
  97. `(defmacro ,macro-name ((&key (buffer (make-buffer 2)) (filename nil)) &rest body)
  98. `(let ((*file* (make-instance ',',classname :buffer ,buffer :filename ,filename))
  99. (*parsing-errors* '())
  100. (*has-errors* nil)
  101. ,@',other-v)
  102. (progn ,@body)))))
  103. (defmacro define-is-stuff-p (test &rest operators)
  104. (alexandria:with-gensyms (str)
  105. `(progn
  106. ,@(mapcar #'(lambda (op)
  107. `(defun ,(alexandria:format-symbol t "IS-~:@(~a~)-P"
  108. (cl-ppcre:regex-replace-all "\\+|\\*"
  109. (symbol-name op)
  110. ""))
  111. (,str)
  112. (,test ,op ,str)))
  113. operators))))
  114. (defun char@ ()
  115. (restart-case
  116. (let ((char (get-char *file*)))
  117. (if (not (null char))
  118. (string char)
  119. (error 'i18n-conditions:out-of-bounds :seq *file* :idx *string-pos*)))
  120. (ignore-error () ())
  121. (use-value (e) e)))
  122. (defun char@1+ ()
  123. (restart-case
  124. (let ((char (get-char *file*)))
  125. (if (not (null char))
  126. (progn
  127. (increment-pointer *file*)
  128. (string char))
  129. (error 'i18n-conditions:out-of-bounds :seq *file* :idx *string-pos*)))
  130. (ignore-error () ())
  131. (use-value (e) e)))
  132. (defun 1+char@ (&key (go-back t))
  133. (restart-case
  134. (progn
  135. (increment-pointer *file*)
  136. (let ((char (get-char *file*)))
  137. (if (not (null char))
  138. (progn
  139. (when go-back
  140. (decrement-pointer *file*))
  141. (string char))
  142. (error 'i18n-conditions:out-of-bounds :seq *file* :idx *string-pos*))))
  143. (ignore-error () ())
  144. (use-value (e) e)))
  145. (defun peek-end-stream (&key (pos-offset 0))
  146. (let ((saved-pos (logical-file-position *file*)))
  147. (loop for i from 0 below (1- pos-offset) do (increment-pointer *file*))
  148. (prog1
  149. (not (increment-pointer *file*))
  150. (seek *file* saved-pos))))
  151. (defun peek-valid-stream ()
  152. (not (peek-end-stream)))
  153. (defmacro multiple-increment (times)
  154. `(progn
  155. ,@(loop for i from 0 below times collect
  156. `(increment-pointer *file*))))
  157. (defmacro scanner-re (re)
  158. `(cl-ppcre:create-scanner ,re))
  159. (defun concatenate-regexps (regexps)
  160. (format nil "~{(~a)~^|~}" regexps))
  161. (defmacro define-tokenizer-simple (classname &rest regexps)
  162. (alexandria:with-gensyms (scanner register-number match start-re end-re line-length line-start)
  163. (let ((class-name (alexandria:format-symbol t "~@:(~a~)" classname))
  164. (no-more-token-error (alexandria:format-symbol t "NO-MORE-TOKEN-ERROR")))
  165. `(let ((,scanner (cl-ppcre:create-scanner ,(concatenate-regexps regexps))))
  166. (defmethod next-token-simple ((object ,class-name) &key (,no-more-token-error t))
  167. (declare (optimize (speed 3) (debug 0) (safety 0)))
  168. (multiple-value-bind (,register-number ,line-start ,line-length ,match ,start-re
  169. ,end-re)
  170. (regex-scan-line-simple object ,scanner)
  171. (declare (ignore ,line-start))
  172. (declare ((signed-byte 64) ,register-number ,line-start ,line-length
  173. ,start-re ,end-re))
  174. (declare (simple-string ,match))
  175. (if (>= ,register-number 0)
  176. (progn
  177. (seek *file* ,end-re)
  178. (values ,match ,start-re))
  179. (if (peek-end-stream :pos-offset 0)
  180. (if ,no-more-token-error
  181. (progn
  182. (setf *has-errors* t)
  183. (push "Error: stream ended without valid token found"
  184. *parsing-errors*))
  185. nil)
  186. (handler-bind ((i18n-conditions:out-of-bounds
  187. #'(lambda (c)
  188. (declare (ignore c))
  189. (invoke-restart 'ignore-error))))
  190. (seek *file* ,line-length)
  191. (char@1+)
  192. (next-token-simple object
  193. :no-more-token-error ,no-more-token-error))))))))))
  194. (defmacro define-tokenizer ((classname &rest regexps) &body other-cond-clause)
  195. (alexandria:with-gensyms (scan tokens sorted-matches max-match)
  196. (let ((class-name (alexandria:format-symbol t "~@:(~a~)" classname)))
  197. `(defmethod next-token ((object ,class-name) &key
  198. (hook-to-stringpos t)
  199. (return-first-match nil)
  200. (predicate-sort-tokens #'(lambda (a b)
  201. (> (length (first a))
  202. (length (first b)))))
  203. (no-more-token-error t))
  204. (if (peek-valid-stream)
  205. (let ((,tokens nil))
  206. (cond
  207. ,@other-cond-clause
  208. (t
  209. (block token-matching
  210. ,@(loop for r in regexps collect
  211. `(let ((,scan (multiple-value-list
  212. (regex-scan *file*
  213. ,r
  214. hook-to-stringpos))))
  215. (when (first ,scan)
  216. (if return-first-match
  217. (progn
  218. (setf ,tokens
  219. (list
  220. (first ,scan) ; the token
  221. (second ,scan) ; where the token starts
  222. (third ,scan))) ; where the token ends
  223. (return-from token-matching))
  224. (push (list
  225. (first ,scan) ; the token
  226. (second ,scan) ; where the token starts
  227. (third ,scan)) ; where the token ends
  228. ,tokens))))))
  229. (if (not (null ,tokens))
  230. (let* ((,sorted-matches (sort ,tokens predicate-sort-tokens))
  231. (,max-match (first ,sorted-matches)))
  232. (seek *file* (third ,max-match))
  233. (values (first ,max-match) (second ,max-match)))
  234. (if no-more-token-error
  235. (if (peek-end-stream :pos-offset +peek-length-tokenizer-on-error+)
  236. (progn
  237. (setf *has-errors* t)
  238. (push "Error: stream ended without valid token found" *parsing-errors*)
  239. (string (char@))
  240. nil)
  241. (progn
  242. (setf *has-errors* t)
  243. (push (format nil
  244. "Error: stream ended without valid token found starting from ~s"
  245. (regex-scan *file*
  246. ,(format nil "(?s).{~a}"
  247. +peek-length-tokenizer-on-error+)
  248. :sticky t))
  249. *parsing-errors*)
  250. nil))
  251. nil)))))
  252. nil)))))
  253. (defmacro defnocfun (name args &body body)
  254. `(defun ,(alexandria:format-symbol t "~:@(~a~)" name) (,@args)
  255. ,@(let ((user-rest (eq (caar body) 'declare)))
  256. (append
  257. (if user-rest
  258. (list (car body))
  259. nil)
  260. (list
  261. `(when (peek-valid-stream)
  262. (parse-comment-line *file*)))
  263. (list
  264. (if user-rest
  265. `(progn ,@(rest body))
  266. `(progn ,@body)))))))