string-peg.scm 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. ;;;; string-peg.scm --- representing PEG grammars as strings
  2. ;;;;
  3. ;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
  4. ;;;;
  5. ;;;; This library is free software; you can redistribute it and/or
  6. ;;;; modify it under the terms of the GNU Lesser General Public
  7. ;;;; License as published by the Free Software Foundation; either
  8. ;;;; version 3 of the License, or (at your option) any later version.
  9. ;;;;
  10. ;;;; This library is distributed in the hope that it will be useful,
  11. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. ;;;; Lesser General Public License for more details.
  14. ;;;;
  15. ;;;; You should have received a copy of the GNU Lesser General Public
  16. ;;;; License along with this library; if not, write to the Free Software
  17. ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  18. ;;;;
  19. (define-module (ice-9 peg string-peg)
  20. #:export (peg-as-peg
  21. define-peg-string-patterns
  22. peg-grammar)
  23. #:use-module (ice-9 peg using-parsers)
  24. #:use-module (ice-9 peg codegen)
  25. #:use-module (ice-9 peg simplify-tree))
  26. ;; Gets the left-hand depth of a list.
  27. (define (depth lst)
  28. (if (or (not (list? lst)) (null? lst))
  29. 0
  30. (+ 1 (depth (car lst)))))
  31. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  32. ;;;;; Parse string PEGs using sexp PEGs.
  33. ;; See the variable PEG-AS-PEG for an easier-to-read syntax.
  34. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  35. ;; Grammar for PEGs in PEG grammar.
  36. (define peg-as-peg
  37. "grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
  38. pattern <-- alternative (SLASH sp alternative)*
  39. alternative <-- ([!&]? sp suffix)+
  40. suffix <-- primary ([*+?] sp)*
  41. primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
  42. literal <-- ['] (!['] .)* ['] sp
  43. charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
  44. CCrange <-- . '-' .
  45. CCsingle <-- .
  46. nonterminal <-- [a-zA-Z0-9-]+ sp
  47. sp < [ \t\n]*
  48. SLASH < '/'
  49. LB < '['
  50. RB < ']'
  51. ")
  52. (define-syntax define-sexp-parser
  53. (lambda (x)
  54. (syntax-case x ()
  55. ((_ sym accum pat)
  56. (let* ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum)))
  57. (accumsym (syntax->datum #'accum))
  58. (syn (wrap-parser-for-users x matchf accumsym #'sym)))
  59. #`(define sym #,syn))))))
  60. (define-sexp-parser peg-grammar all
  61. (+ (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern)))
  62. (define-sexp-parser peg-pattern all
  63. (and peg-alternative
  64. (* (and (ignore "/") peg-sp peg-alternative))))
  65. (define-sexp-parser peg-alternative all
  66. (+ (and (? (or "!" "&")) peg-sp peg-suffix)))
  67. (define-sexp-parser peg-suffix all
  68. (and peg-primary (* (and (or "*" "+" "?") peg-sp))))
  69. (define-sexp-parser peg-primary all
  70. (or (and "(" peg-sp peg-pattern ")" peg-sp)
  71. (and "." peg-sp)
  72. peg-literal
  73. peg-charclass
  74. (and peg-nonterminal (not-followed-by "<"))))
  75. (define-sexp-parser peg-literal all
  76. (and "'" (* (and (not-followed-by "'") peg-any)) "'" peg-sp))
  77. (define-sexp-parser peg-charclass all
  78. (and (ignore "[")
  79. (* (and (not-followed-by "]")
  80. (or charclass-range charclass-single)))
  81. (ignore "]")
  82. peg-sp))
  83. (define-sexp-parser charclass-range all (and peg-any "-" peg-any))
  84. (define-sexp-parser charclass-single all peg-any)
  85. (define-sexp-parser peg-nonterminal all
  86. (and (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-")) peg-sp))
  87. (define-sexp-parser peg-sp none
  88. (* (or " " "\t" "\n")))
  89. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  90. ;;;;; PARSE STRING PEGS
  91. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  92. ;; Takes a string representing a PEG grammar and returns syntax that
  93. ;; will define all of the nonterminals in the grammar with equivalent
  94. ;; PEG s-expressions.
  95. (define (peg-parser str for-syntax)
  96. (let ((parsed (match-pattern peg-grammar str)))
  97. (if (not parsed)
  98. (begin
  99. ;; (display "Invalid PEG grammar!\n")
  100. #f)
  101. (let ((lst (peg:tree parsed)))
  102. (cond
  103. ((or (not (list? lst)) (null? lst))
  104. lst)
  105. ((eq? (car lst) 'peg-grammar)
  106. #`(begin
  107. #,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
  108. (context-flatten (lambda (lst) (<= (depth lst) 2))
  109. (cdr lst))))))))))
  110. ;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
  111. ;; defines all the appropriate nonterminals.
  112. (define-syntax define-peg-string-patterns
  113. (lambda (x)
  114. (syntax-case x ()
  115. ((_ str)
  116. (peg-parser (syntax->datum #'str) x)))))
  117. ;; lst has format (nonterm grabber pattern), where
  118. ;; nonterm is a symbol (the name of the nonterminal),
  119. ;; grabber is a string (either "<", "<-" or "<--"), and
  120. ;; pattern is the parse of a PEG pattern expressed as as string.
  121. (define (peg-nonterm->defn lst for-syntax)
  122. (let* ((nonterm (car lst))
  123. (grabber (cadr lst))
  124. (pattern (caddr lst))
  125. (nonterm-name (datum->syntax for-syntax
  126. (string->symbol (cadr nonterm)))))
  127. #`(define-peg-pattern #,nonterm-name
  128. #,(cond
  129. ((string=? grabber "<--") (datum->syntax for-syntax 'all))
  130. ((string=? grabber "<-") (datum->syntax for-syntax 'body))
  131. (else (datum->syntax for-syntax 'none)))
  132. #,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
  133. ;; lst has format ('peg-pattern ...).
  134. ;; After the context-flatten, (cdr lst) has format
  135. ;; (('peg-alternative ...) ...), where the outer list is a collection
  136. ;; of elements from a '/' alternative.
  137. (define (peg-pattern->defn lst for-syntax)
  138. #`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
  139. (context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
  140. (cdr lst)))))
  141. ;; lst has format ('peg-alternative ...).
  142. ;; After the context-flatten, (cdr lst) has the format
  143. ;; (item ...), where each item has format either ("!" ...), ("&" ...),
  144. ;; or ('peg-suffix ...).
  145. (define (peg-alternative->defn lst for-syntax)
  146. #`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
  147. (context-flatten (lambda (x) (or (string? (car x))
  148. (eq? (car x) 'peg-suffix)))
  149. (cdr lst)))))
  150. ;; lst has the format either
  151. ;; ("!" ('peg-suffix ...)), ("&" ('peg-suffix ...)), or
  152. ;; ('peg-suffix ...).
  153. (define (peg-body->defn lst for-syntax)
  154. (cond
  155. ((equal? (car lst) "&")
  156. #`(followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
  157. ((equal? (car lst) "!")
  158. #`(not-followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
  159. ((eq? (car lst) 'peg-suffix)
  160. (peg-suffix->defn lst for-syntax))
  161. (else `(peg-parse-body-fail ,lst))))
  162. ;; lst has format ('peg-suffix <peg-primary> (? (/ "*" "?" "+")))
  163. (define (peg-suffix->defn lst for-syntax)
  164. (let ((inner-defn (peg-primary->defn (cadr lst) for-syntax)))
  165. (cond
  166. ((null? (cddr lst))
  167. inner-defn)
  168. ((equal? (caddr lst) "*")
  169. #`(* #,inner-defn))
  170. ((equal? (caddr lst) "?")
  171. #`(? #,inner-defn))
  172. ((equal? (caddr lst) "+")
  173. #`(+ #,inner-defn)))))
  174. ;; Parse a primary.
  175. (define (peg-primary->defn lst for-syntax)
  176. (let ((el (cadr lst)))
  177. (cond
  178. ((list? el)
  179. (cond
  180. ((eq? (car el) 'peg-literal)
  181. (peg-literal->defn el for-syntax))
  182. ((eq? (car el) 'peg-charclass)
  183. (peg-charclass->defn el for-syntax))
  184. ((eq? (car el) 'peg-nonterminal)
  185. (datum->syntax for-syntax (string->symbol (cadr el))))))
  186. ((string? el)
  187. (cond
  188. ((equal? el "(")
  189. (peg-pattern->defn (caddr lst) for-syntax))
  190. ((equal? el ".")
  191. (datum->syntax for-syntax 'peg-any))
  192. (else (datum->syntax for-syntax
  193. `(peg-parse-any unknown-string ,lst)))))
  194. (else (datum->syntax for-syntax
  195. `(peg-parse-any unknown-el ,lst))))))
  196. ;; Trims characters off the front and end of STR.
  197. ;; (trim-1chars "'ab'") -> "ab"
  198. (define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
  199. ;; Parses a literal.
  200. (define (peg-literal->defn lst for-syntax)
  201. (datum->syntax for-syntax (trim-1chars (cadr lst))))
  202. ;; Parses a charclass.
  203. (define (peg-charclass->defn lst for-syntax)
  204. #`(or
  205. #,@(map
  206. (lambda (cc)
  207. (cond
  208. ((eq? (car cc) 'charclass-range)
  209. #`(range #,(datum->syntax
  210. for-syntax
  211. (string-ref (cadr cc) 0))
  212. #,(datum->syntax
  213. for-syntax
  214. (string-ref (cadr cc) 2))))
  215. ((eq? (car cc) 'charclass-single)
  216. (datum->syntax for-syntax (cadr cc)))))
  217. (context-flatten
  218. (lambda (x) (or (eq? (car x) 'charclass-range)
  219. (eq? (car x) 'charclass-single)))
  220. (cdr lst)))))
  221. ;; Compresses a list to save the optimizer work.
  222. ;; e.g. (or (and a)) -> a
  223. (define (compressor-core lst)
  224. (if (or (not (list? lst)) (null? lst))
  225. lst
  226. (cond
  227. ((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
  228. (null? (cddr lst)))
  229. (compressor-core (cadr lst)))
  230. ((and (eq? (car lst) 'body)
  231. (eq? (cadr lst) 'lit)
  232. (eq? (cadddr lst) 1))
  233. (compressor-core (caddr lst)))
  234. (else (map compressor-core lst)))))
  235. (define (compressor syn for-syntax)
  236. (datum->syntax for-syntax
  237. (compressor-core (syntax->datum syn))))
  238. ;; Builds a lambda-expressions for the pattern STR using accum.
  239. (define (peg-string-compile args accum)
  240. (syntax-case args ()
  241. ((str-stx) (string? (syntax->datum #'str-stx))
  242. (let ((string (syntax->datum #'str-stx)))
  243. (compile-peg-pattern
  244. (compressor
  245. (peg-pattern->defn
  246. (peg:tree (match-pattern peg-pattern string)) #'str-stx)
  247. #'str-stx)
  248. (if (eq? accum 'all) 'body accum))))
  249. (else (error "Bad embedded PEG string" args))))
  250. (add-peg-compiler! 'peg peg-string-compile)