123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274 |
- ;;;; string-peg.scm --- representing PEG grammars as strings
- ;;;;
- ;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
- ;;;;
- ;;;; This library is free software; you can redistribute it and/or
- ;;;; modify it under the terms of the GNU Lesser General Public
- ;;;; License as published by the Free Software Foundation; either
- ;;;; version 3 of the License, or (at your option) any later version.
- ;;;;
- ;;;; This library is distributed in the hope that it will be useful,
- ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
- ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- ;;;; Lesser General Public License for more details.
- ;;;;
- ;;;; You should have received a copy of the GNU Lesser General Public
- ;;;; License along with this library; if not, write to the Free Software
- ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ;;;;
- (define-module (ice-9 peg string-peg)
- #:export (peg-as-peg
- define-peg-string-patterns
- peg-grammar)
- #:use-module (ice-9 peg using-parsers)
- #:use-module (ice-9 peg codegen)
- #:use-module (ice-9 peg simplify-tree))
- ;; Gets the left-hand depth of a list.
- (define (depth lst)
- (if (or (not (list? lst)) (null? lst))
- 0
- (+ 1 (depth (car lst)))))
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;;;; Parse string PEGs using sexp PEGs.
- ;; See the variable PEG-AS-PEG for an easier-to-read syntax.
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;; Grammar for PEGs in PEG grammar.
- (define peg-as-peg
- "grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
- pattern <-- alternative (SLASH sp alternative)*
- alternative <-- ([!&]? sp suffix)+
- suffix <-- primary ([*+?] sp)*
- primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
- literal <-- ['] (!['] .)* ['] sp
- charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
- CCrange <-- . '-' .
- CCsingle <-- .
- nonterminal <-- [a-zA-Z0-9-]+ sp
- sp < [ \t\n]*
- SLASH < '/'
- LB < '['
- RB < ']'
- ")
- (define-syntax define-sexp-parser
- (lambda (x)
- (syntax-case x ()
- ((_ sym accum pat)
- (let* ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum)))
- (accumsym (syntax->datum #'accum))
- (syn (wrap-parser-for-users x matchf accumsym #'sym)))
- #`(define sym #,syn))))))
- (define-sexp-parser peg-grammar all
- (+ (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern)))
- (define-sexp-parser peg-pattern all
- (and peg-alternative
- (* (and (ignore "/") peg-sp peg-alternative))))
- (define-sexp-parser peg-alternative all
- (+ (and (? (or "!" "&")) peg-sp peg-suffix)))
- (define-sexp-parser peg-suffix all
- (and peg-primary (* (and (or "*" "+" "?") peg-sp))))
- (define-sexp-parser peg-primary all
- (or (and "(" peg-sp peg-pattern ")" peg-sp)
- (and "." peg-sp)
- peg-literal
- peg-charclass
- (and peg-nonterminal (not-followed-by "<"))))
- (define-sexp-parser peg-literal all
- (and "'" (* (and (not-followed-by "'") peg-any)) "'" peg-sp))
- (define-sexp-parser peg-charclass all
- (and (ignore "[")
- (* (and (not-followed-by "]")
- (or charclass-range charclass-single)))
- (ignore "]")
- peg-sp))
- (define-sexp-parser charclass-range all (and peg-any "-" peg-any))
- (define-sexp-parser charclass-single all peg-any)
- (define-sexp-parser peg-nonterminal all
- (and (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-")) peg-sp))
- (define-sexp-parser peg-sp none
- (* (or " " "\t" "\n")))
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;;;; PARSE STRING PEGS
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;; Takes a string representing a PEG grammar and returns syntax that
- ;; will define all of the nonterminals in the grammar with equivalent
- ;; PEG s-expressions.
- (define (peg-parser str for-syntax)
- (let ((parsed (match-pattern peg-grammar str)))
- (if (not parsed)
- (begin
- ;; (display "Invalid PEG grammar!\n")
- #f)
- (let ((lst (peg:tree parsed)))
- (cond
- ((or (not (list? lst)) (null? lst))
- lst)
- ((eq? (car lst) 'peg-grammar)
- #`(begin
- #,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
- (context-flatten (lambda (lst) (<= (depth lst) 2))
- (cdr lst))))))))))
- ;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
- ;; defines all the appropriate nonterminals.
- (define-syntax define-peg-string-patterns
- (lambda (x)
- (syntax-case x ()
- ((_ str)
- (peg-parser (syntax->datum #'str) x)))))
- ;; lst has format (nonterm grabber pattern), where
- ;; nonterm is a symbol (the name of the nonterminal),
- ;; grabber is a string (either "<", "<-" or "<--"), and
- ;; pattern is the parse of a PEG pattern expressed as as string.
- (define (peg-nonterm->defn lst for-syntax)
- (let* ((nonterm (car lst))
- (grabber (cadr lst))
- (pattern (caddr lst))
- (nonterm-name (datum->syntax for-syntax
- (string->symbol (cadr nonterm)))))
- #`(define-peg-pattern #,nonterm-name
- #,(cond
- ((string=? grabber "<--") (datum->syntax for-syntax 'all))
- ((string=? grabber "<-") (datum->syntax for-syntax 'body))
- (else (datum->syntax for-syntax 'none)))
- #,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
- ;; lst has format ('peg-pattern ...).
- ;; After the context-flatten, (cdr lst) has format
- ;; (('peg-alternative ...) ...), where the outer list is a collection
- ;; of elements from a '/' alternative.
- (define (peg-pattern->defn lst for-syntax)
- #`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
- (context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
- (cdr lst)))))
- ;; lst has format ('peg-alternative ...).
- ;; After the context-flatten, (cdr lst) has the format
- ;; (item ...), where each item has format either ("!" ...), ("&" ...),
- ;; or ('peg-suffix ...).
- (define (peg-alternative->defn lst for-syntax)
- #`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
- (context-flatten (lambda (x) (or (string? (car x))
- (eq? (car x) 'peg-suffix)))
- (cdr lst)))))
- ;; lst has the format either
- ;; ("!" ('peg-suffix ...)), ("&" ('peg-suffix ...)), or
- ;; ('peg-suffix ...).
- (define (peg-body->defn lst for-syntax)
- (cond
- ((equal? (car lst) "&")
- #`(followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
- ((equal? (car lst) "!")
- #`(not-followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
- ((eq? (car lst) 'peg-suffix)
- (peg-suffix->defn lst for-syntax))
- (else `(peg-parse-body-fail ,lst))))
- ;; lst has format ('peg-suffix <peg-primary> (? (/ "*" "?" "+")))
- (define (peg-suffix->defn lst for-syntax)
- (let ((inner-defn (peg-primary->defn (cadr lst) for-syntax)))
- (cond
- ((null? (cddr lst))
- inner-defn)
- ((equal? (caddr lst) "*")
- #`(* #,inner-defn))
- ((equal? (caddr lst) "?")
- #`(? #,inner-defn))
- ((equal? (caddr lst) "+")
- #`(+ #,inner-defn)))))
- ;; Parse a primary.
- (define (peg-primary->defn lst for-syntax)
- (let ((el (cadr lst)))
- (cond
- ((list? el)
- (cond
- ((eq? (car el) 'peg-literal)
- (peg-literal->defn el for-syntax))
- ((eq? (car el) 'peg-charclass)
- (peg-charclass->defn el for-syntax))
- ((eq? (car el) 'peg-nonterminal)
- (datum->syntax for-syntax (string->symbol (cadr el))))))
- ((string? el)
- (cond
- ((equal? el "(")
- (peg-pattern->defn (caddr lst) for-syntax))
- ((equal? el ".")
- (datum->syntax for-syntax 'peg-any))
- (else (datum->syntax for-syntax
- `(peg-parse-any unknown-string ,lst)))))
- (else (datum->syntax for-syntax
- `(peg-parse-any unknown-el ,lst))))))
- ;; Trims characters off the front and end of STR.
- ;; (trim-1chars "'ab'") -> "ab"
- (define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
- ;; Parses a literal.
- (define (peg-literal->defn lst for-syntax)
- (datum->syntax for-syntax (trim-1chars (cadr lst))))
- ;; Parses a charclass.
- (define (peg-charclass->defn lst for-syntax)
- #`(or
- #,@(map
- (lambda (cc)
- (cond
- ((eq? (car cc) 'charclass-range)
- #`(range #,(datum->syntax
- for-syntax
- (string-ref (cadr cc) 0))
- #,(datum->syntax
- for-syntax
- (string-ref (cadr cc) 2))))
- ((eq? (car cc) 'charclass-single)
- (datum->syntax for-syntax (cadr cc)))))
- (context-flatten
- (lambda (x) (or (eq? (car x) 'charclass-range)
- (eq? (car x) 'charclass-single)))
- (cdr lst)))))
- ;; Compresses a list to save the optimizer work.
- ;; e.g. (or (and a)) -> a
- (define (compressor-core lst)
- (if (or (not (list? lst)) (null? lst))
- lst
- (cond
- ((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
- (null? (cddr lst)))
- (compressor-core (cadr lst)))
- ((and (eq? (car lst) 'body)
- (eq? (cadr lst) 'lit)
- (eq? (cadddr lst) 1))
- (compressor-core (caddr lst)))
- (else (map compressor-core lst)))))
- (define (compressor syn for-syntax)
- (datum->syntax for-syntax
- (compressor-core (syntax->datum syn))))
- ;; Builds a lambda-expressions for the pattern STR using accum.
- (define (peg-string-compile args accum)
- (syntax-case args ()
- ((str-stx) (string? (syntax->datum #'str-stx))
- (let ((string (syntax->datum #'str-stx)))
- (compile-peg-pattern
- (compressor
- (peg-pattern->defn
- (peg:tree (match-pattern peg-pattern string)) #'str-stx)
- #'str-stx)
- (if (eq? accum 'all) 'body accum))))
- (else (error "Bad embedded PEG string" args))))
- (add-peg-compiler! 'peg peg-string-compile)
|