123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649 |
- #lang racket
- (require rackunit)
- (define (safe-rest lst)
- (if (empty? lst)
- '()
- (rest lst)))
- (define (safe-first lst)
- (if (empty? lst)
- '()
- (first lst)))
- ; Parse-result = [Struct Boolean Syntaxtree [List of Some] Number]
- (struct parsing-success
- [syntax-tree
- chars
- pos]
- #:transparent)
- (struct parsing-fail
- [chars
- error-pos]
- #:transparent)
- (define (bind fun res)
- (match res
- [(parsing-success syntax-tree chars pos)
- (parsing-success (fun syntax-tree)
- chars
- pos)]
- [(parsing-fail _ _)
- res]
- [_
- (fun res)]))
- (define (binding fun)
- (lambda (res)
- (bind fun res)))
-
- ; Parser = (Chars Number -> Parse-success | Parse-fail)
- #|
- В Zig структура - это пространство имён. Это натолкнуло меня на мысль, что структура и локальная, лексическая область видимости - это понятия, между которыми можно найти много общего. Так, я использовал локальное окружение (надеюсь, я правильно употребил данное выражение), чтобы реализовать ООП на чистом Scheme.
- |#
- (define (parse parser str)
- (define res (parser (string->list str)
- 0))
- (parsing-success-syntax-tree res))
- (define (wrap-parsing-result result wrap-name)
- (if wrap-name
- (list wrap-name result)
- result))
- (define (none/p chars pos)
- (parsing-fail chars pos))
- #| Некрасивая функция, использовать не надо.
- (define (atom/cmb pred [wrap-name #f])
- (lambda (chars pos)
- (if (empty? chars)
- (parsing-fail chars pos)
- (let ([current (first chars)])
- (if (pred current)
- (parsing-success
- (wrap-parsing-result current wrap-name)
- (rest chars)
- (+ pos 1))
- (parsing-fail
- chars pos))))))
- |#
- (define (atom-if/cmb pred [wrap-name #f])
- (define (new-atom-if-parser chars pos)
- (match chars
- [(cons current rest-chars)
- (if (pred current)
- (parsing-success (wrap-parsing-result current wrap-name)
- rest-chars
- (+ pos 1))
-
- (parsing-fail chars
- pos))]
- [_
- (parsing-fail chars
- pos)]))
- new-atom-if-parser)
- (define (atom-of/cmb atom [wrap-name #f])
- (define (new-atom-of-parser chars pos)
- (match chars
- [(cons current rest-chars)
- (if (equal? atom current)
- (parsing-success (wrap-parsing-result current wrap-name)
- rest-chars
- (+ pos 1))
-
- (parsing-fail chars
- pos))]
- [_
- (parsing-fail chars
- pos)]))
- new-atom-of-parser)
-
- (define (many/cmb parser #:to-skip [to-skip none/p])
- (lambda (chars pos)
- (let loop ([chars chars]
- [pos pos]
- [fun-res '()])
- (define res (parser chars pos))
- (match res
- [(parsing-success syntax-tree
- rest-chars
- new-pos)
- (loop rest-chars
- new-pos
- (if (not-save-sym? syntax-tree)
- fun-res
- (cons syntax-tree fun-res)))]
-
- [(parsing-fail _ _)
- (match (to-skip chars pos)
- [(parsing-success _ rest-chars new-pos)
- (loop (rest chars)
- (+ pos 1)
- fun-res)]
- [_
- (parsing-success (reverse fun-res)
- chars
- pos)])]))))
-
- (define (many1/cmb parser #:to-skip [to-skip none/p])
- (lambda (chars pos)
- (define many-parser (many/cmb parser #:to-skip to-skip))
- (define result (many-parser chars pos))
- (match result
- [(parsing-success syntax-tree
- rest-chars
- new-pos)
-
- (if (empty? syntax-tree)
- (parsing-fail chars pos)
- result)]
- [_
- (parsing-fail chars pos)])))
- (define (seq/cmb #:to-skip [to-skip none/p] . parsers)
- (lambda (chars pos)
- (let loop ([parsers parsers]
- [chars chars]
- [pos pos]
- [res '()])
- (define parser (safe-first parsers))
- (define parsed (if (not (empty? parser))
- (parser chars pos)
- '()))
- (match parsed
- ['()
- (parsing-success (reverse res)
- chars
- pos)]
-
- [(parsing-success syntax-tree
- rest-chars
- new-pos)
- (loop (rest parsers)
- rest-chars
- new-pos
- (if (not-save-sym? syntax-tree)
- res
- (cons syntax-tree res)))]
-
- [(parsing-fail fail-chars fail-pos)
- (match (to-skip chars pos)
- [(parsing-success _ skip-chars skip-pos)
- (loop parsers
- skip-chars
- skip-pos
- res)]
- [(parsing-fail _ _)
- parsed])]))))
-
- (define (lazy/cmb parser)
- (define (new-lazy-parser chars pos)
- (parser chars pos))
- new-lazy-parser)
- (define (not-saving/cmb parser)
- (compose (binding (λ (x) 'not-save))
- parser))
- (define (not-save-sym? sym)
- (equal? 'not-save sym))
- (define (or/cmb . parsers)
- (define (new-or-parser chars pos)
- (let loop ([parsers parsers])
- (match parsers
- [(cons parser rest-parsers)
- (define res (parser chars pos))
- (match res
- [(parsing-fail _ _)
- (loop rest-parsers)]
- [(parsing-success _ _ _)
- res])]
- ['()
- (parsing-fail chars pos)])))
- new-or-parser)
- (define (opt/cmb parser #:alternative-value [alt-val 'not-save])
- (define (new-opt-parser chars pos)
- (define res (parser chars pos))
- (match res
- [(? parsing-success?)
- res]
- [(? parsing-fail?)
- (parsing-success alt-val chars pos)]))
- new-opt-parser)
- (define (give-name/cmb name parser)
- (compose (binding (λ (stx)
- (cons name stx)))
- parser))
- (define (wrap-name/cmb name parser)
- (compose (binding (λ (stx)
- (list name stx)))
- parser))
- (define (sep-by/cmb parser sep #:to-skip [to-skip none/p] #:save-sep? [save-sep? #f])
- (define new-parser
- (seq/cmb #:to-skip to-skip
- parser
- (many/cmb #:to-skip to-skip
- (seq/cmb #:to-skip to-skip
- (if save-sep?
- sep
- (not-saving/cmb sep))
- parser))))
- (define (new-sep-by-parser chars pos)
- (bind
- (λ (stx)
- (match stx
- [(list frst rst)
- (cons frst
- (apply append rst))]))
- (new-parser chars pos)))
- new-sep-by-parser)
- (define (replace/cmb parser to)
- (compose (binding (λ (stx) to))
- parser))
-
- (define digit/p
- (atom-if/cmb char-numeric?))
- (define (digit->integer digit)
- (if (char-numeric? digit)
- (- (char->integer digit)
- 48)
- (error 'not-a-digit)))
- (define (digits->integer digits)
- (let loop ([digits digits]
- [acc 0])
- (if (empty? digits)
- acc
- (loop (rest digits)
- (+ (* acc 10)
- (digit->integer (first digits)))))))
- (define integer/p
- (wrap-name/cmb
- 'integer
- (compose (binding digits->integer)
- (many1/cmb digit/p))))
- (define whitespaces/p
- (many1/cmb (atom-if/cmb char-whitespace?)))
- (define plus/p (atom-of/cmb #\+))
- (define star/p (atom-of/cmb #\*))
- (define minus/p (atom-of/cmb #\-))
- (define slash/p (atom-of/cmb #\/))
- (define (left-bracket/p chars pos)
- ((atom-of/cmb #\( )
- chars pos))
- (define (right-bracket/p chars pos)
- ((atom-of/cmb #\) )
- chars pos))
- #|
- (define (brackets/p chars pos)
- ;(println "brackets")
- ((compose (binding first)
- (seq/cmb #:to-skip whitespaces/p
- (not-saving/cmb left-bracket/p)
- expr/p
- (not-saving/cmb right-bracket/p)))
- chars pos))
- (define (math-atom/p chars pos)
- ;(println "math-atom")
- ((or/cmb integer/p
- brackets/p)
- chars pos))
- (define (op1/p chars pos)
- (define parser
- (seq/cmb #:to-skip whitespaces/p
- math-atom/p
-
- (or/cmb star/p
- div/p)
-
- (or/cmb op1/p
- math-atom/p)))
- (bind
- (λ (x)
- (match x
- [(list first-operand operator second-operand)
- (list operator first-operand second-operand)]))
- (parser chars pos)))
-
- (define (op2/p chars pos)
- (define parser
- (seq/cmb #:to-skip whitespaces/p
- (or/cmb
- op1/p
- math-atom/p)
- (or/cmb plus/p
- minus/p)
- (or/cmb op2/p
- op1/p
- math-atom/p)))
- (bind
- (λ (x)
- (match x
- [(list first-operand operator second-operand)
- (list operator first-operand second-operand)]))
- (parser chars pos)))
- (define (expr/p chars pos)
- ((or/cmb
- op2/p
- op1/p
- math-atom/p)
- chars pos))
- |#
- (define (infix->prefix infix-list)
- (match infix-list
- ['()
- '()]
- [(list expr)
- expr]
- [(cons op1 (cons oper (cons op2 rst)))
- (infix->prefix (cons (list oper op1 op2)
- rst))]))
-
- (define (op1/p chars pos)
- (define parser
- (sep-by/cmb #:to-skip whitespaces/p #:save-sep? #t
- math-atom/p
- (or/cmb (replace/cmb star/p 'mul-op)
- (replace/cmb slash/p 'div-op))))
-
- (define res (parser chars pos))
-
- (bind infix->prefix
- res))
- (define (op2/p chars pos)
- (define parser
- (sep-by/cmb #:to-skip whitespaces/p #:save-sep? #t
- op1/p
- (or/cmb (replace/cmb plus/p 'add-op)
- (replace/cmb minus/p 'sub-op))))
-
- (define res (parser chars pos))
-
- (bind infix->prefix
- res))
- (define (expr/p chars pos)
- ((or/cmb
- op2/p
- op1/p
- math-atom/p)
- chars pos))
- (define (brackets/p chars pos)
- ;(println "brackets")
- ((compose (binding first)
- (seq/cmb #:to-skip whitespaces/p
- (not-saving/cmb left-bracket/p)
- expr/p
- (not-saving/cmb right-bracket/p)))
- chars pos))
- (define (math-atom/p chars pos)
- ;(println "math-atom")
- ((or/cmb integer/p
- brackets/p)
- chars pos))
- (define (letter/p chars pos)
- ((atom-if/cmb char-alphabetic?)
- chars
- pos))
- (define (identifier/p chars pos)
- (define parser
- (seq/cmb
- letter/p
- (many/cmb
- (or/cmb letter/p
- digit/p))))
- (define res (parser chars pos))
- (bind (λ (stx)
- (list 'ident
- (list->string
- (cons (first stx)
- (second stx)))))
- res))
- (define (fun-call/p chars pos)
- (define parser
- (give-name/cmb 'fun-call
- (seq/cmb #:to-skip whitespaces/p
- identifier/p
- (not-saving/cmb left-bracket/p)
- (sep-by/cmb #:to-skip whitespaces/p
- expr/p
- (atom-of/cmb #\,)))))
- (parser chars pos))
-
-
- (define s->l string->list)
- (define (digit/p-test)
- (check-match (digit/p (s->l "123") 0)
- (parsing-success #\1 '(#\2 #\3) 1))
- (check-match (digit/p (s->l "abc") 0)
- (parsing-fail '(#\a #\b #\c) 0))
- (check-match (digit/p '() 0)
- (parsing-fail '() 0)))
- (define (integer/p-test)
- (check-match (integer/p (s->l "123") 0)
- (parsing-success '(integer 123)
- '()
- 3))
- (check-match (integer/p '() 0)
- (parsing-fail '() 0))
- (check-match (integer/p (s->l "abc123") 0)
- (parsing-fail '(#\a #\b #\c #\1 #\2 #\3)
- 0)))
- (define (atom-if-test)
- (define parser (atom-if/cmb char-numeric?))
- (check-match (parser '() 0)
- (parsing-fail '() 0))
-
- (check-match (parser (s->l "123")
- 0)
- (parsing-success #\1 '(#\2 #\3) 1))
- (check-match (parser (s->l "!123")
- 0)
- (parsing-fail '(#\! #\1 #\2 #\3) 0)))
- (define (atom-of-test)
- (define parser (atom-of/cmb #\+))
- (check-match (parser '() 0)
- (parsing-fail '() 0))
-
- (check-match (parser (s->l "+23")
- 0)
- (parsing-success #\+ '(#\2 #\3) 1))
- (check-match (parser (s->l "!123")
- 0)
- (parsing-fail '(#\! #\1 #\2 #\3) 0)))
- (define (many-test)
- (define parser (many/cmb (atom-if/cmb char-numeric?)))
- (check-match (parser '() 0)
- (parsing-success '() '() 0))
-
- (check-match (parser (s->l "123a") 0)
- (parsing-success '(#\1 #\2 #\3) '(#\a) 3))
-
- (check-match (parser (s->l "abc") 0)
- (parsing-success '() '(#\a #\b #\c) 0)))
- (define (many1-test)
- (define parser (many1/cmb (atom-if/cmb char-numeric?)))
- (check-match (parser '() 0)
- (parsing-fail '() 0))
- (check-match (parser (s->l "123a") 0)
- (parsing-success '(#\1 #\2 #\3) '(#\a) 3))
- (check-match (parser (s->l "abc") 0)
- (parsing-fail '(#\a #\b #\c) 0)))
- (define (seq-test) ; Нужно выяснить, что должно возвращаться в parsing-fail - позиция, с которой начинался парсинг,
- (define parser (seq/cmb integer/p ; или та, на которой произошла ошибка. Выяснил. Кажется, позиция ошибки.
- (atom-of/cmb #\.)
- integer/p))
-
- (check-match (parser (s->l "123+4") 0)
- (parsing-fail '(#\+ #\4)
- 3))
- (check-match (parser (s->l "123.45") 0)
- (parsing-success '((integer 123)
- #\.
- (integer 45))
- '()
- 6)))
- (define (skip-test)
- (define parser-many (many/cmb digit/p #:to-skip whitespaces/p))
- (check-match (parser-many (s->l "123 456")
- 0)
- (parsing-success '(#\1 #\2 #\3 #\4 #\5 #\6)
- '()
- 8))
- (define parser-seq (seq/cmb #:to-skip integer/p
- (many1/cmb (atom-if/cmb char-alphabetic?))
- (atom-of/cmb #\+)
- (many1/cmb (atom-if/cmb char-alphabetic?))))
- (check-match (parser-seq (s->l "123abc+123abc123")
- 0)
- (parsing-success '((#\a #\b #\c)
- #\+
- (#\a #\b #\c))
- '(#\1 #\2 #\3)
- 13)))
- (define (opt-test)
- (define parser (opt/cmb (atom-if/cmb char-numeric?)
- #:alternative-value 'not-save))
- (check-match (parser (s->l "1234")
- 0)
- (parsing-success #\1
- '(#\2 #\3 #\4)
- 1))
- (check-match (parser (s->l "a1234")
- 0)
- (parsing-success 'not-save
- '(#\a #\1 #\2 #\3 #\4)
- 0)))
- (define (not-saving-test)
- (define parser (not-saving/cmb digit/p))
- (check-match (parser (s->l "123")
- 0)
- (parsing-success 'not-save
- '(#\2 #\3)
- 1))
- (check-match (parser (s->l "abc")
- 0)
- (parsing-fail '(#\a #\b #\c)
- 0))
- (define parser-2 (many/cmb (not-saving/cmb digit/p)))
- (check-match (parser-2 (s->l "123")
- 0)
- (parsing-success '()
- '()
- 3))
- (define parser-3 (seq/cmb integer/p
- (not-saving/cmb plus/p)
- integer/p))
- (check-match (parser-3 (s->l "1+3")
- 0)
- (parsing-success '((integer 1)
- (integer 3))
- '()
- 3)))
- (define (sep-by-test)
- (define parser (sep-by/cmb integer/p (atom-of/cmb #\,)))
- (check-match (parser (s->l "1,2,3")
- 0)
- (parsing-success '((integer 1)
- (integer 2)
- (integer 3))
- '()
- 5))
- (check-match (parser (s->l "1,2,3,")
- 0)
- (parsing-success '((integer 1)
- (integer 2)
- (integer 3))
- '(#\,)
- 5))
- (define parser-2
- (sep-by/cmb #:to-skip whitespaces/p
- integer/p
- plus/p))
- (check-match (parser-2 (s->l "1 + 2 + 3 4")
- 0)
- (parsing-success '((integer 1)
- (integer 2)
- (integer 3))
- '(#\4) ; Внимание! Тут съедаются пробелы в конце.
- 9)))
- (define (test-all)
- (atom-if-test)
- (atom-of-test)
- (digit/p-test)
- (many-test)
- (many1-test)
- (integer/p-test)
- (seq-test)
- (skip-test)
- (opt-test)
- (not-saving-test)
- (sep-by-test))
- (define (parse-tr parser str)
- (parser (string->list str) 0))
- (define (string-mul string num)
- (apply string-append
- (for/list ([i (in-range 0 num)])
- string)))
- (define (put-in-brackets str)
- (string-append "(" str ")"))
- (define test-string
- (string-append (string-mul "(2 + 5 / (6 + 4)) + 4 + 6 + 9 * 7 * 8 +" 1000)
- "5 * 4 - 16"))
|