123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964 |
- (define-record-type regexp :regexp
- (make-regexp type compiled field0 field1 field2)
- regexp?
- (type regexp-type)
- (compiled regexp-compiled set-regexp-compiled!)
- (field0 regexp-field0)
- (field1 regexp-field1)
- (field2 regexp-field2 set-regexp-field2!))
- (define-record-discloser :regexp
- (lambda (regexp)
- (list 'regexp (regexp-type regexp))))
- (define-syntax define-regexp-type
- (syntax-rules ()
- ((define-regexp-type name maker predicate)
- (begin
- (define (maker) (make-regexp 'name #f #f #f #f))
- (define (predicate regexp)
- (eq? (regexp-type regexp) 'name))))
- ((define-regexp-type name maker predicate slot0)
- (begin
- (define (maker x)
- (make-regexp 'name #f x #f #f))
- (define (predicate regexp)
- (eq? (regexp-type regexp) 'name))
- (define slot0 regexp-field0)))
- ((define-regexp-type name maker predicate slot0 slot1)
- (begin
- (define (maker x y) (make-regexp 'name #f x y #f))
- (define (predicate regexp) (eq? (regexp-type regexp) 'name))
- (define slot0 regexp-field0)
- (define slot1 regexp-field1)))
- ((define-regexp-type name maker predicate slot0 slot1 slot2)
- (begin
- (define (maker x y z) (make-regexp 'name #f x y z))
- (define (predicate regexp) (eq? (regexp-type regexp) 'name))
- (define slot0 regexp-field0)
- (define slot1 regexp-field1)
- (define slot2 regexp-field2)))))
-
- (define-regexp-type set really-make-set set?
- set-use-case
- set-no-case
- set-string)
- (define set-set-string! set-regexp-field2!)
- (define (make-set case no-case)
- (really-make-set case no-case #f))
- (define the-empty-set
- (really-make-set 0 0 #f))
- (define (empty-set? set)
- (and (set? set)
- (= 0 (set-use-case set))
- (= 0 (set-no-case set))))
- (define (char->mask char)
- (arithmetic-shift 1 (char->scalar-value char)))
- (define (char-in-set? char set)
- (not (zero? (bitwise-and (set-use-case set)
- (char->mask char)))))
- (define char-limit 256)
- (define no-case-char-masks
- (reduce ((count* i 0 char-limit))
- ((masks '()))
- (cons (let ((ch (scalar-value->char i)))
- (bitwise-ior (arithmetic-shift 1 i)
- (cond ((and (< i 128)
- (char-upper-case? ch))
- (char->mask (char-downcase ch)))
- ((and (< i 128)
- (char-lower-case? ch))
- (char->mask (char-upcase ch)))
- (else
- 0))))
- masks)
- (list->vector (reverse masks))))
- (define singleton-sets
- (reduce ((count* i 0 char-limit))
- ((sets '()))
- (cons (make-set (arithmetic-shift 1 i)
- (vector-ref no-case-char-masks i))
- sets)
- (list->vector (reverse sets))))
- (define (char->set char)
- (vector-ref singleton-sets (char->scalar-value char)))
- (define (set . all-args)
- (if (and (= 1 (length all-args))
- (char? (car all-args)))
- (char->set (car all-args))
- (reduce ((list* arg all-args))
- ((case 0)
- (no-case 0))
- (cond ((char? arg)
- (add-char-masks arg case no-case))
- ((string? arg)
- (add-string-masks arg case no-case))
- (else
- (apply assertion-violation 'set "invalid argument" all-args)))
- (make-set case no-case))))
-
- (define (add-char-masks char case no-case)
- (values (bitwise-ior case (char->mask char))
- (bitwise-ior no-case
- (vector-ref no-case-char-masks
- (char->scalar-value char)))))
-
- (define (add-string-masks string case no-case)
- (reduce ((string* char string))
- ((case case)
- (no-case no-case))
- (add-char-masks char case no-case)))
- (define (range low high)
- (or (real-ranges `(,low ,high) char->integer integer->scalar-value)
- (assertion-violation 'range "invalid argument" low high)))
- (define (ranges . limits)
- (or (real-ranges limits char->integer integer->scalar-value)
- (apply assertion-violation 'ranges "invalid argument" limits)))
- (define (ascii-range low high)
- (or (real-ranges `(,low ,high) char->ascii identity)
- (assertion-violation 'ascii-range "invalid argument" low high)))
- (define (ascii-ranges . limits)
- (or (real-ranges limits char->ascii identity)
- (apply assertion-violation 'ascii-ranges "invalid argument" limits)))
- (define (integer->scalar-value i)
- (char->scalar-value (integer->char i)))
- (define (identity i)
- i)
- (define (real-ranges limits char->int int->scalar-value)
- (if (every char? limits)
- (let loop ((to-do limits) (case 0) (no-case 0))
- (cond ((null? to-do)
- (make-set case no-case))
- ((null? (cdr to-do))
- #f)
- (else
- (let ((start (char->int (car to-do)))
- (end (char->int (cadr to-do))))
- (if (< end start)
- #f
- (reduce ((count* i start (+ end 1)))
- ((case case)
- (no-case no-case))
- (let ((scalar-value (int->scalar-value i)))
- (values (bitwise-ior case
- (arithmetic-shift 1 scalar-value))
- (bitwise-ior no-case
- (vector-ref no-case-char-masks
- scalar-value))))
- (loop (cddr to-do) case no-case)))))))
- #f))
-
-
- (define all-chars (- (arithmetic-shift 1 char-limit) 1))
- (define all-chars-except-nul (- all-chars 1))
- (define (negate set)
- (make-set (bitwise-xor all-chars-except-nul (set-use-case set))
- (bitwise-xor all-chars-except-nul (set-no-case set))))
- (define (set-binop op)
- (lambda (set1 set2)
- (make-set (op (set-use-case set1)
- (set-use-case set2))
- (op (set-no-case set1)
- (set-no-case set2)))))
- (define intersection (set-binop bitwise-and))
- (define union (set-binop bitwise-ior))
- (define subtract
- (set-binop (lambda (x y)
- (bitwise-xor x
- (bitwise-and x y)))))
- (define lower-case (range #\a #\z))
- (define upper-case (range #\A #\Z))
- (define alphabetic (union lower-case upper-case))
- (define numeric (range #\0 #\9))
- (define alphanumeric (union alphabetic numeric))
- (define punctuation (set "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"))
- (define blank (set #\space (scalar-value->char 9)))
- (define graphic (union alphanumeric punctuation))
- (define whitespace (apply set (map scalar-value->char
- '(32 ;space
- 9 ;tab
- 10 ;newline
- 11 ;vertical tab
- 12 ;form feed
- 13))))
- (define printing (union graphic (set #\space)))
- (define control (negate printing))
- (define hexdigit (union numeric (ranges #\a #\f #\A #\F)))
- (define-regexp-type string-start make-string-start string-start?)
- (define-regexp-type string-end make-string-end string-end?)
- (define the-string-start (make-string-start))
- (define the-string-end (make-string-end))
- (define (string-start)
- the-string-start)
- (define (string-end)
- the-string-end)
- (define-regexp-type submatch make-submatch submatch?
- submatch-exp
- submatch-id)
- (define (submatch id exp)
- (cond ((not (regexp? exp))
- (assertion-violation 'submatch "invalid argument" exp))
- ((empty-set? exp)
- the-empty-set)
- (else
- (make-submatch exp id))))
- (define-regexp-type sequence make-sequence sequence?
- sequence-exps)
- (define epsilon (make-sequence '()))
- (define (epsilon? x)
- (and (sequence? x)
- (null? (sequence-exps x))))
- (define (sequence . all-exps)
- (let loop ((exps all-exps) (res '()))
- (if (null? exps)
- (cond ((null? res)
- epsilon)
- ((null? (cdr res))
- (car res))
- (else
- (make-sequence (reverse res))))
- (let ((exp (car exps)))
- (cond ((not (regexp? exp))
- (apply assertion-violation 'sequence "invalid argument" all-exps))
- ((empty-set? exp)
- the-empty-set)
- ((sequence? exp)
- (loop (append (sequence-exps exp) (cdr exps)) res))
- (else
- (loop (cdr exps) (cons exp res))))))))
- (define-regexp-type one-of make-one-of one-of?
- one-of-exps)
- (define (one-of . all-exps)
- (let loop ((exps all-exps) (res '()) (set the-empty-set))
- (if (null? exps)
- (cond ((null? res)
- set)
- ((and (null? (cdr res))
- (empty-set? set))
- (car res))
- (else
- (make-one-of (if (empty-set? set)
- (reverse res)
- (cons set (reverse res))))))
- (let ((exp (car exps)))
- (cond ((not (regexp? exp))
- (apply assertion-violation 'one-of "invalid argument" all-exps))
- ((empty-set? exp)
- (loop (cdr exps) res set))
- ((set? exp)
- (loop (cdr exps) res (union exp set)))
- ((one-of? exp)
- (loop (append (one-of-exps exp) (cdr exps)) res set))
- (else
- (loop (cdr exps) (cons exp res) set)))))))
- (define (text string)
- (if (string? string)
- (apply sequence (map char->set (string->list string)))
- (assertion-violation 'text "invalid argument" string)))
- (define-regexp-type repeat really-make-repeat repeat?
- repeat-low
- repeat-high
- repeat-exp)
- (define (make-repeat low high exp)
- (cond ((not (and (integer? low)
- (<= 0 low)
- (regexp? exp)
- (or (not high)
- (and (integer? high)
- (<= low high)))))
- (assertion-violation 'make-repeat "invalid argument" low high exp))
- ((or (epsilon? exp)
- (and high
- (= low high 0)))
- epsilon)
- ((and high
- (= low high 1))
- exp)
- ((empty-set? exp)
- (if (and (= low 0)
- (or (not high)
- (= high 0)))
- epsilon
- the-empty-set))
- (else
- (really-make-repeat low high exp))))
- (define (repeat . stuff)
- (case (length stuff)
- ((0)
- (assertion-violation 'repeat "invalid argument" repeat))
- ((1)
- (make-repeat 0 #f (car stuff)))
- ((2)
- (make-repeat (car stuff) (car stuff) (cadr stuff)))
- ((3)
- (apply make-repeat stuff))
- (else
- (apply assertion-violation 'repeat "invalid argument" stuff))))
- (define (ignore-case exp)
- (start-expression-map ignore-case 'no-case exp))
- (define (use-case exp)
- (start-expression-map use-case 'use-case exp))
- (define (no-submatches exp)
- (start-expression-map no-submatches 'no-submatch exp))
- (define (start-expression-map proc function exp)
- (if (regexp? exp)
- (or (expression-map function exp)
- exp)
- (assertion-violation 'start-expression-map "invalid argument" proc exp)))
- (define (expression-map function exp)
- (let recur ((exp exp))
- (cond ((set? exp)
- (if (or (eq? function 'no-submatch)
- (= (set-use-case exp)
- (set-no-case exp)))
- #f
- (let ((chars (if (eq? function 'no-case)
- (set-no-case exp)
- (set-use-case exp))))
- (make-set chars chars))))
- ((submatch? exp)
- (let ((sub (submatch-exp exp)))
- (if (eq? function 'no-submatch)
- (or (recur sub)
- sub)
- (let ((new (recur sub)))
- (if new
- (submatch (submatch-id exp) new)
- #f)))))
- ((sequence? exp)
- (let ((exps (expression-list-map function (sequence-exps exp))))
- (if exps
- (make-sequence exps)
- #f)))
- ((one-of? exp)
- (let ((exps (expression-list-map function (one-of-exps exp))))
- (if exps
- (make-one-of exps)
- #f)))
- ((repeat? exp)
- (let ((new (recur (repeat-exp exp))))
- (if new
- (make-repeat (repeat-low exp)
- (repeat-high exp)
- new)
- #f)))
- ((or (string-start? exp)
- (string-end? exp))
- exp)
- (else
- (assertion-violation 'expression-map "got a non-expression" exp)))))
- (define (expression-list-map function exps)
- (let recur ((exps exps))
- (if (null? exps)
- #f
- (let ((new (expression-map function (car exps)))
- (more (recur (cdr exps))))
- (if (or new more)
- (cons (or new (car exps))
- (or more (cdr exps)))
- #f)))))
- (define (exp->posix-string exp)
- (cond ((not (regexp? exp))
- (assertion-violation 'exp->posix-string "invalid argument" exp))
- ((empty-set? exp)
- (assertion-violation 'exp->posix-string "no Posix string for the empty set" exp))
- (else
- (reduce ((list* elt (exp->strings exp '())))
- ((strings '())
- (parens '()))
- (cond ((not elt)
- (values (cons "(" strings) (cons #f parens)))
- ((pair? elt)
- (values (cons "(" strings) (cons elt parens)))
- (else
- (values (cons elt strings) parens)))
- (values (apply string-append (reverse strings))
- (reverse parens))))))
- (define (exp->strings exp tail)
- (cond ((set? exp)
- (cons (set->posix-string exp) tail))
- ((submatch? exp)
- `((,(submatch-id exp))
- ,@(exp->strings (submatch-exp exp) '())
- ")"
- . ,tail))
- ((one-of? exp)
- (one-of-strings (one-of-exps exp) tail))
- ((sequence? exp)
- (sequence-strings (sequence-exps exp) tail))
- ((repeat? exp)
- (repetition-strings (repeat-low exp)
- (repeat-high exp)
- (repeat-exp exp)
- tail))
- ((string-start? exp)
- (cons "^" tail))
- ((string-end? exp)
- (cons "$" tail))
- (else
- (assertion-violation 'exp->strings "bad expression" exp))))
- (define (exp->parenthesized-strings exp tail)
- (cons #f (exp->strings exp (cons ")" tail))))
- (define (one-of-strings exps tail)
- (let ((exps (reverse exps)))
- (reduce ((list* exp (cdr exps)))
- ((res (exp->strings (car exps) tail)))
- (exp->strings exp (cons "|" res)))))
- (define (sequence-strings exps tail)
- (if (null? exps)
- `(#f ")" . ,tail)
- (reduce ((list* exp (reverse exps)))
- ((strings tail))
- (if (one-of? exp)
- (exp->parenthesized-strings exp strings)
- (exp->strings exp strings)))))
- (define (repetition-strings low high subexp tail)
- (let ((tail `("{"
- ,(number->string low)
- ,@(cond ((not high)
- (list ","))
- ((= low high)
- '())
- (else
- (list "," (number->string high))))
- "}"
- . ,tail)))
- (if (or (one-of? subexp)
- (sequence? subexp))
- (exp->parenthesized-strings subexp tail)
- (exp->strings subexp tail))))
- (define (set->posix-string set)
- (or (set-string set)
- (let ((string (bit-set->posix-string (set-use-case set))))
- (set-set-string! set string)
- string)))
- (define (bit-set->posix-string bit-set)
- (cond ((= bit-set 0)
- (assertion-violation 'bit-set->posix-string "trying to convert the empty set"))
- ((= (bitwise-and bit-set all-chars)
- all-chars)
- ".")
- (else
- (let* ((string (maybe-bit-set->string bit-set)))
- (if (string? string)
- (string-append "["
- (if (char? string)
- (list->string (list string))
- string)
- "]")
- (char->posix-string string))))))
- (define dash-hat (bitwise-ior (char->mask #\-)
- (char->mask #\^)))
- (define funny-char-list
- (map (lambda (char)
- (let ((mask (char->mask char)))
- (list mask
- char
- (bitwise-ior mask
- (arithmetic-shift mask -1)
- (arithmetic-shift mask 1)
- (cond ((eq? char #\^)
- (char->mask #\\))
- ((eq? char #\])
- (char->mask #\_))
- (else
- 0))))))
- '(#\[ #\^ #\- #\])))
- (define (maybe-bit-set->string bit-set)
- (cond ((= bit-set dash-hat)
- "-^")
- ((assoc bit-set funny-char-list)
- => cadr)
- (else
- (reduce ((list* info funny-char-list))
- ((bit-set bit-set)
- (funny-chars '()))
- (if (or (= 0
- (bitwise-and bit-set
- (car info)))
- (= (caddr info)
- (bitwise-and bit-set
- (caddr info))))
- (values bit-set
- funny-chars)
- (values (bitwise-xor bit-set (car info))
- (cons (cadr info) funny-chars)))
- (let ((chars (bit-set->chars bit-set)))
- (char-list->string (if (and (not (null? funny-chars))
- (eq? (car funny-chars) #\]))
- (append (cdr funny-chars)
- (reverse (cons #\] chars)))
- (append funny-chars
- (reverse chars)))))))))
- (define (bit-set->chars bit-set)
- (reduce ((bits* chunk bit-set 16)
- (count* i 0 -1 16))
- ((ranges '()))
- (if (= 0 chunk)
- ranges
- (small-bit-set->ranges chunk i ranges))
- (reduce ((list* range (cdr ranges)))
- ((done (list (car ranges))))
- (if (= (+ (cdr range) 1)
- (caar done))
- (cons (cons (car range) (cdar done))
- (cdr done))
- (cons range done)))))
- (define (small-bit-set->ranges bit-set i result)
- (let loop ((bit-set bit-set) (i i))
- (cond ((= 0 bit-set)
- result)
- ((odd? bit-set)
- (let range-loop ((bit-set (shift-down bit-set)) (j (+ i 1)))
- (if (odd? bit-set)
- (range-loop (shift-down bit-set) (+ j 1))
- (small-bit-set->ranges (shift-down bit-set)
- (+ j 1)
- (cons (cons i (- j 1))
- result)))))
- (else
- (loop (shift-down bit-set) (+ i 1))))))
- (define (shift-down n)
- (arithmetic-shift n -1))
- (define (char-list->string ranges)
- (if (and (null? (cdr ranges))
- (= (caar ranges)
- (cdar ranges)))
- (string (scalar-value->char (caar ranges)))
- (reduce ((list* range ranges))
- ((res '()))
- (if (char? range)
- (cons range res)
- (let ((first (scalar-value->char (car range)))
- (last (scalar-value->char (cdr range))))
- (case (- (cdr range) (car range))
- ((0)
- (cons first res))
- ((1)
- (cons first (cons last res)))
- (else
- `(,first #\- ,last . ,res)))))
- (list->string res))))
- (define special-char-set
- (set-use-case (set "[.*?()|\\$^+")))
- (define (char->posix-string char)
- (cond ((char=? char #\{)
- "[{]")
- ((= 0 (bitwise-and (char->mask char)
- special-char-set))
- (string char))
- (else
- (string #\\ char))))
- (define (any-match? exp string)
- (if (and (regexp? exp)
- (string? string))
- (if (empty-set? exp)
- #f
- (regexp-match (car (compile-exp exp)) string 0 #f #t #t))
- (assertion-violation 'any-match? "invalid argument" exp string)))
-
- (define (exact-match? exp string)
- (if (and (regexp? exp)
- (string? string))
- (if (empty-set? exp)
- #f
- (let ((matches (regexp-match (car (compile-exp exp)) string 0 #t #t #t)))
- (and matches
- (= 0 (match-start (car matches)))
- (= (string-length string) (match-end (car matches))))))
- (assertion-violation 'exact-match? "invalid argument" exp string)))
- (define (match exp string)
- (if (and (regexp? exp)
- (string? string))
- (let* ((pair (compile-exp exp))
- (regexp (car pair))
- (match-flags (cdr pair))
- (matches (regexp-match regexp string 0 #t #t #t)))
- (if matches
- (reduce ((list% match (cdr matches))
- (list% flag match-flags))
- ((submatches '()))
- (if (and flag match)
- (cons (cons (car flag)
- match)
- submatches)
- submatches)
- (make-match (match-start (car matches))
- (match-end (car matches))
- (reverse submatches)))
- #f))
- (assertion-violation 'match "invalid argument" exp string)))
- (define (compile-exp exp)
- (or (regexp-compiled exp)
- (mvlet* (((string match-flags)
- (exp->posix-string exp))
-
- (regexp (make-posix-regexp string
- (regexp-option extended)
- (regexp-option submatches))))
- (set-regexp-compiled! exp (cons regexp match-flags))
- (cons regexp match-flags))))
- (define (regexp->s-exp x)
- (cond ((not (regexp? x))
- (assertion-violation 'regexp->s-exp "invalid argument" x))
- ((set? x)
- (list 'set
- (let ((chars (set->chars x)))
- (if (= 1 (length chars))
- (car chars)
- (list->string chars)))))
- ((submatch? x)
- (list 'submatch (regexp->s-exp (submatch-exp x))))
- ((sequence? x)
- (cons 'sequence (map regexp->s-exp (sequence-exps x))))
- ((one-of? x)
- (cons 'one-of (map regexp->s-exp (one-of-exps x))))
- ((repeat? x)
- (list 'repeat
- (repeat-low x)
- (repeat-high x)
- (regexp->s-exp (repeat-exp x))))
- ((string-start? x)
- '(string-start))
- ((string-end? x)
- '(string-end))
- (else
- (assertion-violation 'regexp->s-exp "unknown type of regular-expression" x))))
- (define (set->chars set)
- (iterate loop ((count* i 0 -1 16))
- ((bits (set-use-case set))
- (chars '()))
- (if (= 0 bits)
- (reverse chars)
- (loop (arithmetic-shift bits -16)
- (iterate loop ((count* i i))
- ((bits (bitwise-and bits #xffff))
- (chars chars))
- (if (= 0 bits)
- chars
- (loop (arithmetic-shift bits -1)
- (if (odd? bits)
- (cons (scalar-value->char i)
- chars)
- chars))))))))
|