123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175 |
- ; Copyright (c) 1993-2007 by Richard Kelsey and Jonathan Rees. See file COPYING.
- ; Exports:
- ; make-regexp
- ; regexp?
- ; regexp-match
- ; regexp-match?
- ; regexp-match-start
- ; regexp-match-end
- ; regexp-option
- ; The compiled version of the expression is produced when needed.
- (define-record-type regexp :regexp
- (really-make-regexp pattern pattern-byte-string
- compiled
- extended? ignore-case? submatches? newline?)
- regexp?
- (pattern regexp-pattern) ; immutable string
- (pattern-byte-string regexp-pattern-byte-string)
- (compiled real-regexp-compiled set-regexp-compiled!) ; #f or a c-record
- (extended? regexp-extended?) ; four flags
- (ignore-case? regexp-ignore-case?)
- (submatches? regexp-submatches?)
- (newline? regexp-newline?))
- ; Drop the compiled version when resuming. We may be resuming on a different
- ; architecture, or version of the library, or whatever.
- (define-record-resumer :regexp
- (lambda (regexp)
- (set-regexp-compiled! regexp #f)))
- ; There are four options when making a regular expression.
- (define-enumerated-type regexp-option :regexp-option
- regexp-option?
- regexp-options
- regexp-option-name
- regexp-option-index
- (extended ignore-case submatches newline))
- ; Loop down finding which options are present and checking for duplicates.
- ; This is not specific to regular expressions.
- ;
- ; It would be nice if this could handle values as well, as in
- ; (make-regexp "sldkjf" (regexp-option size 10))
- (define (decode-boolean-options options all-options predicate indexer)
- (let ((map (make-vector (vector-length all-options) #f)))
- (let loop ((options options))
- (if (null? options)
- (vector->list map)
- (let ((option (car options)))
- (if (predicate option)
- (let ((index (indexer option)))
- (if (vector-ref map index)
- 'duplicates
- (begin
- (vector-set! map index #t)
- (loop (cdr options)))))
- 'bad-value))))))
- ; The only thing we do here is to decode the options and make sure that the
- ; pattern is immutable, as it won't be used until later.
- (define (make-regexp pattern . options)
- (let ((options (decode-boolean-options options
- regexp-options
- regexp-option?
- regexp-option-index)))
- (if (and (string? pattern)
- (pair? options))
- (let* ((pattern (string->immutable-string pattern))
- (pattern-byte-string (string->byte-vector pattern))
- (regexp (apply really-make-regexp pattern pattern-byte-string #f options)))
- (add-finalizer! regexp free-compiled-regexp)
- regexp)
- (apply call-error "invalid argument(s)"
- make-regexp
- pattern
- options))))
- ; Free up the C-heap storage used for the compiled regexp.
- (define (free-compiled-regexp regexp)
- (let ((compiled (real-regexp-compiled regexp)))
- (if compiled
- (call-imported-binding posix-free-regexp compiled))))
- ; We compile the pattern if that hasn't already been done, raising an error
- ; if anything goes wrong.
- (define (regexp-compiled regexp)
- (or (real-regexp-compiled regexp)
- (let ((compiled (call-imported-binding posix-compile-regexp
- (regexp-pattern-byte-string regexp)
- (regexp-extended? regexp)
- (regexp-ignore-case? regexp)
- (regexp-submatches? regexp)
- (regexp-newline? regexp))))
- (if (not (integer? compiled))
- (begin
- (set-regexp-compiled! regexp compiled)
- compiled)
- (let ((message (call-imported-binding posix-regexp-error-message
- (regexp-pattern-byte-string regexp)
- (regexp-extended? regexp)
- (regexp-ignore-case? regexp)
- (regexp-submatches? regexp)
- (regexp-newline? regexp))))
- (error (if message
- (string-append "Posix regexp: " message)
- "inconsistent results from Posix regexp compiler")
- regexp))))))
- ; Call the pattern matcher. We return #F if the match fails. On a successful
- ; match, we either return #T or a list of match records, depending on the value
- ; of SUBMATCHES?.
- (define (regexp-match regexp string start submatches? starts-line? ends-line?)
- (cond
- ((not (and (regexp? regexp)
- (string? string)))
- (call-error "invalid argument"
- regexp-match
- regexp string start starts-line? ends-line?))
- ((and submatches?
- (not (regexp-submatches? regexp)))
- (call-error "regexp not compiled for submatches"
- regexp-match
- regexp string start starts-line? ends-line?))
- (else
- (call-imported-binding posix-regexp-match
- (regexp-compiled regexp)
- (string->byte-vector string)
- start
- submatches?
- starts-line?
- ends-line?))))
- ; we can't do any better with POSIX, Mike thinks
- (define (string->byte-vector s)
- (os-string->byte-vector
- (call-with-os-string-text-codec
- latin-1-codec
- (lambda ()
- (string->os-string s)))))
-
- ; These are made by the C code. The SUBMATCHES field is not used by us,
- ; but is used by the functional interface.
- (define-record-type match :match
- (make-match start end submatches)
- match?
- (start match-start)
- (end match-end)
- (submatches match-submatches))
- (define-record-discloser :match
- (lambda (rem)
- (list 'match
- (match-start rem)
- (match-end rem))))
- (define-exported-binding "posix-regexp-match-type" :match)
- ; The various C functions we use.
- (import-definition posix-compile-regexp)
- (import-definition posix-regexp-match)
- (import-definition posix-regexp-error-message)
- (import-definition posix-free-regexp)
|