123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180 |
- (use-modules (ice-9 match)
- (ice-9 pretty-print)
- (ice-9 format)
- (ice-9 textual-ports))
- (define (generate-codepoint-lookup-table f)
- (define (visit-codepoint-range start end clauses)
- (define (adjoin-span cp val out)
- (match out
- (() (acons cp val '()))
- (((end . val*) . tail)
- (if (eqv? val val*)
- (acons cp val* tail)
- (acons cp val out)))))
- (define (fold-clauses clauses out)
- (match out
- (() clauses)
- (((end . val) (end* . val*) . out)
- (let ((stride (- end end*)))
- (define (finish start out)
- (let* ((span (- end start))
- (nclauses (/ span stride)))
- (unless (and (exact-integer? nclauses) (positive? nclauses))
- (error "unexpected" nclauses))
- (fold-clauses
- (append
- (match nclauses
- (1 `(((,(if (= span 1) '= '<=) cp ,end) ,val)))
- (2 `(((<= cp ,end*) ,val*)
- ((<= cp ,end) ,val)))
- (_
- `(((<= cp ,end)
- (if (logtest
- 1
- ,(cond
- ((= 1 (logcount stride))
- ;; Stride is a power of two.
- (define (quotient/shift expr shift)
- (if (zero? shift)
- expr
- `(ash ,expr ,shift)))
- ;; Compute the offset from the start of
- ;; the span, unless the start is already
- ;; aligned.
- (define offset
- (if (logtest (1+ start) (1- (ash stride 1)))
- `(- cp ,(1+ start))
- 'cp))
- (quotient/shift offset
- (- (logcount (1- stride)))))
- (else
- `(even? (quotient (- cp ,(1+ start)) ,stride)))))
- ,(if (even? nclauses) val val*)
- ,(if (even? nclauses) val* val))))))
- clauses)
- out)))
- (let lp ((prev-end end*) (prev-val val*) (out out)
- (expected val*) (alternate val))
- (match out
- (()
- (let ((end (1- start)))
- (if (= end (- prev-end stride))
- (finish end '())
- (finish prev-end (acons prev-end prev-val out)))))
- (((end . val) . tail)
- (cond
- ((and (= end (- prev-end stride))
- (eqv? prev-val expected))
- (lp end val tail alternate expected))
- (else
- (finish prev-end (acons prev-end prev-val out)))))))))
- (((end . val))
- (cons `((<= cp ,end) ,val) clauses))))
- (let lp ((cp start) (out '()))
- (if (<= cp end)
- (lp (1+ cp) (adjoin-span cp (f (integer->char cp)) out))
- (fold-clauses clauses out))))
- (define* (make-binary-search v #:optional (start 0) (end (vector-length v)))
- (cond
- ((= start end)
- (if (= start (vector-length v))
- `(error "unreachable")
- (match (vector-ref v start)
- ((test expr) expr))))
- (else
- (let ((mid (ash (+ start end) -1)))
- (match (vector-ref v mid)
- ((((or '= '<=) 'cp val) _)
- `(if (<= cp ,val)
- ,(make-binary-search v start mid)
- ,(make-binary-search v (1+ mid) end))))))))
- (let* ((clauses '())
- (clauses (visit-codepoint-range #xe000 #x10ffff clauses))
- (clauses (visit-codepoint-range 0 #xd7ff clauses)))
- `(lambda (cp) ,(make-binary-search (list->vector clauses)))))
- (define (generate-codepoint-bit-lookup-table f)
- (define max-codepoint-bits 21)
- (define level-0-shift 16)
- (define level-0-size (ash 1 (- max-codepoint-bits level-0-shift)))
- (define level-1-shift 8)
- (define level-1-size (ash 1 (- level-0-shift level-1-shift)))
- (define level-1-mask (1- level-1-size))
- (define level-2-size (ash 1 level-1-shift))
- (define level-2-mask (1- level-2-size))
- (define (empty-bitmap) #f)
- (define (adjoin bitmap n)
- (let ((i0 (ash n (- level-0-shift)))
- (i1 (logand (ash n (- level-1-shift)) level-1-mask))
- (i2 (logand n level-2-mask)))
- ;; Could replace with functional setters.
- (define (vector-set v i x) (vector-set! v i x) v)
- (define (bitvector-set bv i) (bitvector-set-bit! bv i) bv)
- (define (adjoin/2 bv)
- (let ((bv (or bv (make-bitvector level-2-size #f))))
- (bitvector-set bv i2)))
- (define (adjoin/1 v)
- (let ((v (or v (make-vector level-1-size #f))))
- (vector-set v i1 (adjoin/2 (vector-ref v i1)))))
- (define (adjoin/0 v)
- (let ((v (or v (make-vector level-0-size #f))))
- (vector-set v i0 (adjoin/1 (vector-ref v i0)))))
- (adjoin/0 bitmap)))
- (define (visit-codepoint-range start end bitmap)
- (let lp ((cp start) (bitmap bitmap))
- (if (<= cp end)
- (lp (1+ cp)
- (if (f (integer->char cp))
- (adjoin bitmap cp)
- bitmap))
- bitmap)))
- (let* ((bitmap (visit-codepoint-range 0 #xd7ff (empty-bitmap)))
- (bitmap (visit-codepoint-range #xe000 #x10ffff bitmap)))
- `(lambda (cp)
- (define-syntax and-let*
- (syntax-rules ()
- ((and-let* () body) body)
- ((and-let* ((var val) . bindings) body)
- (let ((var val)) (and var (and-let* bindings body))))))
- (and-let* ((v (vector-ref ',bitmap (ash cp ,(- level-0-shift))))
- (bv (vector-ref v (logand (ash cp ,(- level-1-shift))
- ,level-1-mask))))
- (bitvector-ref bv (logand cp ,level-2-mask))))))
- (define (make-char-mapper f)
- (define (diff ch)
- (- (char->integer (f ch)) (char->integer ch)))
- `(lambda (ch)
- (let ((cp (char->integer ch)))
- (integer->char
- (+ cp (,(generate-codepoint-lookup-table diff) cp))))))
- (define (make-char-predicate f)
- `(lambda (ch)
- (,(generate-codepoint-bit-lookup-table f) (char->integer ch))))
- (when (batch-mode?)
- (match (program-arguments)
- ((_)
- (define (<< str)
- (put-string (current-output-port) str))
- (define (pp expr)
- (newline (current-output-port))
- (pretty-print expr (current-output-port)))
- (<< ";; This file was generated by generate-char-stdlib.scm.\n")
- (define-syntax-rule (generate-procs (gen proc) ...)
- (begin
- (pp `(define proc ,(gen proc)))
- ...))
- (generate-procs (make-char-mapper char-upcase)
- (make-char-mapper char-downcase)
- (make-char-predicate char-upper-case?)
- (make-char-predicate char-lower-case?)
- (make-char-predicate char-alphabetic?)
- (make-char-predicate char-numeric?)
- (make-char-predicate char-whitespace?)))
- ((arg0 . _)
- (format (current-error-port) "usage: ~a\n" arg0)
- (exit 1))))
|