generate-char-prelude.scm 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. (use-modules (ice-9 match)
  2. (ice-9 pretty-print)
  3. (ice-9 format)
  4. (ice-9 textual-ports))
  5. (define (generate-codepoint-lookup-table f)
  6. (define (visit-codepoint-range start end clauses)
  7. (define (adjoin-span cp val out)
  8. (match out
  9. (() (acons cp val '()))
  10. (((end . val*) . tail)
  11. (if (eqv? val val*)
  12. (acons cp val* tail)
  13. (acons cp val out)))))
  14. (define (fold-clauses clauses out)
  15. (match out
  16. (() clauses)
  17. (((end . val) (end* . val*) . out)
  18. (let ((stride (- end end*)))
  19. (define (finish start out)
  20. (let* ((span (- end start))
  21. (nclauses (/ span stride)))
  22. (unless (and (exact-integer? nclauses) (positive? nclauses))
  23. (error "unexpected" nclauses))
  24. (fold-clauses
  25. (append
  26. (match nclauses
  27. (1 `(((,(if (= span 1) '= '<=) cp ,end) ,val)))
  28. (2 `(((<= cp ,end*) ,val*)
  29. ((<= cp ,end) ,val)))
  30. (_
  31. `(((<= cp ,end)
  32. (if (logtest
  33. 1
  34. ,(cond
  35. ((= 1 (logcount stride))
  36. ;; Stride is a power of two.
  37. (define (quotient/shift expr shift)
  38. (if (zero? shift)
  39. expr
  40. `(ash ,expr ,shift)))
  41. ;; Compute the offset from the start of
  42. ;; the span, unless the start is already
  43. ;; aligned.
  44. (define offset
  45. (if (logtest (1+ start) (1- (ash stride 1)))
  46. `(- cp ,(1+ start))
  47. 'cp))
  48. (quotient/shift offset
  49. (- (logcount (1- stride)))))
  50. (else
  51. `(even? (quotient (- cp ,(1+ start)) ,stride)))))
  52. ,(if (even? nclauses) val val*)
  53. ,(if (even? nclauses) val* val))))))
  54. clauses)
  55. out)))
  56. (let lp ((prev-end end*) (prev-val val*) (out out)
  57. (expected val*) (alternate val))
  58. (match out
  59. (()
  60. (let ((end (1- start)))
  61. (if (= end (- prev-end stride))
  62. (finish end '())
  63. (finish prev-end (acons prev-end prev-val out)))))
  64. (((end . val) . tail)
  65. (cond
  66. ((and (= end (- prev-end stride))
  67. (eqv? prev-val expected))
  68. (lp end val tail alternate expected))
  69. (else
  70. (finish prev-end (acons prev-end prev-val out)))))))))
  71. (((end . val))
  72. (cons `((<= cp ,end) ,val) clauses))))
  73. (let lp ((cp start) (out '()))
  74. (if (<= cp end)
  75. (lp (1+ cp) (adjoin-span cp (f (integer->char cp)) out))
  76. (fold-clauses clauses out))))
  77. (define* (make-binary-search v #:optional (start 0) (end (vector-length v)))
  78. (cond
  79. ((= start end)
  80. (if (= start (vector-length v))
  81. `(error "unreachable")
  82. (match (vector-ref v start)
  83. ((test expr) expr))))
  84. (else
  85. (let ((mid (ash (+ start end) -1)))
  86. (match (vector-ref v mid)
  87. ((((or '= '<=) 'cp val) _)
  88. `(if (<= cp ,val)
  89. ,(make-binary-search v start mid)
  90. ,(make-binary-search v (1+ mid) end))))))))
  91. (let* ((clauses '())
  92. (clauses (visit-codepoint-range #xe000 #x10ffff clauses))
  93. (clauses (visit-codepoint-range 0 #xd7ff clauses)))
  94. `(lambda (cp) ,(make-binary-search (list->vector clauses)))))
  95. (define (generate-codepoint-bit-lookup-table f)
  96. (define max-codepoint-bits 21)
  97. (define level-0-shift 16)
  98. (define level-0-size (ash 1 (- max-codepoint-bits level-0-shift)))
  99. (define level-1-shift 8)
  100. (define level-1-size (ash 1 (- level-0-shift level-1-shift)))
  101. (define level-1-mask (1- level-1-size))
  102. (define level-2-size (ash 1 level-1-shift))
  103. (define level-2-mask (1- level-2-size))
  104. (define (empty-bitmap) #f)
  105. (define (adjoin bitmap n)
  106. (let ((i0 (ash n (- level-0-shift)))
  107. (i1 (logand (ash n (- level-1-shift)) level-1-mask))
  108. (i2 (logand n level-2-mask)))
  109. ;; Could replace with functional setters.
  110. (define (vector-set v i x) (vector-set! v i x) v)
  111. (define (bitvector-set bv i) (bitvector-set-bit! bv i) bv)
  112. (define (adjoin/2 bv)
  113. (let ((bv (or bv (make-bitvector level-2-size #f))))
  114. (bitvector-set bv i2)))
  115. (define (adjoin/1 v)
  116. (let ((v (or v (make-vector level-1-size #f))))
  117. (vector-set v i1 (adjoin/2 (vector-ref v i1)))))
  118. (define (adjoin/0 v)
  119. (let ((v (or v (make-vector level-0-size #f))))
  120. (vector-set v i0 (adjoin/1 (vector-ref v i0)))))
  121. (adjoin/0 bitmap)))
  122. (define (visit-codepoint-range start end bitmap)
  123. (let lp ((cp start) (bitmap bitmap))
  124. (if (<= cp end)
  125. (lp (1+ cp)
  126. (if (f (integer->char cp))
  127. (adjoin bitmap cp)
  128. bitmap))
  129. bitmap)))
  130. (let* ((bitmap (visit-codepoint-range 0 #xd7ff (empty-bitmap)))
  131. (bitmap (visit-codepoint-range #xe000 #x10ffff bitmap)))
  132. `(lambda (cp)
  133. (define-syntax and-let*
  134. (syntax-rules ()
  135. ((and-let* () body) body)
  136. ((and-let* ((var val) . bindings) body)
  137. (let ((var val)) (and var (and-let* bindings body))))))
  138. (and-let* ((v (vector-ref ',bitmap (ash cp ,(- level-0-shift))))
  139. (bv (vector-ref v (logand (ash cp ,(- level-1-shift))
  140. ,level-1-mask))))
  141. (bitvector-ref bv (logand cp ,level-2-mask))))))
  142. (define (make-char-mapper f)
  143. (define (diff ch)
  144. (- (char->integer (f ch)) (char->integer ch)))
  145. `(lambda (ch)
  146. (let ((cp (char->integer ch)))
  147. (integer->char
  148. (+ cp (,(generate-codepoint-lookup-table diff) cp))))))
  149. (define (make-char-predicate f)
  150. `(lambda (ch)
  151. (,(generate-codepoint-bit-lookup-table f) (char->integer ch))))
  152. (when (batch-mode?)
  153. (match (program-arguments)
  154. ((_)
  155. (define (<< str)
  156. (put-string (current-output-port) str))
  157. (define (pp expr)
  158. (newline (current-output-port))
  159. (pretty-print expr (current-output-port)))
  160. (<< ";; This file was generated by generate-char-stdlib.scm.\n")
  161. (define-syntax-rule (generate-procs (gen proc) ...)
  162. (begin
  163. (pp `(define proc ,(gen proc)))
  164. ...))
  165. (generate-procs (make-char-mapper char-upcase)
  166. (make-char-mapper char-downcase)
  167. (make-char-predicate char-upper-case?)
  168. (make-char-predicate char-lower-case?)
  169. (make-char-predicate char-alphabetic?)
  170. (make-char-predicate char-numeric?)
  171. (make-char-predicate char-whitespace?)))
  172. ((arg0 . _)
  173. (format (current-error-port) "usage: ~a\n" arg0)
  174. (exit 1))))