char.scm 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. ;;; R7RS (scheme chars) library
  2. ;;; Copyright (C) 2024 Igalia, S.L.
  3. ;;;
  4. ;;; Licensed under the Apache License, Version 2.0 (the "License");
  5. ;;; you may not use this file except in compliance with the License.
  6. ;;; You may obtain a copy of the License at
  7. ;;;
  8. ;;; http://www.apache.org/licenses/LICENSE-2.0
  9. ;;;
  10. ;;; Unless required by applicable law or agreed to in writing, software
  11. ;;; distributed under the License is distributed on an "AS IS" BASIS,
  12. ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. ;;; See the License for the specific language governing permissions and
  14. ;;; limitations under the License.
  15. ;;; Commentary:
  16. ;;;
  17. ;;; R7RS (scheme chars) implementation
  18. ;;;
  19. ;;; Code:
  20. (library (scheme char)
  21. (export char-alphabetic?
  22. char-ci<=?
  23. char-ci<?
  24. char-ci=?
  25. char-ci>=?
  26. char-ci>?
  27. char-downcase
  28. char-foldcase
  29. char-lower-case?
  30. char-numeric?
  31. char-upcase
  32. char-upper-case?
  33. char-whitespace?
  34. digit-value
  35. string-ci<=?
  36. string-ci<?
  37. string-ci=?
  38. string-ci>=?
  39. string-ci>?
  40. string-downcase
  41. string-foldcase
  42. string-upcase)
  43. (import (only (hoot primitives)
  44. include-from-path
  45. %inline-wasm)
  46. (scheme base)
  47. (hoot bitwise)
  48. (only (hoot numbers) 1+)
  49. (only (hoot char)
  50. char-downcase char-upcase
  51. string-downcase string-upcase
  52. char-alphabetic? char-lower-case? char-numeric?
  53. char-upper-case? char-whitespace?))
  54. (define (char-foldcase char)
  55. (if (or (eqv? char #\460) (eqv? char #\461))
  56. char
  57. (char-downcase (char-upcase char))))
  58. (define (digit-value char)
  59. ;; The table can be extracted with:
  60. ;; awk -F ';' '/ZERO;Nd/ {print "#x"$1}' UnicodeData.txt
  61. ;; Up to date with Unicode 15.1.0.
  62. (define *decimal-zeroes*
  63. '#(#x0030 #x0660 #x06F0 #x07C0 #x0966 #x09E6 #x0A66 #x0AE6 #x0B66
  64. #x0BE6 #x0C66 #x0CE6 #x0D66 #x0DE6 #x0E50 #x0ED0 #x0F20
  65. #x1040 #x1090 #x17E0 #x1810 #x1946 #x19D0 #x1A80 #x1A90
  66. #x1B50 #x1BB0 #x1C40 #x1C50 #xA620 #xA8D0 #xA900 #xA9D0
  67. #xA9F0 #xAA50 #xABF0 #xFF10 #x104A0 #x10D30 #x11066
  68. #x110F0 #x11136 #x111D0 #x112F0 #x11450 #x114D0 #x11650
  69. #x116C0 #x11730 #x118E0 #x11950 #x11C50 #x11D50 #x11DA0
  70. #x11F50 #x16A60 #x16AC0 #x16B50 #x1D7CE #x1D7D8 #x1D7E2
  71. #x1D7EC #x1D7F6 #x1E140 #x1E2F0 #x1E4F0 #x1E950 #x1FBF0))
  72. (let ((cp (char->integer char)))
  73. (if (<= 0 (- cp (char->integer #\0)) 9)
  74. ;; Fast case.
  75. (- cp (char->integer #\0))
  76. ;; Otherwise, a binary search.
  77. (let lp ((start 0) (end (vector-length *decimal-zeroes*)))
  78. (and (< start end)
  79. (let* ((mid (ash (+ start end) -1))
  80. (val (- cp (vector-ref *decimal-zeroes* mid))))
  81. (cond
  82. ((< val 0) (lp start mid))
  83. ((< val 10) val)
  84. (else (lp (1+ mid) end)))))))))
  85. (define (char-ci<? ch1 ch2 . ch*)
  86. (apply char<?
  87. (char-foldcase ch1) (char-foldcase ch2) (map char-foldcase ch*)))
  88. (define (char-ci<=? ch1 ch2 . ch*)
  89. (apply char<=?
  90. (char-foldcase ch1) (char-foldcase ch2) (map char-foldcase ch*)))
  91. (define (char-ci=? ch1 ch2 . ch*)
  92. (apply char=?
  93. (char-foldcase ch1) (char-foldcase ch2) (map char-foldcase ch*)))
  94. (define (char-ci>=? ch1 ch2 . ch*)
  95. (apply char>=?
  96. (char-foldcase ch1) (char-foldcase ch2) (map char-foldcase ch*)))
  97. (define (char-ci>? ch1 ch2 . ch*)
  98. (apply char>?
  99. (char-foldcase ch1) (char-foldcase ch2) (map char-foldcase ch*)))
  100. (define (string-foldcase str)
  101. (string-downcase (string-upcase str)))
  102. ;; FIXME: We could use Intl.Collator instead of manually folding case.
  103. (define (string-ci<? . strs) (apply string<? (map string-foldcase strs)))
  104. (define (string-ci<=? . strs) (apply string<=? (map string-foldcase strs)))
  105. (define (string-ci=? . strs) (apply string=? (map string-foldcase strs)))
  106. (define (string-ci>=? . strs) (apply string>=? (map string-foldcase strs)))
  107. (define (string-ci>? . strs) (apply string>? (map string-foldcase strs))))