char.scm 3.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. ;;; R7RS compatibility libraries
  2. ;;; Copyright (C) 2019 Free Software Foundation, Inc.
  3. ;;;
  4. ;;; This library is free software: you can redistribute it and/or modify
  5. ;;; it under the terms of the GNU Lesser General Public License as
  6. ;;; published by the Free Software Foundation, either version 3 of the
  7. ;;; License, or (at your option) any later version.
  8. ;;;
  9. ;;; This library is distributed in the hope that it will be useful, but
  10. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. ;;; Lesser General Public License for more details.
  13. ;;;
  14. ;;; You should have received a copy of the GNU Lesser General Public
  15. ;;; License along with this program. If not, see
  16. ;;; <http://www.gnu.org/licenses/>.
  17. ;;; Based on code from https://gitlab.com/akku/akku-scm, written
  18. ;;; 2018-2019 by Göran Weinholt <goran@weinholt.se>, as well as
  19. ;;; https://github.com/okuoku/yuni, written 2014-2018 by OKUMURA Yuki
  20. ;;; <mjt@cltn.org>. This code was originally released under the
  21. ;;; following terms:
  22. ;;;
  23. ;;; To the extent possible under law, the author(s) have dedicated
  24. ;;; all copyright and related and neighboring rights to this
  25. ;;; software to the public domain worldwide. This software is
  26. ;;; distributed without any warranty.
  27. ;;;
  28. ;;; See <http://creativecommons.org/publicdomain/zero/1.0/>, for a
  29. ;;; copy of the CC0 Public Domain Dedication.
  30. (define-module (scheme char)
  31. #:use-module ((srfi srfi-43) #:select (vector-binary-search))
  32. #:use-module (ice-9 i18n)
  33. #:export (char-foldcase
  34. string-foldcase
  35. digit-value)
  36. #:re-export (char-alphabetic?
  37. char-ci<=? char-ci<? char-ci=? char-ci>=?
  38. char-ci>? char-downcase char-lower-case?
  39. char-numeric? char-upcase char-upper-case? char-whitespace?
  40. string-ci<=? string-ci<? string-ci=?
  41. string-ci>=? string-ci>?
  42. (string-locale-downcase . string-downcase)
  43. (string-locale-upcase . string-upcase)))
  44. (define (char-foldcase char)
  45. (if (or (eqv? char #\460) (eqv? char #\461))
  46. char
  47. (char-downcase (char-upcase char))))
  48. (define (string-foldcase str)
  49. (string-locale-downcase (string-locale-upcase str)))
  50. ;; The table can be extracted with:
  51. ;; awk -F ';' '/ZERO;Nd/ {print "#x"$1}' UnicodeData.txt
  52. ;; Up to date with Unicode 11.0.0
  53. (define *decimal-zeroes* '#(#x0030 #x0660 #x06F0 #x07C0 #x0966 #x09E6
  54. #x0A66 #x0AE6 #x0B66 #x0BE6 #x0C66 #x0CE6 #x0D66 #x0DE6 #x0E50
  55. #x0ED0 #x0F20 #x1040 #x1090 #x17E0 #x1810 #x1946 #x19D0 #x1A80
  56. #x1A90 #x1B50 #x1BB0 #x1C40 #x1C50 #xA620 #xA8D0 #xA900 #xA9D0
  57. #xA9F0 #xAA50 #xABF0 #xFF10 #x104A0 #x10D30 #x11066 #x110F0 #x11136
  58. #x111D0 #x112F0 #x11450 #x114D0 #x11650 #x116C0 #x11730 #x118E0
  59. #x11C50 #x11D50 #x11DA0 #x16A60 #x16B50 #x1D7CE #x1D7D8 #x1D7E2
  60. #x1D7EC #x1D7F6 #x1E950))
  61. (define (digit-value char)
  62. (define (cmp zero ch)
  63. (if (integer? ch)
  64. (- (cmp zero ch))
  65. (let ((i (char->integer ch)))
  66. (cond ((< i zero) 1)
  67. ((> i (+ zero 9)) -1)
  68. (else 0)))))
  69. (unless (char? char)
  70. (error "Expected a char" char))
  71. (cond
  72. ((char<=? #\0 char #\9) ;fast case
  73. (- (char->integer char) (char->integer #\0)))
  74. ((vector-binary-search *decimal-zeroes* char cmp)
  75. => (lambda (zero)
  76. (- (char->integer char)
  77. (vector-ref *decimal-zeroes* zero))))
  78. (else #f)))