unicode-category.scm 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. ; Copyright (c) 1993-2008 by Richard Kelsey and Jonathan Rees. See file COPYING.
  2. ; Copyright (c) 2005-2006 by Basis Technology Corporation. See file COPYING.
  3. ; Code-point classification
  4. (define-enumerated-type primary-category :primary-category
  5. primary-category?
  6. primary-categories
  7. primary-category-name
  8. primary-category-index
  9. (letter
  10. number
  11. punctuation
  12. symbol
  13. mark
  14. separator
  15. miscellaneous))
  16. (define-finite-type general-category :general-category
  17. (primary-category id symbol)
  18. general-category?
  19. general-categories
  20. general-category-name
  21. general-category-index
  22. (primary-category general-category-primary-category)
  23. (id general-category-id)
  24. (symbol general-category-symbol)
  25. ((uppercase-letter (primary-category letter) "Lu" 'Lu)
  26. (lowercase-letter (primary-category letter) "Ll" 'Ll)
  27. (titlecase-letter (primary-category letter) "Lt" 'Lt)
  28. (modified-letter (primary-category letter) "Lm" 'Lm)
  29. (other-letter (primary-category letter) "Lo" 'Lo)
  30. (non-spacing-mark (primary-category mark) "Mn" 'Mn)
  31. (combining-spacing-mark (primary-category mark) "Mc" 'Mc)
  32. (enclosing-mark (primary-category mark) "Me" 'Me)
  33. (decimal-digit-number (primary-category number) "Nd" 'Nd)
  34. (letter-number (primary-category number) "Nl" 'Nl)
  35. (other-number (primary-category number) "No" 'No)
  36. (opening-punctuation (primary-category punctuation) "Ps" 'Ps)
  37. (closing-punctuation (primary-category punctuation) "Pe" 'Pe)
  38. (initial-quote-punctuation (primary-category punctuation) "Pi" 'Pi)
  39. (final-quote-punctuation (primary-category punctuation) "Pf" 'Pf)
  40. (dash-punctuation (primary-category punctuation) "Pd" 'Pd)
  41. (connector-punctuation (primary-category punctuation) "Pc" 'Pc)
  42. (other-punctuation (primary-category punctuation) "Po" 'Po)
  43. (currency-symbol (primary-category symbol) "Sc" 'Sc)
  44. (mathematical-symbol (primary-category symbol) "Sm" 'Sm)
  45. (modifier-symbol (primary-category symbol) "Sk" 'Sk)
  46. (other-symbol (primary-category symbol) "So" 'So)
  47. (space-separator (primary-category separator) "Zs" 'Zs)
  48. (paragraph-separator (primary-category separator) "Zp" 'Zp)
  49. (line-separator (primary-category separator) "Zl" 'Zl)
  50. (control-character (primary-category miscellaneous) "Cc" 'Cc)
  51. (formatting-character (primary-category miscellaneous) "Cf" 'Cf)
  52. (surrogate (primary-category miscellaneous) "Cs" 'Cs)
  53. (private-use-character (primary-category miscellaneous) "Co" 'Co)
  54. (unassigned (primary-category miscellaneous) "Cn" 'Cn)))
  55. (define (bits-necessary count)
  56. (let loop ((e 0)
  57. (reached 1))
  58. (if (>= reached count)
  59. e
  60. (loop (+ e 1) (* 2 reached)))))
  61. (define *general-category-bits*
  62. (bits-necessary (vector-length general-categories)))
  63. (define (id->general-category id)
  64. (let ((count (vector-length general-categories)))
  65. (let loop ((i 0))
  66. (cond
  67. ((>= i count) #f)
  68. ((string=? (general-category-id (vector-ref general-categories i))
  69. id)
  70. (vector-ref general-categories i))
  71. (else
  72. (loop (+ 1 i)))))))