preprocessing.lisp 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. ;;;; preprocessing.lisp
  2. (in-package #:bcgreek)
  3. (defvar *greek-letters*)
  4. (defvar *bc-letters*)
  5. (defvar *bc-accents*)
  6. (defvar *bc-misc*)
  7. (defvar *bc-accent-groups*)
  8. (defun bcgreek-to-greek ()
  9. (let ((junk '(greek small letter with and)))
  10. (let ((translations (append *bc-letters*
  11. *bc-accents*
  12. (loop for j in junk collect (list j nil)))))
  13. (let ((substitutions (append `((capital . ,+capital-mark+))
  14. (loop for (name bc-char) in translations
  15. collect (cons name bc-char))))
  16. (admissible-elements (cons 'capital (mapcar #'first translations))))
  17. (let ((letters (loop for (code . name) in *greek-letters*
  18. when (and (subsetp name admissible-elements)
  19. (or (not (member 'sigma name))
  20. (member 'capital name)))
  21. collect (list (remove nil (sublis substitutions (if (and (member 'digamma name)
  22. (not (member 'small name)))
  23. (cons 'capital name)
  24. name)))
  25. (let ((greek-char (code-char code)))
  26. (if (or (member 'capital name)
  27. (and (member 'digamma name) (not (member 'small name))))
  28. (string greek-char)
  29. greek-char))))))
  30. (let ((letter-names (mapcar #'first *bc-letters*)))
  31. (let ((fragments-of-capitals (remove-duplicates (loop for (code . name) in *greek-letters*
  32. when (and (subsetp name admissible-elements)
  33. (member 'capital name))
  34. collect (list (remove nil (sublis substitutions (set-difference name letter-names))) t))
  35. :test #'equal)))
  36. (append letters fragments-of-capitals))))))))
  37. (defun bc-char-codes ()
  38. (let ((letter-byte-length (ceiling (log (1+ (length *bc-letters*)) 2)))
  39. after-accent)
  40. (append (loop for i from 1
  41. for letter in (mapcar #'second *bc-letters*)
  42. collect (list letter i 0 letter-byte-length))
  43. (loop for byte-start = letter-byte-length then (+ byte-start byte-length)
  44. and group in *bc-accent-groups*
  45. for byte-length = (ceiling (log (1+ (length group)) 2))
  46. do (setf after-accent (+ byte-start byte-length))
  47. nconc (loop for accent in group
  48. for i from 1
  49. collect (list accent i byte-start byte-length)))
  50. (list (list +capital-mark+ 1 after-accent 1)))))
  51. (defun encode-char (char)
  52. (destructuring-bind (char code byte-start byte-length) (assoc char (bc-char-codes))
  53. (declare (ignore char))
  54. (dpb code (byte byte-length byte-start) 0)))
  55. (defun make-decoding-table ()
  56. (loop with decoding-table = (make-hash-table)
  57. for (bcgreek greek-char) in (bcgreek-to-greek)
  58. for code = (loop for c in bcgreek
  59. for encoding-info = (encode-char c)
  60. sum encoding-info)
  61. do (setf (gethash code decoding-table) greek-char)
  62. finally (return decoding-table)))
  63. (defvar *mask-v*)
  64. (defgeneric mask-definition-form (name))
  65. (defmethod mask-definition-form ((name (eql 'with-mask)))
  66. `(defmacro ,name (&body body)
  67. `(let (,',*mask-v*)
  68. ,@body)))
  69. (defmethod mask-definition-form ((name (eql 'reset-mask)))
  70. `(defmacro ,name ()
  71. `(setf ,',*mask-v* 0)))
  72. (defmethod mask-definition-form ((name (eql 'add-to-mask)))
  73. (let ((clauses (loop for (char code byte-start byte-length) in (bc-char-codes)
  74. collect `(,char (setf ,*mask-v* (if (zerop (ldb (byte ,byte-length ,byte-start) ,*mask-v*))
  75. (dpb ,code (byte ,byte-length ,byte-start) ,*mask-v*)
  76. nil))))))
  77. `(defmacro ,name (char)
  78. `(case ,char
  79. ,@',clauses
  80. (otherwise (setf ,',*mask-v* nil))))))
  81. (defmethod mask-definition-form ((name (eql 'decode-mask)))
  82. (let ((decoding-table (make-decoding-table)))
  83. `(defmacro ,name ()
  84. `(values (gethash ,',*mask-v* ,,decoding-table)))))
  85. (defun mask-definitions ()
  86. (let ((*mask-v* (gensym "MASK-")))
  87. `(progn
  88. ,@(mapcar #'mask-definition-form '(with-mask reset-mask add-to-mask decode-mask)))))
  89. (defun simple-small-to-greek-0 ()
  90. (flet ((find-greek (char)
  91. (list char (second (assoc (list char) (bcgreek-to-greek)
  92. :test #'equal)))))
  93. (mapcar #'find-greek (remove #\s (set-difference (mapcar #'second *bc-letters*)
  94. (remove-duplicates (labels ((base-letter-p (char)
  95. (member char (mapcar #'second *bc-letters*)))
  96. (base-letter (list)
  97. (find-if #'base-letter-p list)))
  98. (mapcar #'base-letter
  99. (remove-if-not #'rest
  100. (remove #\* (mapcar #'first (bcgreek-to-greek))
  101. :test #'member))))))))))
  102. (defun bc-definitions ()
  103. `(progn
  104. ,(mask-definitions)
  105. (defmacro misc-bc-to-greek (c)
  106. `(case ,c
  107. ,,@(loop for (char-code bc) in *bc-misc*
  108. collect `'(,bc ,(code-char char-code)))))
  109. (defmacro simple-small-to-greek (c)
  110. `(case ,c
  111. ,@',(simple-small-to-greek-0)))
  112. (defmacro bc-letter-base ()
  113. ,(map 'string #'second *bc-letters*))
  114. (defmacro capital-buffer-length ()
  115. ,(flet ((capitalp (list)
  116. (member #\* list)))
  117. (loop for list in (mapcar #'first (bcgreek-to-greek))
  118. when (capitalp list)
  119. maximize (length list))))))