123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141 |
- ;;;; preprocessing.lisp
- (in-package #:bcgreek)
- (defvar *greek-letters*)
- (defvar *bc-letters*)
- (defvar *bc-accents*)
- (defvar *bc-misc*)
- (defvar *bc-accent-groups*)
- (defun bcgreek-to-greek ()
- (let ((junk '(greek small letter with and)))
- (let ((translations (append *bc-letters*
- *bc-accents*
- (loop for j in junk collect (list j nil)))))
- (let ((substitutions (append `((capital . ,+capital-mark+))
- (loop for (name bc-char) in translations
- collect (cons name bc-char))))
- (admissible-elements (cons 'capital (mapcar #'first translations))))
- (let ((letters (loop for (code . name) in *greek-letters*
- when (and (subsetp name admissible-elements)
- (or (not (member 'sigma name))
- (member 'capital name)))
- collect (list (remove nil (sublis substitutions (if (and (member 'digamma name)
- (not (member 'small name)))
- (cons 'capital name)
- name)))
- (let ((greek-char (code-char code)))
- (if (or (member 'capital name)
- (and (member 'digamma name) (not (member 'small name))))
- (string greek-char)
- greek-char))))))
- (let ((letter-names (mapcar #'first *bc-letters*)))
- (let ((fragments-of-capitals (remove-duplicates (loop for (code . name) in *greek-letters*
- when (and (subsetp name admissible-elements)
- (member 'capital name))
- collect (list (remove nil (sublis substitutions (set-difference name letter-names))) t))
- :test #'equal)))
- (append letters fragments-of-capitals))))))))
- (defun bc-char-codes ()
- (let ((letter-byte-length (ceiling (log (1+ (length *bc-letters*)) 2)))
- after-accent)
- (append (loop for i from 1
- for letter in (mapcar #'second *bc-letters*)
- collect (list letter i 0 letter-byte-length))
- (loop for byte-start = letter-byte-length then (+ byte-start byte-length)
- and group in *bc-accent-groups*
- for byte-length = (ceiling (log (1+ (length group)) 2))
- do (setf after-accent (+ byte-start byte-length))
- nconc (loop for accent in group
- for i from 1
- collect (list accent i byte-start byte-length)))
- (list (list +capital-mark+ 1 after-accent 1)))))
- (defun encode-char (char)
- (destructuring-bind (char code byte-start byte-length) (assoc char (bc-char-codes))
- (declare (ignore char))
- (dpb code (byte byte-length byte-start) 0)))
- (defun make-decoding-table ()
- (loop with decoding-table = (make-hash-table)
- for (bcgreek greek-char) in (bcgreek-to-greek)
- for code = (loop for c in bcgreek
- for encoding-info = (encode-char c)
- sum encoding-info)
- do (setf (gethash code decoding-table) greek-char)
- finally (return decoding-table)))
- (defvar *mask-v*)
- (defgeneric mask-definition-form (name))
- (defmethod mask-definition-form ((name (eql 'with-mask)))
- `(defmacro ,name (&body body)
- `(let (,',*mask-v*)
- ,@body)))
- (defmethod mask-definition-form ((name (eql 'reset-mask)))
- `(defmacro ,name ()
- `(setf ,',*mask-v* 0)))
- (defmethod mask-definition-form ((name (eql 'add-to-mask)))
- (let ((clauses (loop for (char code byte-start byte-length) in (bc-char-codes)
- collect `(,char (setf ,*mask-v* (if (zerop (ldb (byte ,byte-length ,byte-start) ,*mask-v*))
- (dpb ,code (byte ,byte-length ,byte-start) ,*mask-v*)
- nil))))))
- `(defmacro ,name (char)
- `(case ,char
- ,@',clauses
- (otherwise (setf ,',*mask-v* nil))))))
- (defmethod mask-definition-form ((name (eql 'decode-mask)))
- (let ((decoding-table (make-decoding-table)))
- `(defmacro ,name ()
- `(values (gethash ,',*mask-v* ,,decoding-table)))))
- (defun mask-definitions ()
- (let ((*mask-v* (gensym "MASK-")))
- `(progn
- ,@(mapcar #'mask-definition-form '(with-mask reset-mask add-to-mask decode-mask)))))
- (defun simple-small-to-greek-0 ()
- (flet ((find-greek (char)
- (list char (second (assoc (list char) (bcgreek-to-greek)
- :test #'equal)))))
- (mapcar #'find-greek (remove #\s (set-difference (mapcar #'second *bc-letters*)
- (remove-duplicates (labels ((base-letter-p (char)
- (member char (mapcar #'second *bc-letters*)))
- (base-letter (list)
- (find-if #'base-letter-p list)))
- (mapcar #'base-letter
- (remove-if-not #'rest
- (remove #\* (mapcar #'first (bcgreek-to-greek))
- :test #'member))))))))))
- (defun bc-definitions ()
- `(progn
- ,(mask-definitions)
- (defmacro misc-bc-to-greek (c)
- `(case ,c
- ,,@(loop for (char-code bc) in *bc-misc*
- collect `'(,bc ,(code-char char-code)))))
- (defmacro simple-small-to-greek (c)
- `(case ,c
- ,@',(simple-small-to-greek-0)))
- (defmacro bc-letter-base ()
- ,(map 'string #'second *bc-letters*))
-
- (defmacro capital-buffer-length ()
- ,(flet ((capitalp (list)
- (member #\* list)))
- (loop for list in (mapcar #'first (bcgreek-to-greek))
- when (capitalp list)
- maximize (length list))))))
|