hebrew.el 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. ;;; hebrew.el --- support for Hebrew -*- coding: utf-8 -*-
  2. ;; Copyright (C) 2001-2012 Free Software Foundation, Inc.
  3. ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
  4. ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
  5. ;; National Institute of Advanced Industrial Science and Technology (AIST)
  6. ;; Registration Number H14PRO021
  7. ;; Copyright (C) 2003
  8. ;; National Institute of Advanced Industrial Science and Technology (AIST)
  9. ;; Registration Number H13PRO009
  10. ;; Keywords: multilingual, Hebrew
  11. ;; This file is part of GNU Emacs.
  12. ;; GNU Emacs is free software: you can redistribute it and/or modify
  13. ;; it under the terms of the GNU General Public License as published by
  14. ;; the Free Software Foundation, either version 3 of the License, or
  15. ;; (at your option) any later version.
  16. ;; GNU Emacs is distributed in the hope that it will be useful,
  17. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. ;; GNU General Public License for more details.
  20. ;; You should have received a copy of the GNU General Public License
  21. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  22. ;;; Commentary:
  23. ;; For Hebrew, the character set ISO8859-8 is supported.
  24. ;; See http://www.ecma.ch/ecma1/STAND/ECMA-121.HTM.
  25. ;; Windows-1255 is also supported.
  26. ;;; Code:
  ;; ISO-8859-8: a charset-based coding system whose only charset is
  ;; `iso-8859-8'; it is also advertised under that MIME charset name.
  (define-coding-system 'hebrew-iso-8bit
    "ISO 2022 based 8-bit encoding for Hebrew (MIME:ISO-8859-8)."
    :coding-type 'charset
    :mnemonic ?8
    :charset-list '(iso-8859-8)
    :mime-charset 'iso-8859-8)

  ;; Alternative names for `hebrew-iso-8bit'.  The -e and -i names are the
  ;; Explicit and Implicit directionality labels defined in RFC 1556; all
  ;; three names are registered as plain aliases of the same coding system.
  (dolist (alias '(iso-8859-8 iso-8859-8-e iso-8859-8-i))
    (define-coding-system-alias alias 'hebrew-iso-8bit))
  ;; Register the language environments provided by this file.  Each table
  ;; entry is (NAME . INFO-ALIST); the entries are registered in the order
  ;; written, "Hebrew" first and "Windows-1255" second.
  (dolist (env
           '(("Hebrew"
              (tutorial . "TUTORIAL.he")
              (charset iso-8859-8)
              (coding-priority hebrew-iso-8bit)
              (coding-system hebrew-iso-8bit windows-1255 cp862)
              (nonascii-translation . iso-8859-8)
              (input-method . "hebrew")
              (unibyte-display . hebrew-iso-8bit)
              (sample-text . "Hebrew שלום")
              (documentation . "Bidirectional editing is supported."))
             ("Windows-1255"
              (coding-priority windows-1255)
              (coding-system windows-1255)
              (documentation . "Support for Windows-1255 encoding, e.g. for Yiddish.\nBidirectional editing is supported."))))
    (set-language-info-alist (car env) (cdr env)))
  ;; Windows-1255: a charset-based coding system whose only charset is
  ;; `windows-1255'; the same symbol is used as its MIME charset name.
  (define-coding-system 'windows-1255
    "windows-1255 (Hebrew) encoding (MIME: WINDOWS-1255)"
    :coding-type 'charset
    :mnemonic ?h
    :charset-list '(windows-1255)
    :mime-charset 'windows-1255)
  ;; `cp1255' is an alternative name for the same coding system.
  (define-coding-system-alias 'cp1255 'windows-1255)
  ;; DOS codepage 862: a charset-based coding system whose only charset is
  ;; `cp862'; the same symbol is used as its MIME charset name.
  (define-coding-system 'cp862
    "DOS codepage 862 (Hebrew)"
    :coding-type 'charset
    :mnemonic ?D
    :charset-list '(cp862)
    :mime-charset 'cp862)
  ;; `ibm862' is an alternative name for the same coding system.
  (define-coding-system-alias 'ibm862 'cp862)
  (defun hebrew-font-get-precomposed (font-object)
    "Return nested alist of Hebrew precomposed glyphs available in FONT-OBJECT."
    ;; The alist maps a decomposed Hebrew character sequence to the glyph
    ;; FONT-OBJECT has for the matching precomposed character.  It is built
    ;; once per font and cached in the font's `hebrew-precomposed' property;
    ;; the initial (list t) is non-nil, so even a font with no precomposed
    ;; glyph at all is only probed once.
    (or (font-get font-object 'hebrew-precomposed)
        (let* (;; Hebrew precomposed characters to probe the font for.
               (codepoints
                [#xFB2A #xFB2B #xFB2C #xFB2D #xFB2E #xFB2F #xFB30 #xFB31
                 #xFB32 #xFB33 #xFB34 #xFB35 #xFB36 #xFB38 #xFB39 #xFB3A
                 #xFB3B #xFB3C #xFB3E #xFB40 #xFB41 #xFB43 #xFB44 #xFB46
                 #xFB47 #xFB48 #xFB49 #xFB4A #xFB4B #xFB4C #xFB4D #xFB4E])
               ;; Decomposition of each character above, index for index.
               (sequences
                [[#x05E9 #x05C1]
                 [#x05E9 #x05C2]
                 [#x05E9 #x05BC #x05C1]
                 [#x05E9 #x05BC #x05C2]
                 [#x05D0 #x05B7]
                 [#x05D0 #x05B8]
                 [#x05D0 #x05BC]
                 [#x05D1 #x05BC]
                 [#x05D2 #x05BC]
                 [#x05D3 #x05BC]
                 [#x05D4 #x05BC]
                 [#x05D5 #x05BC]
                 [#x05D6 #x05BC]
                 [#x05D8 #x05BC]
                 [#x05D9 #x05BC]
                 [#x05DA #x05BC]
                 [#x05DB #x05BC]
                 [#x05DC #x05BC]
                 [#x05DE #x05BC]
                 [#x05E0 #x05BC]
                 [#x05E1 #x05BC]
                 [#x05E3 #x05BC]
                 [#x05E4 #x05BC]
                 [#x05E6 #x05BC]
                 [#x05E7 #x05BC]
                 [#x05E8 #x05BC]
                 [#x05E9 #x05BC]
                 [#x05EA #x05BC]
                 [#x05D5 #x05B9]
                 [#x05D1 #x05BF]
                 [#x05DB #x05BF]
                 [#x05E4 #x05BF]])
               (table (list t))
               (glyphs (font-get-glyphs font-object 0 (length codepoints)
                                        codepoints)))
          ;; Record only the sequences for which the font returned a glyph.
          (dotimes (n (length codepoints))
            (let ((g (aref glyphs n)))
              (when g
                (set-nested-alist (aref sequences n) g table))))
          ;; Cache the result in FONT-OBJECT's property.
          (font-put font-object 'hebrew-precomposed table)
          table)))
  ;; Composition function for Hebrew.  GSTRING is made of a Hebrew base
  ;; character followed by Hebrew diacritical marks, or of a single
  ;; Hebrew diacritical mark.  Adjust GSTRING in place so that the
  ;; sequence is displayed properly, and return GSTRING.  The basic
  ;; strategy is:
  ;;
  ;; (1) If there is a single diacritical mark, add padding space to the
  ;;     left or right of the glyph.
  ;;
  ;; (2) If the font has OpenType features for Hebrew, let the OTF
  ;;     driver do the whole work.
  ;;
  ;; (3) If the font has precomposed glyphs, use them as far as
  ;;     possible.  Adjust the remaining glyphs artificially.
  (defun hebrew-shape-gstring (gstring)
    (let* ((font (lgstring-font gstring))
           (otf (font-get font :otf))
           (nchars (lgstring-char-len gstring))
           header nglyphs base-width glyph precomposed val idx)
      (cond
       ((= nchars 1)
        ;; Independent diacritical mark.  Add padding space to left or
        ;; right so that the glyph doesn't overlap with the surrounding
        ;; chars.
        ;; NOTE(review): if both conditions below hold, the second call
        ;; presumably replaces the adjustment set by the first -- confirm
        ;; against `lglyph-set-adjustment'.
        (setq glyph (lgstring-glyph gstring 0))
        (let ((width (lglyph-width glyph))
              bearing)
          (if (< (setq bearing (lglyph-lbearing glyph)) 0)
              (lglyph-set-adjustment glyph bearing 0 (- width bearing)))
          (if (> (setq bearing (lglyph-rbearing glyph)) width)
              (lglyph-set-adjustment glyph 0 0 bearing))))
       ((or (assq 'hebr (car otf)) (assq 'hebr (cdr otf)))
        ;; FONT has OpenType features for Hebrew: `hebr' appears in
        ;; either half of the font's :otf property.
        (font-shape-gstring gstring))
       (t
        ;; FONT doesn't have OpenType features for Hebrew.
        ;; Try a precomposed glyph.
        ;; Now GSTRING is in this form:
        ;; [[FONT CHAR1 CHAR2 ... CHARn] nil GLYPH1 GLYPH2 ... GLYPHn nil ...]
        ;; The lookup starts at index 1 of HEADER so that FONT is skipped.
        (setq precomposed (hebrew-font-get-precomposed font)
              header (lgstring-header gstring)
              val (lookup-nested-alist header precomposed nil 1))
        (if (and (consp val) (vectorp (car val)))
            ;; All characters can be displayed by a single precomposed glyph.
            ;; Reform GSTRING to [HEADER nil PRECOMPOSED-GLYPH nil ...]
            ;; The cached glyph is copied so that the per-font table is
            ;; not modified by `lglyph-set-from-to'.
            (let ((glyph (copy-sequence (car val))))
              (lglyph-set-from-to glyph 0 (1- nchars))
              (lgstring-set-glyph gstring 0 glyph)
              (lgstring-set-glyph gstring 1 nil))
          (if (and (integerp val) (> val 2)
                   (setq glyph (lookup-nested-alist header precomposed val 1))
                   (consp glyph) (vectorp (car glyph)))
              ;; The first (1- VAL) characters can be displayed by a
              ;; precomposed glyph.  Provided that VAL is 3, the first
              ;; two glyphs should be replaced by the precomposed glyph.
              ;; In that case, reform GSTRING to:
              ;; [HEADER nil PRECOMPOSED-GLYPH GLYPH3 ... GLYPHn nil ...]
              (let* ((ncmp (1- val)) ; number of composed glyphs
                     (diff (1- ncmp))) ; number of reduced glyphs
                (setq glyph (copy-sequence (car glyph)))
                (lglyph-set-from-to glyph 0 (1- nchars))
                (lgstring-set-glyph gstring 0 glyph)
                ;; Shift the glyphs that were not composed DIFF slots
                ;; towards the front, then terminate the glyph list
                ;; with nil.
                (setq idx ncmp)
                (while (< idx nchars)
                  (setq glyph (lgstring-glyph gstring idx))
                  (lglyph-set-from-to glyph 0 (1- nchars))
                  (lgstring-set-glyph gstring (- idx diff) glyph)
                  (setq idx (1+ idx)))
                (lgstring-set-glyph gstring (- idx diff) nil)
                (setq idx (- ncmp diff)
                      nglyphs (- nchars diff)))
            ;; No usable precomposed glyph: keep every glyph, and treat
            ;; the first one as the base.
            (setq glyph (lgstring-glyph gstring 0))
            (lglyph-set-from-to glyph 0 (1- nchars))
            (setq idx 1 nglyphs nchars))
          ;; Now IDX is an index to the first non-precomposed glyph.
          ;; Adjust positions of the remaining glyphs artificially.
          (setq base-width (lglyph-width (lgstring-glyph gstring 0)))
          (while (< idx nglyphs)
            (setq glyph (lgstring-glyph gstring idx))
            (lglyph-set-from-to glyph 0 (1- nchars))
            (if (>= (lglyph-lbearing glyph) (lglyph-width glyph))
                ;; It seems that this glyph is designed to be rendered
                ;; before the base glyph.
                (lglyph-set-adjustment glyph (- base-width) 0 0)
              (if (>= (lglyph-lbearing glyph) 0)
                  ;; Align the horizontal center of this glyph to the
                  ;; horizontal center of the base glyph.
                  (let ((width (- (lglyph-rbearing glyph)
                                  (lglyph-lbearing glyph))))
                    (lglyph-set-adjustment glyph
                                           (- (/ (- base-width width) 2)
                                              (lglyph-lbearing glyph)
                                              base-width) 0 0))))
            ;; A glyph with a negative left bearing gets no adjustment.
            (setq idx (1+ idx))))))
      gstring))
  ;; Install `hebrew-shape-gstring' as the composition function for the
  ;; code points #x591..#x5C7.  Each rule is [PATTERN PREV-CHARS FUNC];
  ;; the rules are listed in the same order as before: the two ZWJ
  ;; (U+200D) patterns first, then the plain base+marks pattern, then the
  ;; pattern-less fallback.
  (let* ((letter "[\u05D0-\u05F2]")
         (marks "[\u0591-\u05BD\u05BF\u05C1-\u05C2\u05C4-\u05C5\u05C7]+")
         (plain (concat letter marks))
         (joined (concat letter "\u200D" marks))
         (rules (list (vector joined 3 'hebrew-shape-gstring)
                      (vector joined 2 'hebrew-shape-gstring)
                      (vector plain 1 'hebrew-shape-gstring)
                      [nil 0 hebrew-shape-gstring])))
    (set-char-table-range composition-function-table '(#x591 . #x5C7) rules)
    ;; Exclude non-combining characters that fall inside the range above.
    (dolist (code '(#x5BE #x5C0 #x5C3 #x5C6))
      (set-char-table-range composition-function-table code nil)))

  (provide 'hebrew)
  236. ;;; hebrew.el ends here