cyrillic.el 9.0 KB


  1. ;;; cyrillic.el --- support for Cyrillic -*- coding: iso-2022-7bit; -*-
  2. ;; Copyright (C) 1997-1998, 2001-2012 Free Software Foundation, Inc.
  3. ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
  4. ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
  5. ;; National Institute of Advanced Industrial Science and Technology (AIST)
  6. ;; Registration Number H14PRO021
  7. ;; Copyright (C) 2003
  8. ;; National Institute of Advanced Industrial Science and Technology (AIST)
  9. ;; Registration Number H13PRO009
  10. ;; Author: Kenichi Handa <handa@etl.go.jp>
  11. ;; Keywords: multilingual, Cyrillic, i18n
  12. ;; This file is part of GNU Emacs.
  13. ;; GNU Emacs is free software: you can redistribute it and/or modify
  14. ;; it under the terms of the GNU General Public License as published by
  15. ;; the Free Software Foundation, either version 3 of the License, or
  16. ;; (at your option) any later version.
  17. ;; GNU Emacs is distributed in the hope that it will be useful,
  18. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  19. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  20. ;; GNU General Public License for more details.
  21. ;; You should have received a copy of the GNU General Public License
  22. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  23. ;;; Commentary:
  24. ;; The character set ISO8859-5 is supported. KOI-8 and ALTERNATIVNYJ
  25. ;; are converted to Unicode internally. See
  26. ;; <URL:http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM>. For more info
  27. ;; on Cyrillic charsets, see
  28. ;; <URL:http://czyborra.com/charsets/cyrillic.html>. The KOI and
  29. ;; Alternativnyj coding systems should live in code-pages.el, but
  30. ;; they've always been preloaded and the coding system autoload
  31. ;; mechanism didn't get accepted, so they have to stay here and
  32. ;; duplicate code-pages stuff.
  33. ;; Note that 8859-5 maps directly onto the Unicode Cyrillic block,
  34. ;; apart from codepoints 160 (NBSP, c.f. U+0400), 173 (soft hyphen,
  35. ;; c.f. U+040D) and 253 (section sign, c.f. U+045D). The KOI-8 and
  36. ;; Alternativnyj coding systems encode both 8859-5 and Unicode.
  37. ;; ucs-tables.el provides unification for cyrillic-iso-8bit.
  38. ;; Customizing `utf-fragment-on-decoding' allows decoding characters
  39. ;; from KOI and Alternativnyj into 8859-5 where that's possible.
  40. ;; cyrillic-iso8859-5 characters take half as much space in the buffer
  41. ;; as the mule-unicode-0100-24ff equivalents, though that's probably
  42. ;; not normally a big deal.
  43. ;;; Code:
  44. ;; Cyrillic (general)
  45. ;; ISO-8859-5 stuff
  ;; ISO-8859-5 exposed as a charset-type coding system, advertised to
  ;; MIME as iso-8859-5.
  (define-coding-system 'cyrillic-iso-8bit
    "ISO 2022 based 8-bit encoding for Cyrillic script (MIME:ISO-8859-5)."
    :coding-type 'charset
    :mnemonic ?5
    :charset-list (list 'iso-8859-5)
    :mime-charset 'iso-8859-5)

  ;; `iso-8859-5' is a second name for the same coding system.
  (define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit)
  ;; Language environment built around the ISO-8859-5 coding system.
  (set-language-info-alist
   "Cyrillic-ISO" '((charset iso-8859-5)
                    (coding-system cyrillic-iso-8bit)
                    (coding-priority cyrillic-iso-8bit)
                    (input-method . "cyrillic-yawerty") ; fixme
                    (nonascii-translation . iso-8859-5)
                    (unibyte-display . cyrillic-iso-8bit)
                    (features cyril-util)
                    ;; The sample text is the Russian word for "Russian"
                    ;; followed by a greeting.  It is spelled with \u
                    ;; escapes so the literal stays pure ASCII; the previous
                    ;; literal was an ISO-2022-7bit byte sequence whose
                    ;; escape characters were missing, so it displayed as
                    ;; ",L@caaZXY(B" rather than Cyrillic.
                    (sample-text
                     . "Russian (\u0420\u0443\u0441\u0441\u043a\u0438\u0439) \u0417\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435!")
                    (documentation . "Support for Cyrillic ISO-8859-5."))
   '("Cyrillic"))
  64. ;; KOI-8R stuff
  ;; KOI8-R as a charset-type coding system.
  (define-coding-system 'cyrillic-koi8
    "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R)."
    :coding-type 'charset
    ;; ?K was used in the past and is, strictly speaking, the more correct
    ;; mnemonic, but Korean uses it too.  People who use koi8 for languages
    ;; other than Russian will have to forgive the choice of ?R.
    :mnemonic ?R
    :charset-list (list 'koi8)
    :mime-charset 'koi8-r)

  ;; Register the alternative names, in the same order as before.
  (dolist (alias '(koi8-r koi8 cp878))
    (define-coding-system-alias alias 'cyrillic-koi8))
  ;; Language environment built around the KOI8-R coding system.
  (set-language-info-alist
   "Cyrillic-KOI8" '((charset koi8)
                     (coding-system cyrillic-koi8)
                     (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
                     (ctext-non-standard-encodings "koi8-r")
                     (nonascii-translation . koi8)
                     (input-method . "russian-typewriter")
                     (features cyril-util)
                     (unibyte-display . cyrillic-koi8)
                     ;; The sample text is the Russian word for "Russian"
                     ;; followed by a greeting.  It is spelled with \u
                     ;; escapes so the literal stays pure ASCII; the previous
                     ;; literal was an ISO-2022-7bit byte sequence whose
                     ;; escape characters were missing, so it displayed as
                     ;; ",L@caaZXY(B" rather than Cyrillic.
                     (sample-text
                      . "Russian (\u0420\u0443\u0441\u0441\u043a\u0438\u0439) \u0417\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435!")
                     (documentation . "Support for Cyrillic KOI8-R."))
   '("Cyrillic"))
  ;; Russian: KOI8-R coding with the russian-computer input method.
  (set-language-info-alist
   "Russian" `((charset cyrillic-iso8859-5)
               ;; The value is whatever is stored in the `translation-table'
               ;; property of the symbol below at load time.
               ;; NOTE(review): nothing in this file sets that property; if
               ;; it is unset elsewhere too, this entry is nil -- confirm.
               (nonascii-translation
                . ,(get 'cyrillic-koi8-r-nonascii-translation-table
                        'translation-table))
               (coding-system cyrillic-koi8)
               (coding-priority cyrillic-koi8 cyrillic-iso-8bit)
               (input-method . "russian-computer")
               (features cyril-util)
               (unibyte-display . cyrillic-koi8)
               ;; The sample text is the Russian word for "Russian" followed
               ;; by a greeting.  It is spelled with \u escapes so the
               ;; literal stays pure ASCII; the previous literal was an
               ;; ISO-2022-7bit byte sequence whose escape characters were
               ;; missing, so it displayed as ",L@caaZXY(B" rather than
               ;; Cyrillic.
               (sample-text
                . "Russian (\u0420\u0443\u0441\u0441\u043a\u0438\u0439) \u0417\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435!")
               (documentation . "\
Support for Russian using koi8-r and the russian-computer input method.")
               (tutorial . "TUTORIAL.ru"))
   '("Cyrillic"))
  ;; KOI8-U as a charset-type coding system.
  (define-coding-system 'koi8-u
    "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)"
    :coding-type 'charset
    :mnemonic ?U
    :charset-list (list 'koi8-u)
    :mime-charset 'koi8-u)
  ;; Ukrainian: KOI8-U coding with the ukrainian-computer input method.
  (set-language-info-alist
   "Ukrainian"
   (list (list 'charset 'koi8-u)
         (list 'coding-system 'koi8-u)
         (list 'coding-priority 'koi8-u)
         (cons 'nonascii-translation 'koi8-u)
         (cons 'input-method "ukrainian-computer")
         (cons 'documentation
               "Support for Ukrainian with KOI8-U character set."))
   (list "Cyrillic"))
  119. ;;; ALTERNATIVNYJ stuff
  ;; ALTERNATIVNYJ as a charset-type coding system (no MIME name is given).
  (define-coding-system 'cyrillic-alternativnyj
    "ALTERNATIVNYJ 8-bit encoding for Cyrillic."
    :coding-type 'charset
    :mnemonic ?A
    :charset-list (list 'alternativnyj))

  ;; The short name `alternativnyj' refers to the same coding system.
  (define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj)
  ;; Language environment built around the ALTERNATIVNYJ coding system.
  (set-language-info-alist
   "Cyrillic-ALT" '((charset alternativnyj)
                    (coding-system cyrillic-alternativnyj)
                    (coding-priority cyrillic-alternativnyj)
                    (nonascii-translation . alternativnyj)
                    (input-method . "russian-typewriter")
                    (features cyril-util)
                    (unibyte-display . cyrillic-alternativnyj)
                    ;; The sample text is the Russian word for "Russian"
                    ;; followed by a greeting.  It is spelled with \u
                    ;; escapes so the literal stays pure ASCII; the previous
                    ;; literal was an ISO-2022-7bit byte sequence whose
                    ;; escape characters were missing, so it displayed as
                    ;; ",L@caaZXY(B" rather than Cyrillic.
                    (sample-text
                     . "Russian (\u0420\u0443\u0441\u0441\u043a\u0438\u0439) \u0417\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435!")
                    (documentation . "Support for Cyrillic ALTERNATIVNYJ."))
   '("Cyrillic"))
  ;; CP866, backed by the `ibm866' charset.
  (define-coding-system 'cp866
    "CP866 encoding for Cyrillic."
    :coding-type 'charset
    :mnemonic ?*
    :charset-list (list 'ibm866)
    :mime-charset 'cp866)
  ;; The koi8-u coding system is defined once, earlier in this file (just
  ;; before the first "Ukrainian" language environment).  A second
  ;; definition with exactly the same arguments used to sit here; it was
  ;; redundant and has been dropped.
  ;; KOI8-T as a charset-type coding system.
  (define-coding-system 'koi8-t
    "KOI8-T 8-bit encoding for Cyrillic"
    :coding-type 'charset
    :mnemonic ?*
    :charset-list (list 'koi8-t)
    :mime-charset 'koi8-t)
  ;; windows-1251 as a charset-type coding system.
  (define-coding-system 'windows-1251
    "windows-1251 8-bit encoding for Cyrillic (MIME: WINDOWS-1251)"
    :coding-type 'charset
    :mnemonic ?b
    :charset-list (list 'windows-1251)
    :mime-charset 'windows-1251)

  ;; `cp1251' is a second name for the same coding system.
  (define-coding-system-alias 'cp1251 'windows-1251)
  ;; cp1125 as a charset-type coding system (no MIME name is given).
  (define-coding-system 'cp1125
    "cp1125 8-bit encoding for Cyrillic"
    :coding-type 'charset
    :mnemonic ?*
    :charset-list (list 'cp1125))

  ;; Alternative names.  According to Serhii Hlodin
  ;; <hlodin@lutsk.bank.gov.ua>, cp866u is the original name for cp1125.
  (dolist (alias '(ruscii cp866u))
    (define-coding-system-alias alias 'cp1125))
  ;; DOS codepage 855 as a charset-type coding system.
  (define-coding-system 'cp855
    "DOS codepage 855 (Russian)"
    :coding-type 'charset
    :mnemonic ?D
    :charset-list (list 'cp855)
    :mime-charset 'cp855)

  ;; `ibm855' is a second name for the same coding system.
  (define-coding-system-alias 'ibm855 'cp855)
  ;; The Bulgarian DOS codepage, backed by the `mik' charset.
  (define-coding-system 'mik
    "Bulgarian DOS codepage"
    :coding-type 'charset
    :mnemonic ?D
    :charset-list (list 'mik))
  ;; PT154 as a charset-type coding system (no MIME name is given).
  (define-coding-system 'pt154
    "Parattype Asian Cyrillic codepage"
    :coding-type 'charset
    :mnemonic ?D
    :charset-list (list 'pt154))
  187. ;; (set-language-info-alist
  188. ;; "Windows-1251" `((coding-system windows-1251)
  189. ;; (coding-priority windows-1251)
  190. ;; (input-method . "russian-typewriter") ; fixme?
  191. ;; (features code-pages)
  192. ;; (documentation . "Support for windows-1251 character set."))
  193. ;; '("Cyrillic"))
  ;; Tajik: KOI8-T coding with the russian-typewriter input method.
  (set-language-info-alist
   "Tajik" '((coding-system koi8-t)
             (coding-priority koi8-t)
             ;; Use the same charset name as the `charset' entry below,
             ;; matching how the other language environments in this file
             ;; fill in this key.  The previous value, `cyrillic-koi8-t',
             ;; is not defined anywhere in this file.
             (nonascii-translation . koi8-t)
             (charset koi8-t)
             (input-method . "russian-typewriter") ; fixme?
             (features code-pages)
             (documentation . "Support for Tajik using KOI8-T."))
   '("Cyrillic"))
  ;; Bulgarian: windows-1251 coding with the bulgarian-bds input method.
  (set-language-info-alist
   "Bulgarian"
   (list (list 'coding-system 'windows-1251)
         (list 'coding-priority 'windows-1251)
         (cons 'nonascii-translation 'windows-1251)
         (list 'charset 'windows-1251)
         (list 'ctext-non-standard-encodings "microsoft-cp1251")
         (cons 'input-method "bulgarian-bds")
         (cons 'documentation
               "Support for Bulgarian with windows-1251 character set."))
   (list "Cyrillic"))
  ;; Belarusian: windows-1251 coding with the belarusian input method.
  (set-language-info-alist
   "Belarusian"
   (list (list 'coding-system 'windows-1251)
         (list 'coding-priority 'windows-1251)
         (cons 'nonascii-translation 'windows-1251)
         (list 'charset 'windows-1251)
         (list 'ctext-non-standard-encodings "microsoft-cp1251")
         (cons 'input-method "belarusian")
         (cons 'documentation
               "Support for Belarusian with windows-1251 character set.
\(The name Belarusian replaced Byelorussian in the early 1990s.)"))
   (list "Cyrillic"))
  ;; Second "Ukrainian" registration in this file.  It repeats the
  ;; coding-system, coding-priority and input-method entries of the earlier
  ;; one, carries no charset or nonascii-translation entry, and spells the
  ;; charset name in lower case in its documentation string.
  (set-language-info-alist
   "Ukrainian"
   (list (list 'coding-system 'koi8-u)
         (list 'coding-priority 'koi8-u)
         (cons 'input-method "ukrainian-computer")
         (cons 'documentation
               "Support for Ukrainian with koi8-u character set."))
   (list "Cyrillic"))

  ;; Announce the feature so that (require 'cyrillic) is satisfied.
  (provide 'cyrillic)
  232. ;;; cyrillic.el ends here