mule-conf.el 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558
  1. ;;; mule-conf.el --- configure multilingual environment
  2. ;; Copyright (C) 1997-2012 Free Software Foundation, Inc.
  3. ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
  4. ;; National Institute of Advanced Industrial Science and Technology (AIST)
  5. ;; Registration Number H14PRO021
  6. ;; Copyright (C) 2003
  7. ;; National Institute of Advanced Industrial Science and Technology (AIST)
  8. ;; Registration Number H13PRO009
  9. ;; Keywords: i18n, mule, multilingual, character set, coding system
  10. ;; This file is part of GNU Emacs.
  11. ;; GNU Emacs is free software: you can redistribute it and/or modify
  12. ;; it under the terms of the GNU General Public License as published by
  13. ;; the Free Software Foundation, either version 3 of the License, or
  14. ;; (at your option) any later version.
  15. ;; GNU Emacs is distributed in the hope that it will be useful,
  16. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. ;; GNU General Public License for more details.
  19. ;; You should have received a copy of the GNU General Public License
  20. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  21. ;;; Commentary:
  22. ;; This file defines the Emacs charsets and some basic coding systems.
  23. ;; Other coding systems are defined in the files in directory
  24. ;; lisp/language.
  25. ;;; Code:
  26. ;;; Remarks
  27. ;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/.
  28. ;; Standards docs equivalent to iso-2022 and iso-8859 are at
  29. ;; http://www.ecma.ch/.
  30. ;; FWIW, http://www.microsoft.com/globaldev/ lists the following for
  31. ;; MS Windows, which are presumably the only charsets we really need
  32. ;; to worry about on such systems:
  33. ;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866
  34. ;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257,
  35. ;; 1258, 874, 932, 936, 949, 950
  36. ;;; Definitions of character sets.
  37. ;; The charsets `ascii', `unicode' and `eight-bit' are already defined
  38. ;; in charset.c as below:
  39. ;;
  40. ;; (define-charset 'ascii
  41. ;; ""
  42. ;; :dimension 1
  43. ;; :code-space [0 127]
  44. ;; :iso-final-char ?B
  45. ;; :ascii-compatible-p t
  46. ;; :emacs-mule-id 0
  47. ;; :code-offset 0)
  48. ;;
  49. ;; (define-charset 'unicode
  50. ;; ""
  51. ;; :dimension 3
  52. ;; :code-space [0 255 0 255 0 16]
  53. ;; :ascii-compatible-p t
  54. ;; :code-offset 0)
  55. ;;
  56. ;; (define-charset 'emacs
  57. ;; ""
  58. ;; :dimension 3
  59. ;; :code-space [0 255 0 255 0 63]
  60. ;; :ascii-compatible-p t
  61. ;; :supplementary-p t
  62. ;; :code-offset 0)
  63. ;;
  64. ;; (define-charset 'eight-bit
  65. ;; ""
  66. ;; :dimension 1
  67. ;; :code-space [128 255]
  68. ;; :code-offset #x3FFF80)
  69. ;;
  70. ;; We now set :docstring, :short-name, and :long-name properties.
  71. (put-charset-property
  72. 'ascii :docstring "ASCII (ISO646 IRV)")
  73. (put-charset-property
  74. 'ascii :short-name "ASCII")
  75. (put-charset-property
  76. 'ascii :long-name "ASCII (ISO646 IRV)")
  77. (put-charset-property
  78. 'iso-8859-1 :docstring "Latin-1 (ISO/IEC 8859-1)")
  79. (put-charset-property
  80. 'iso-8859-1 :short-name "Latin-1")
  81. (put-charset-property
  82. 'iso-8859-1 :long-name "Latin-1")
  83. (put-charset-property
  84. 'unicode :docstring "Unicode (ISO10646)")
  85. (put-charset-property
  86. 'unicode :short-name "Unicode")
  87. (put-charset-property
  88. 'unicode :long-name "Unicode (ISO10646)")
  89. (put-charset-property
  90. 'emacs :docstring "Full Emacs charset (excluding eight bit chars)")
  91. (put-charset-property
  92. 'emacs :short-name "Emacs")
  93. (put-charset-property
  94. 'emacs :long-name "Emacs")
  95. (put-charset-property 'eight-bit :docstring "Raw bytes 128-255")
  96. (put-charset-property 'eight-bit :short-name "Raw bytes")
  97. (define-charset-alias 'ucs 'unicode)
  98. (define-charset 'latin-iso8859-1
  99. "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  100. :short-name "RHP of Latin-1"
  101. :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100"
  102. :iso-final-char ?A
  103. :emacs-mule-id 129
  104. :code-space [32 127]
  105. :code-offset 160)
  106. ;; Name perhaps not ideal, but is XEmacs-compatible.
  107. (define-charset 'control-1
  108. "8-bit control code (0x80..0x9F)"
  109. :short-name "8-bit control code"
  110. :code-space [128 159]
  111. :code-offset 128)
  112. (define-charset 'eight-bit-control
  113. "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)"
  114. :short-name "Raw bytes 0x80..0x9F"
  115. :supplementary-p t
  116. :code-space [128 159]
  117. :code-offset #x3FFF80) ; see character.h
  118. (define-charset 'eight-bit-graphic
  119. "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)"
  120. :short-name "Raw bytes 0xA0..0xFF"
  121. :supplementary-p t
  122. :code-space [160 255]
  123. :code-offset #x3FFFA0) ; see character.h
  124. (defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname
  125. iso-ir iso-final
  126. emacs-mule-id map)
  127. `(progn
  128. (define-charset ,symbol
  129. ,name
  130. :short-name ,nickname
  131. :long-name ,name
  132. :ascii-compatible-p t
  133. :code-space [0 255]
  134. :map ,map)
  135. (if ,iso-symbol
  136. (define-charset ,iso-symbol
  137. (if ,iso-ir
  138. (format "Right-Hand Part of %s (%s): ISO-IR-%d"
  139. ,name ,nickname ,iso-ir)
  140. (format "Right-Hand Part of %s (%s)" ,name ,nickname))
  141. :short-name (format "RHP of %s" ,name)
  142. :long-name (format "RHP of %s (%s)" ,name ,nickname)
  143. :iso-final-char ,iso-final
  144. :emacs-mule-id ,emacs-mule-id
  145. :code-space [32 127]
  146. :subset (list ,symbol 160 255 -128)))))
  147. (define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2
  148. "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2")
  149. (define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3
  150. "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3")
  151. (define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4
  152. "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4")
  153. (define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5
  154. "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5")
  155. (define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6
  156. "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6")
  157. (define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7
  158. "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7")
  159. (define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8
  160. "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8")
  161. (define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9
  162. "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9")
  163. (define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10
  164. "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10")
  165. ;; http://www.nectec.or.th/it-standards/iso8859-11/
  166. ;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620
  167. ;; plus nbsp
  168. (define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11
  169. "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11")
  170. ;; 8859-12 doesn't (yet?) exist.
  171. (define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13
  172. "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13")
  173. (define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14
  174. "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14")
  175. (define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15
  176. "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15")
  177. (define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16
  178. "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16")
  179. ;; No point in keeping it around.
  180. (fmakunbound 'define-iso-single-byte-charset)
  181. ;; Can this be shared with 8859-11?
  182. ;; N.b. not all of these are defined in Unicode.
  183. (define-charset 'thai-tis620
  184. "TIS620.2533"
  185. :short-name "TIS620.2533"
  186. :iso-final-char ?T
  187. :emacs-mule-id 133
  188. :code-space [32 127]
  189. :code-offset #x0E00)
  190. ;; Fixme: doc for this, c.f. above
  191. (define-charset 'tis620-2533
  192. "TIS620.2533"
  193. :short-name "TIS620.2533"
  194. :ascii-compatible-p t
  195. :code-space [0 255]
  196. :superset '(ascii eight-bit-control (thai-tis620 . 128)))
  197. (define-charset 'jisx0201
  198. "JISX0201"
  199. :short-name "JISX0201"
  200. :code-space [0 #xDF]
  201. :map "JISX0201")
  202. (define-charset 'latin-jisx0201
  203. "Roman Part of JISX0201.1976"
  204. :short-name "JISX0201 Roman"
  205. :long-name "Japanese Roman (JISX0201.1976)"
  206. :iso-final-char ?J
  207. :emacs-mule-id 138
  208. :supplementary-p t
  209. :code-space [33 126]
  210. :subset '(jisx0201 33 126 0))
  211. (define-charset 'katakana-jisx0201
  212. "Katakana Part of JISX0201.1976"
  213. :short-name "JISX0201 Katakana"
  214. :long-name "Japanese Katakana (JISX0201.1976)"
  215. :iso-final-char ?I
  216. :emacs-mule-id 137
  217. :supplementary-p t
  218. :code-space [33 126]
  219. :subset '(jisx0201 161 254 -128))
  220. (define-charset 'chinese-gb2312
  221. "GB2312 Chinese simplified: ISO-IR-58"
  222. :short-name "GB2312"
  223. :long-name "GB2312: ISO-IR-58"
  224. :iso-final-char ?A
  225. :emacs-mule-id 145
  226. :code-space [33 126 33 126]
  227. :code-offset #x110000
  228. :unify-map "GB2312")
  229. (define-charset 'chinese-gbk
  230. "GBK Chinese simplified."
  231. :short-name "GBK"
  232. :code-space [#x40 #xFE #x81 #xFE]
  233. :code-offset #x160000
  234. :unify-map "GBK")
  235. (define-charset-alias 'cp936 'chinese-gbk)
  236. (define-charset-alias 'windows-936 'chinese-gbk)
  237. (define-charset 'chinese-cns11643-1
  238. "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
  239. :short-name "CNS11643-1"
  240. :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
  241. :iso-final-char ?G
  242. :emacs-mule-id 149
  243. :code-space [33 126 33 126]
  244. :code-offset #x114000
  245. :unify-map "CNS-1")
  246. (define-charset 'chinese-cns11643-2
  247. "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
  248. :short-name "CNS11643-2"
  249. :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
  250. :iso-final-char ?H
  251. :emacs-mule-id 150
  252. :code-space [33 126 33 126]
  253. :code-offset #x118000
  254. :unify-map "CNS-2")
  255. (define-charset 'chinese-cns11643-3
  256. "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
  257. :short-name "CNS11643-3"
  258. :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
  259. :iso-final-char ?I
  260. :code-space [33 126 33 126]
  261. :emacs-mule-id 246
  262. :code-offset #x11C000
  263. :unify-map "CNS-3")
  264. (define-charset 'chinese-cns11643-4
  265. "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
  266. :short-name "CNS11643-4"
  267. :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
  268. :iso-final-char ?J
  269. :emacs-mule-id 247
  270. :code-space [33 126 33 126]
  271. :code-offset #x120000
  272. :unify-map "CNS-4")
  273. (define-charset 'chinese-cns11643-5
  274. "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
  275. :short-name "CNS11643-5"
  276. :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
  277. :iso-final-char ?K
  278. :emacs-mule-id 248
  279. :code-space [33 126 33 126]
  280. :code-offset #x124000
  281. :unify-map "CNS-5")
  282. (define-charset 'chinese-cns11643-6
  283. "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
  284. :short-name "CNS11643-6"
  285. :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
  286. :iso-final-char ?L
  287. :emacs-mule-id 249
  288. :code-space [33 126 33 126]
  289. :code-offset #x128000
  290. :unify-map "CNS-6")
  291. (define-charset 'chinese-cns11643-7
  292. "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
  293. :short-name "CNS11643-7"
  294. :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
  295. :iso-final-char ?M
  296. :emacs-mule-id 250
  297. :code-space [33 126 33 126]
  298. :code-offset #x12C000
  299. :unify-map "CNS-7")
  300. (define-charset 'big5
  301. "Big5 (Chinese traditional)"
  302. :short-name "Big5"
  303. :code-space [#x40 #xFE #xA1 #xFE]
  304. :code-offset #x130000
  305. :unify-map "BIG5")
  306. ;; Fixme: AKA cp950 according to
  307. ;; <URL:http://www.microsoft.com/globaldev/reference/WinCP.asp>. Is
  308. ;; that correct?
  309. (define-charset 'chinese-big5-1
  310. "Frequently used part (A141-C67E) of Big5 (Chinese traditional)"
  311. :short-name "Big5 (Level-1)"
  312. :long-name "Big5 (Level-1) A141-C67F"
  313. :iso-final-char ?0
  314. :emacs-mule-id 152
  315. :supplementary-p t
  316. :code-space [#x21 #x7E #x21 #x7E]
  317. :code-offset #x135000
  318. :unify-map "BIG5-1")
  319. (define-charset 'chinese-big5-2
  320. "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
  321. :short-name "Big5 (Level-2)"
  322. :long-name "Big5 (Level-2) C940-FEFE"
  323. :iso-final-char ?1
  324. :emacs-mule-id 153
  325. :supplementary-p t
  326. :code-space [#x21 #x7E #x21 #x7E]
  327. :code-offset #x137800
  328. :unify-map "BIG5-2")
  329. (define-charset 'japanese-jisx0208
  330. "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
  331. :short-name "JISX0208"
  332. :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
  333. :iso-final-char ?B
  334. :emacs-mule-id 146
  335. :code-space [33 126 33 126]
  336. :code-offset #x140000
  337. :unify-map "JISX0208")
  338. (define-charset 'japanese-jisx0208-1978
  339. "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
  340. :short-name "JISX0208.1978"
  341. :long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42"
  342. :iso-final-char ?@
  343. :emacs-mule-id 144
  344. :code-space [33 126 33 126]
  345. :code-offset #x144000
  346. :unify-map "JISC6226")
  347. (define-charset 'japanese-jisx0212
  348. "JISX0212 Japanese supplement: ISO-IR-159"
  349. :short-name "JISX0212"
  350. :long-name "JISX0212 (Japanese): ISO-IR-159"
  351. :iso-final-char ?D
  352. :emacs-mule-id 148
  353. :code-space [33 126 33 126]
  354. :code-offset #x148000
  355. :unify-map "JISX0212")
  356. ;; Note that jisx0213 contains characters not in Unicode (3.2?). It's
  357. ;; arguable whether it should have a unify-map.
  358. (define-charset 'japanese-jisx0213-1
  359. "JISX0213.2000 Plane 1 (Japanese)"
  360. :short-name "JISX0213-1"
  361. :iso-final-char ?O
  362. :emacs-mule-id 151
  363. :unify-map "JISX2131"
  364. :code-space [33 126 33 126]
  365. :code-offset #x14C000)
  366. (define-charset 'japanese-jisx0213-2
  367. "JISX0213.2000 Plane 2 (Japanese)"
  368. :short-name "JISX0213-2"
  369. :iso-final-char ?P
  370. :emacs-mule-id 254
  371. :unify-map "JISX2132"
  372. :code-space [33 126 33 126]
  373. :code-offset #x150000)
  374. (define-charset 'japanese-jisx0213-a
  375. "JISX0213.2004 adds these characters to JISX0213.2000."
  376. :short-name "JISX0213A"
  377. :dimension 2
  378. :code-space [33 126 33 126]
  379. :supplementary-p t
  380. :map "JISX213A")
  381. (define-charset 'japanese-jisx0213.2004-1
  382. "JISX0213.2004 Plane1 (Japanese)"
  383. :short-name "JISX0213.2004-1"
  384. :dimension 2
  385. :code-space [33 126 33 126]
  386. :iso-final-char ?Q
  387. :superset '(japanese-jisx0213-a japanese-jisx0213-1))
  388. (define-charset 'katakana-sjis
  389. "Katakana part of Shift-JIS"
  390. :dimension 1
  391. :code-space [#xA1 #xDF]
  392. :subset '(jisx0201 #xA1 #xDF 0)
  393. :supplementary-p t)
  394. (define-charset 'cp932-2-byte
  395. "2-byte part of CP932"
  396. :dimension 2
  397. :map "CP932-2BYTE"
  398. :code-space [#x40 #xFC #x81 #xFC]
  399. :supplementary-p t)
  400. (define-charset 'cp932
  401. "CP932 (Microsoft shift-jis)"
  402. :code-space [#x00 #xFF #x00 #xFE]
  403. :short-name "CP932"
  404. :superset '(ascii katakana-sjis cp932-2-byte))
  405. (define-charset 'korean-ksc5601
  406. "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
  407. :short-name "KSC5601"
  408. :long-name "KSC5601 (Korean): ISO-IR-149"
  409. :iso-final-char ?C
  410. :emacs-mule-id 147
  411. :code-space [33 126 33 126]
  412. :code-offset #x279f94 ; ... #x27c217
  413. :unify-map "KSC5601")
  414. (define-charset 'big5-hkscs
  415. "Big5-HKSCS (Chinese traditional, Hong Kong supplement)"
  416. :short-name "Big5"
  417. :code-space [#x40 #xFE #xA1 #xFE]
  418. :code-offset #x27c218 ; ... #x280839
  419. :unify-map "BIG5-HKSCS")
  420. (define-charset 'cp949-2-byte
  421. "2-byte part of CP949"
  422. :dimension 2
  423. :map "CP949-2BYTE"
  424. :code-space [#x41 #xFE #x81 #xFD]
  425. :supplementary-p t)
  426. (define-charset 'cp949
  427. "CP949 (Korean)"
  428. :short-name "CP949"
  429. :long-name "CP949 (Korean)"
  430. :code-space [#x00 #xFE #x00 #xFD]
  431. :superset '(ascii cp949-2-byte))
  432. (define-charset 'chinese-sisheng
  433. "SiSheng characters for PinYin/ZhuYin"
  434. :short-name "SiSheng"
  435. :long-name "SiSheng (PinYin/ZhuYin)"
  436. :iso-final-char ?0
  437. :emacs-mule-id 160
  438. :code-space [33 126]
  439. :unify-map "MULE-sisheng"
  440. :supplementary-p t
  441. :code-offset #x200000)
  442. ;; A subset of the 1989 version of IPA. It consists of the consonant
  443. ;; signs used in English, French, German and Italian, and all vowels
  444. ;; signs in the table. [says old MULE doc]
  445. (define-charset 'ipa
  446. "IPA (International Phonetic Association)"
  447. :short-name "IPA"
  448. :iso-final-char ?0
  449. :emacs-mule-id 161
  450. :unify-map "MULE-ipa"
  451. :code-space [32 127]
  452. :supplementary-p t
  453. :code-offset #x200080)
  454. (define-charset 'viscii
  455. "VISCII1.1"
  456. :short-name "VISCII"
  457. :long-name "VISCII 1.1"
  458. :code-space [0 255]
  459. :map "VISCII")
  460. (define-charset 'vietnamese-viscii-lower
  461. "VISCII1.1 lower-case"
  462. :short-name "VISCII lower"
  463. :long-name "VISCII lower-case"
  464. :iso-final-char ?1
  465. :emacs-mule-id 162
  466. :code-space [32 127]
  467. :code-offset #x200200
  468. :supplementary-p t
  469. :unify-map "MULE-lviscii")
  470. (define-charset 'vietnamese-viscii-upper
  471. "VISCII1.1 upper-case"
  472. :short-name "VISCII upper"
  473. :long-name "VISCII upper-case"
  474. :iso-final-char ?2
  475. :emacs-mule-id 163
  476. :code-space [32 127]
  477. :code-offset #x200280
  478. :supplementary-p t
  479. :unify-map "MULE-uviscii")
  480. (define-charset 'vscii
  481. "VSCII1.1 (TCVN-5712 VN1)"
  482. :short-name "VSCII"
  483. :code-space [0 255]
  484. :map "VSCII")
  485. (define-charset-alias 'tcvn-5712 'vscii)
  486. ;; Fixme: see note in tcvn.map about combining characters
  487. (define-charset 'vscii-2
  488. "VSCII-2 (TCVN-5712 VN2)"
  489. :code-space [0 255]
  490. :map "VSCII-2")
  491. (define-charset 'koi8-r
  492. "KOI8-R"
  493. :short-name "KOI8-R"
  494. :ascii-compatible-p t
  495. :code-space [0 255]
  496. :map "KOI8-R")
  497. (define-charset-alias 'koi8 'koi8-r)
  498. (define-charset 'alternativnyj
  499. "ALTERNATIVNYJ"
  500. :short-name "alternativnyj"
  501. :ascii-compatible-p t
  502. :code-space [0 255]
  503. :map "ALTERNATIVNYJ")
  504. (define-charset 'cp866
  505. "CP866"
  506. :short-name "cp866"
  507. :ascii-compatible-p t
  508. :code-space [0 255]
  509. :map "IBM866")
  510. (define-charset-alias 'ibm866 'cp866)
  511. (define-charset 'koi8-u
  512. "KOI8-U"
  513. :short-name "KOI8-U"
  514. :ascii-compatible-p t
  515. :code-space [0 255]
  516. :map "KOI8-U")
  517. (define-charset 'koi8-t
  518. "KOI8-T"
  519. :short-name "KOI8-T"
  520. :ascii-compatible-p t
  521. :code-space [0 255]
  522. :map "KOI8-T")
  523. (define-charset 'georgian-ps
  524. "GEORGIAN-PS"
  525. :short-name "GEORGIAN-PS"
  526. :ascii-compatible-p t
  527. :code-space [0 255]
  528. :map "KA-PS")
  529. (define-charset 'georgian-academy
  530. "GEORGIAN-ACADEMY"
  531. :short-name "GEORGIAN-ACADEMY"
  532. :ascii-compatible-p t
  533. :code-space [0 255]
  534. :map "KA-ACADEMY")
  535. (define-charset 'windows-1250
  536. "WINDOWS-1250 (Central Europe)"
  537. :short-name "WINDOWS-1250"
  538. :ascii-compatible-p t
  539. :code-space [0 255]
  540. :map "CP1250")
  541. (define-charset-alias 'cp1250 'windows-1250)
  542. (define-charset 'windows-1251
  543. "WINDOWS-1251 (Cyrillic)"
  544. :short-name "WINDOWS-1251"
  545. :ascii-compatible-p t
  546. :code-space [0 255]
  547. :map "CP1251")
  548. (define-charset-alias 'cp1251 'windows-1251)
  549. (define-charset 'windows-1252
  550. "WINDOWS-1252 (Latin I)"
  551. :short-name "WINDOWS-1252"
  552. :ascii-compatible-p t
  553. :code-space [0 255]
  554. :map "CP1252")
  555. (define-charset-alias 'cp1252 'windows-1252)
  556. (define-charset 'windows-1253
  557. "WINDOWS-1253 (Greek)"
  558. :short-name "WINDOWS-1253"
  559. :ascii-compatible-p t
  560. :code-space [0 255]
  561. :map "CP1253")
  562. (define-charset-alias 'cp1253 'windows-1253)
  563. (define-charset 'windows-1254
  564. "WINDOWS-1254 (Turkish)"
  565. :short-name "WINDOWS-1254"
  566. :ascii-compatible-p t
  567. :code-space [0 255]
  568. :map "CP1254")
  569. (define-charset-alias 'cp1254 'windows-1254)
  570. (define-charset 'windows-1255
  571. "WINDOWS-1255 (Hebrew)"
  572. :short-name "WINDOWS-1255"
  573. :ascii-compatible-p t
  574. :code-space [0 255]
  575. :map "CP1255")
  576. (define-charset-alias 'cp1255 'windows-1255)
  577. (define-charset 'windows-1256
  578. "WINDOWS-1256 (Arabic)"
  579. :short-name "WINDOWS-1256"
  580. :ascii-compatible-p t
  581. :code-space [0 255]
  582. :map "CP1256")
  583. (define-charset-alias 'cp1256 'windows-1256)
  584. (define-charset 'windows-1257
  585. "WINDOWS-1257 (Baltic)"
  586. :short-name "WINDOWS-1257"
  587. :ascii-compatible-p t
  588. :code-space [0 255]
  589. :map "CP1257")
  590. (define-charset-alias 'cp1257 'windows-1257)
  591. (define-charset 'windows-1258
  592. "WINDOWS-1258 (Viet Nam)"
  593. :short-name "WINDOWS-1258"
  594. :ascii-compatible-p t
  595. :code-space [0 255]
  596. :map "CP1258")
  597. (define-charset-alias 'cp1258 'windows-1258)
  598. (define-charset 'next
  599. "NEXT"
  600. :short-name "NEXT"
  601. :ascii-compatible-p t
  602. :code-space [0 255]
  603. :map "NEXTSTEP")
  604. (define-charset 'cp1125
  605. "CP1125"
  606. :short-name "CP1125"
  607. :code-space [0 255]
  608. :ascii-compatible-p t
  609. :map "CP1125")
  610. (define-charset-alias 'ruscii 'cp1125)
  611. ;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
  612. (define-charset-alias 'cp866u 'cp1125)
  613. ;; Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html
  614. ;; shows this as not ASCII compatible, with various graphics in
  615. ;; 0x01-0x1F.
  616. (define-charset 'cp437
  617. "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)"
  618. :short-name "CP437"
  619. :code-space [0 255]
  620. :ascii-compatible-p t
  621. :map "IBM437")
  622. (define-charset 'cp720
  623. "CP720 (Arabic)"
  624. :short-name "CP720"
  625. :code-space [0 255]
  626. :ascii-compatible-p t
  627. :map "CP720")
  628. (define-charset 'cp737
  629. "CP737 (PC Greek)"
  630. :short-name "CP737"
  631. :code-space [0 255]
  632. :ascii-compatible-p t
  633. :map "CP737")
  634. (define-charset 'cp775
  635. "CP775 (PC Baltic)"
  636. :short-name "CP775"
  637. :code-space [0 255]
  638. :ascii-compatible-p t
  639. :map "CP775")
  640. (define-charset 'cp851
  641. "CP851 (Greek)"
  642. :short-name "CP851"
  643. :code-space [0 255]
  644. :ascii-compatible-p t
  645. :map "IBM851")
  646. (define-charset 'cp852
  647. "CP852 (MS-DOS Latin-2)"
  648. :short-name "CP852"
  649. :code-space [0 255]
  650. :ascii-compatible-p t
  651. :map "IBM852")
  652. (define-charset 'cp855
  653. "CP855 (IBM Cyrillic)"
  654. :short-name "CP855"
  655. :code-space [0 255]
  656. :ascii-compatible-p t
  657. :map "IBM855")
  658. (define-charset 'cp857
  659. "CP857 (IBM Turkish)"
  660. :short-name "CP857"
  661. :code-space [0 255]
  662. :ascii-compatible-p t
  663. :map "IBM857")
  664. (define-charset 'cp858
  665. "CP858 (Multilingual Latin I + Euro)"
  666. :short-name "CP858"
  667. :code-space [0 255]
  668. :ascii-compatible-p t
  669. :map "CP858")
  670. (define-charset-alias 'cp00858 'cp858) ; IANA has IBM00858/CP00858
  671. (define-charset 'cp860
  672. "CP860 (MS-DOS Portuguese)"
  673. :short-name "CP860"
  674. :code-space [0 255]
  675. :ascii-compatible-p t
  676. :map "IBM860")
  677. (define-charset 'cp861
  678. "CP861 (MS-DOS Icelandic)"
  679. :short-name "CP861"
  680. :code-space [0 255]
  681. :ascii-compatible-p t
  682. :map "IBM861")
  683. (define-charset 'cp862
  684. "CP862 (PC Hebrew)"
  685. :short-name "CP862"
  686. :code-space [0 255]
  687. :ascii-compatible-p t
  688. :map "IBM862")
  689. (define-charset 'cp863
  690. "CP863 (MS-DOS Canadian French)"
  691. :short-name "CP863"
  692. :code-space [0 255]
  693. :ascii-compatible-p t
  694. :map "IBM863")
  695. (define-charset 'cp864
  696. "CP864 (PC Arabic)"
  697. :short-name "CP864"
  698. :code-space [0 255]
  699. :ascii-compatible-p t
  700. :map "IBM864")
  701. (define-charset 'cp865
  702. "CP865 (MS-DOS Nordic)"
  703. :short-name "CP865"
  704. :code-space [0 255]
  705. :ascii-compatible-p t
  706. :map "IBM865")
  707. (define-charset 'cp869
  708. "CP869 (IBM Modern Greek)"
  709. :short-name "CP869"
  710. :code-space [0 255]
  711. :ascii-compatible-p t
  712. :map "IBM869")
  713. (define-charset 'cp874
  714. "CP874 (IBM Thai)"
  715. :short-name "CP874"
  716. :code-space [0 255]
  717. :ascii-compatible-p t
  718. :map "IBM874")
  ;; Arabic is covered by three separate character sets: the digits
  ;; (left-to-right, one column wide) and the remaining characters
  ;; (right-to-left), which are split by width into a 1-column and a
  ;; 2-column set.
  (define-charset 'arabic-digit
    "Arabic digit"
    :short-name "Arabic digit"
    :iso-final-char ?2
    :emacs-mule-id 164
    :supplementary-p t
    :code-space [#x22 #x2A]
    :code-offset #x0600)

  (define-charset 'arabic-1-column
    "Arabic 1-column"
    :short-name "Arabic 1-col"
    :long-name "Arabic 1-column"
    :iso-final-char ?3
    :emacs-mule-id 165
    :supplementary-p t
    :code-space [#x21 #x7E]
    :code-offset #x200100)

  (define-charset 'arabic-2-column
    "Arabic 2-column"
    :short-name "Arabic 2-col"
    :long-name "Arabic 2-column"
    :iso-final-char ?4
    :emacs-mule-id 224
    :supplementary-p t
    :code-space [#x21 #x7E]
    :code-offset #x200180)
  ;; Lao script.  Codes #x21..#x7E are mapped onto U+0E81..U+0EDF; not
  ;; every code point in that range is defined in Unicode.
  (define-charset 'lao
    "Lao characters (ISO10646 0E81..0EDF)"
    :short-name "Lao"
    :iso-final-char ?1
    :emacs-mule-id 167
    :supplementary-p t
    :code-space [#x21 #x7E]
    :code-offset #x0E81)

  ;; 8-bit charset built as a superset of `ascii', `eight-bit-control'
  ;; and `lao' (the latter listed with the value 128).
  (define-charset 'mule-lao
    "Lao characters (ISO10646 0E81..0EDF)"
    :short-name "Lao"
    :code-space [#x00 #xFF]
    :supplementary-p t
    :superset '(ascii eight-bit-control (lao . 128)))
  ;; Indian scripts.  This is a symbolic charset meant for data
  ;; exchange; no glyphs are assigned to it.  Its characters are
  ;; converted automatically to each Indian script that IS-13194
  ;; supports.
  (define-charset 'indian-is13194
    "Generic Indian charset for data exchange with IS 13194"
    :short-name "IS 13194"
    :long-name "Indian IS 13194"
    :iso-final-char ?5
    :emacs-mule-id 225
    :supplementary-p t
    :code-space [#x21 #x7E]
    :code-offset #x180000)
  ;; Per-font glyph charsets for the Indian scripts.  Each charset is a
  ;; 256-code block; blocks are allocated consecutively starting at
  ;; #x180100, first for every CDAC script and then for every AKRUTI
  ;; script.  Each table entry is (NAME-SUFFIX FONT-LABEL SCRIPTS).
  (let ((next-offset #x180100))
    (dolist (font '(("cdac" "CDAC"
                     (devanagari sanskrit bengali tamil telugu assamese
                      oriya kannada malayalam gujarati punjabi))
                    ("akruti" "AKRUTI"
                     (devanagari bengali punjabi gujarati
                      oriya tamil telugu kannada malayalam))))
      (let ((suffix (nth 0 font))
            (label (nth 1 font)))
        (dolist (script (nth 2 font))
          (let ((script-name (capitalize (symbol-name script))))
            (define-charset (intern (format "%s-%s" script suffix))
              (format "Glyphs of %s script for %s font. Subset of `indian-glyph'."
                      script-name label)
              :short-name (format "%s %s glyphs" label script-name)
              :supplementary-p t
              :code-space [0 255]
              :code-offset next-offset))
          ;; Advance to the next 256-code block.
          (setq next-offset (+ next-offset #x100))))))
  ;; Two-dimensional charset holding the glyphs for Indian characters.
  (define-charset 'indian-glyph
    "Glyphs for Indian characters."
    :short-name "Indian glyph"
    :iso-final-char ?4
    :emacs-mule-id 240
    :supplementary-p t
    :code-space [#x20 #x7F #x20 #x7F]
    :code-offset #x180100)

  ;; Actual glyphs of 1-column width.
  ;; NOTE(review): this charset and `indian-2-column' below are given
  ;; the same :emacs-mule-id (251) and the same :code-offset -- confirm
  ;; that this is intended.
  (define-charset 'indian-1-column
    "Indian charset for 1-column width glyphs."
    :short-name "Indian 1-col"
    :long-name "Indian 1 Column"
    :iso-final-char ?6
    :emacs-mule-id 251
    :supplementary-p t
    :code-space [#x21 #x7E #x21 #x7E]
    :code-offset #x184000)

  ;; Actual glyphs of 2-column width.
  (define-charset 'indian-2-column
    "Indian charset for 2-column width glyphs."
    :short-name "Indian 2-col"
    :long-name "Indian 2 Column"
    :iso-final-char ?5
    :emacs-mule-id 251
    :supplementary-p t
    :code-space [#x21 #x7E #x21 #x7E]
    :code-offset #x184000)
  ;; Two-dimensional Tibetan charset, unified through the map file
  ;; "MULE-tibetan".  The :iso-final-char property is given once only
  ;; (it used to be listed twice with the same value).
  (define-charset 'tibetan
    "Tibetan characters"
    :short-name "Tibetan 2-col"
    :long-name "Tibetan 2 column"
    :iso-final-char ?7
    :emacs-mule-id 252
    :unify-map "MULE-tibetan"
    :supplementary-p t
    :code-space [33 126 33 37]
    :code-offset #x190000)
  ;; 1-column Tibetan glyphs.  Uses the same code space and code offset
  ;; as the `tibetan' charset.
  (define-charset 'tibetan-1-column
    "Tibetan 1 column glyph"
    :short-name "Tibetan 1-col"
    :long-name "Tibetan 1 column"
    :iso-final-char ?8
    :emacs-mule-id 241
    :supplementary-p t
    :code-space [#x21 #x7E #x21 #x25]
    :code-offset #x190000)
  ;; Subsets of Unicode.
  (define-charset 'mule-unicode-2500-33ff
    "Unicode characters of the range U+2500..U+33FF."
    :short-name "Unicode subset 2"
    :long-name "Unicode subset (U+2500..U+33FF)"
    :iso-final-char ?2
    :emacs-mule-id 242
    :supplementary-p t
    :code-space [32 127 32 71]
    :code-offset #x2500)
  ;; Unicode subset U+E000..U+FFFF.  The long name now spells the range
  ;; as "U+E000..U+FFFF", matching the docstring and the sibling
  ;; `mule-unicode-*' charsets (it used to read "U+E000+FFFF").
  (define-charset 'mule-unicode-e000-ffff
    "Unicode characters of the range U+E000..U+FFFF."
    :short-name "Unicode subset 3"
    :long-name "Unicode subset (U+E000..U+FFFF)"
    :iso-final-char ?3
    :emacs-mule-id 243
    :supplementary-p t
    :code-space [#x20 #x7F #x20 #x75]
    :code-offset #xE000
    ;; 30015 = #x753F, the code that corresponds to U+FFFF.
    :max-code 30015)
  ;; Unicode subset U+0100..U+24FF.
  (define-charset 'mule-unicode-0100-24ff
    "Unicode characters of the range U+0100..U+24FF."
    :short-name "Unicode subset"
    :long-name "Unicode subset (U+0100..U+24FF)"
    :iso-final-char ?1
    :emacs-mule-id 244
    :supplementary-p t
    :code-space [32 127 32 127]
    :code-offset #x100)
  ;; Whole Unicode planes, each defined as a two-dimensional charset
  ;; (code space 0..255 in both dimensions) at the plane's code offset.
  ;; The short name of `unicode-smp' no longer carries a trailing
  ;; space, so it is consistent with the other planes.
  (define-charset 'unicode-bmp
    "Unicode Basic Multilingual Plane (U+0000..U+FFFF)"
    :short-name "Unicode BMP"
    :code-space [0 255 0 255]
    :code-offset 0)

  (define-charset 'unicode-smp
    "Unicode Supplementary Multilingual Plane (U+10000..U+1FFFF)"
    :short-name "Unicode SMP"
    :code-space [0 255 0 255]
    :code-offset #x10000)

  (define-charset 'unicode-sip
    "Unicode Supplementary Ideographic Plane (U+20000..U+2FFFF)"
    :short-name "Unicode SIP"
    :code-space [0 255 0 255]
    :code-offset #x20000)

  (define-charset 'unicode-ssp
    "Unicode Supplementary Special-purpose Plane (U+E0000..U+EFFFF)"
    :short-name "Unicode SSP"
    :code-space [0 255 0 255]
    :code-offset #xE0000)
  ;; Ethiopic: two-dimensional charset, unified through the map file
  ;; "MULE-ethiopic".
  (define-charset 'ethiopic
    "Ethiopic characters for Amharic and Tigrigna."
    :short-name "Ethiopic"
    :long-name "Ethiopic characters"
    :iso-final-char ?3
    :emacs-mule-id 245
    :supplementary-p t
    :unify-map "MULE-ethiopic"
    :code-space [#x21 #x7E #x21 #x7E]
    :code-offset #x1A0000)

  ;; Mac Roman: ASCII-compatible 8-bit charset read from the map file
  ;; "MACINTOSH".
  (define-charset 'mac-roman
    "Mac Roman charset"
    :short-name "Mac Roman"
    :ascii-compatible-p t
    :code-space [#x00 #xFF]
    :map "MACINTOSH")
  ;; EBCDIC variants.
  ;; Fixme: modern EBCDIC variants, e.g. IBM00924?
  (define-charset 'ebcdic-us
    "US version of EBCDIC"
    :short-name "EBCDIC-US"
    :code-space [#x00 #xFF]
    :mime-charset 'ebcdic-us
    :map "EBCDICUS")

  (define-charset 'ebcdic-uk
    "UK version of EBCDIC"
    :short-name "EBCDIC-UK"
    :code-space [#x00 #xFF]
    :mime-charset 'ebcdic-uk
    :map "EBCDICUK")

  (define-charset 'ibm1047
    ;; The description below is the wording used by groff.
    "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix."
    :short-name "IBM1047"
    :code-space [#x00 #xFF]
    :mime-charset 'ibm1047
    :map "IBM1047")

  ;; `cp1047' is another name for `ibm1047'.
  (define-charset-alias 'cp1047 'ibm1047)
  ;; HP Roman8: ASCII-compatible 8-bit charset read from the map file
  ;; "HP-ROMAN8".  The docstring spells the vendor name correctly
  ;; ("Hewlett-Packard"; it used to read "Hewlet-Packard").
  (define-charset 'hp-roman8
    "Encoding used by Hewlett-Packard printer software"
    :short-name "HP-ROMAN8"
    :ascii-compatible-p t
    :code-space [0 255]
    :map "HP-ROMAN8")
  ;; Adobe PostScript encodings; both start at code #x20.
  ;; To build a coding system on `adobe-standard-encoding', a
  ;; pre-write-conversion would have to account for the multi-valued
  ;; code points that are commented out in stdenc.map.
  (define-charset 'adobe-standard-encoding
    "Adobe `standard encoding' used in PostScript"
    :short-name "ADOBE-STANDARD-ENCODING"
    :code-space [32 255]
    :map "stdenc")

  (define-charset 'symbol
    "Adobe symbol encoding used in PostScript"
    :short-name "ADOBE-SYMBOL"
    :code-space [32 255]
    :map "symbol")
  ;; More ASCII-compatible 8-bit code pages, each followed by its
  ;; alternative names.
  (define-charset 'ibm850
    "DOS codepage 850 (Latin-1)"
    :short-name "IBM850"
    :ascii-compatible-p t
    :code-space [#x00 #xFF]
    :map "IBM850")
  (define-charset-alias 'cp850 'ibm850)

  (define-charset 'mik
    "Bulgarian DOS codepage"
    :short-name "MIK"
    :ascii-compatible-p t
    :code-space [#x00 #xFF]
    :map "MIK")

  (define-charset 'ptcp154
    "`Paratype' codepage (Asian Cyrillic)"
    :short-name "PT154"
    :ascii-compatible-p t
    :code-space [#x00 #xFF]
    :mime-charset 'pt154
    :map "PTCP154")
  (define-charset-alias 'pt154 'ptcp154)
  (define-charset-alias 'cp154 'ptcp154)
  ;; GB18030.  The complete charset `gb18030' defined last is a
  ;; superset of `ascii' and of the five component charsets defined
  ;; first: one 2-byte charset and four 4-byte charsets.
  (define-charset 'gb18030-2-byte
    "GB18030 2-byte (0x814E..0xFEFE)"
    :code-space [#x40 #xFE #x81 #xFE]
    :supplementary-p t
    :map "GB180302")

  (define-charset 'gb18030-4-byte-bmp
    "GB18030 4-byte for BMP (0x81308130-0x8431A439)"
    :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84]
    :supplementary-p t
    :map "GB180304")

  (define-charset 'gb18030-4-byte-smp
    "GB18030 4-byte for SMP (0x90308130-0xE3329A35)"
    :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3]
    :min-code '(#x9030 . #x8130)
    :max-code '(#xE332 . #x9A35)
    :supplementary-p t
    :code-offset #x10000)

  (define-charset 'gb18030-4-byte-ext-1
    "GB18030 4-byte (0x8431A530-0x8F39FE39)"
    :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F]
    :min-code '(#x8431 . #xA530)
    :max-code '(#x8F39 . #xFE39)
    :supplementary-p t
    ;; Characters run from #x200000 up to #x22484B.
    :code-offset #x200000)

  (define-charset 'gb18030-4-byte-ext-2
    "GB18030 4-byte (0xE3329A36-0xFE39FE39)"
    :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE]
    :min-code '(#xE332 . #x9A36)
    :max-code '(#xFE39 . #xFE39)
    :supplementary-p t
    ;; Characters run from #x22484C up to #x279f93.
    :code-offset #x22484C)

  (define-charset 'gb18030
    "GB18030"
    :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE]
    :min-code 0
    :max-code '(#xFE39 . #xFE39)
    :superset '(ascii gb18030-2-byte
                gb18030-4-byte-bmp gb18030-4-byte-smp
                gb18030-4-byte-ext-1 gb18030-4-byte-ext-2))
  ;; CNS 11643 plane 15: two-dimensional charset placed at code offset
  ;; #x27A000.
  (define-charset 'chinese-cns11643-15
    "CNS11643 Plane 15 Chinese Traditional"
    :short-name "CNS11643-15"
    :long-name "CNS11643-15 (Chinese traditional)"
    :code-space [#x21 #x7E #x21 #x7E]
    :code-offset #x27A000)
  ;; Call `unify-charset' on each of the charsets below, in the order
  ;; listed.
  (dolist (charset '(chinese-gb2312
                     chinese-gbk
                     chinese-cns11643-1
                     chinese-cns11643-2
                     chinese-cns11643-3
                     chinese-cns11643-4
                     chinese-cns11643-5
                     chinese-cns11643-6
                     chinese-cns11643-7
                     big5
                     chinese-big5-1
                     chinese-big5-2
                     big5-hkscs
                     korean-ksc5601
                     vietnamese-viscii-lower
                     vietnamese-viscii-upper
                     chinese-sisheng
                     ipa
                     tibetan
                     ethiopic
                     japanese-jisx0208-1978
                     japanese-jisx0208
                     japanese-jisx0212
                     japanese-jisx0213-1
                     japanese-jisx0213-2))
    (unify-charset charset))
  ;; Tables for translating characters on decoding and on encoding.
  ;; Both are reset to nil here.
  ;; Fixme: these aren't used now -- should they be?
  (setq standard-translation-table-for-decode nil
        standard-translation-table-for-encode nil)
  ;;; Make fundamental coding systems.

  ;; The coding systems `no-conversion' and `undecided' are not defined
  ;; here; they are already defined in coding.c, as if by:
  ;;
  ;; (define-coding-system 'no-conversion
  ;;   "..."
  ;;   :coding-type 'raw-text
  ;;   ...)
  ;; (define-coding-system 'undecided
  ;;   "..."
  ;;   :coding-type 'undecided
  ;;   ...)

  ;; Short alternative names, given as (ALIAS . CODING-SYSTEM).
  (dolist (alias '((binary . no-conversion)
                   (unix . undecided-unix)
                   (dos . undecided-dos)
                   (mac . undecided-mac)))
    (define-coding-system-alias (car alias) (cdr alias)))
  ;; `raw-text': coding type `raw-text', marked :for-unibyte.
  (define-coding-system 'raw-text
    "Raw text, which means text contains random 8-bit codes.
Encoding text with this coding system produces the actual byte
sequence of the text in buffers and strings. An exception is made for
characters from the `eight-bit' character set. Each of them is encoded
into a single byte.
When you visit a file with this coding, the file is read into a
unibyte buffer as is (except for EOL format), thus each byte of a file
is treated as a character."
    :coding-type 'raw-text
    :for-unibyte t
    :mnemonic ?t)

  ;; Same coding type, but with a fixed `unix' EOL type and without
  ;; :for-unibyte.
  (define-coding-system 'no-conversion-multibyte
    "Like `no-conversion' but don't read a file into a unibyte buffer."
    :coding-type 'raw-text
    :eol-type 'unix
    :mnemonic ?=)
  ;; Latin-1, defined as a `charset' type coding system over the
  ;; `iso-8859-1' charset, plus its two alternative names.
  (define-coding-system 'iso-latin-1
    "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)."
    :coding-type 'charset
    :mnemonic ?1
    :charset-list '(iso-8859-1)
    :mime-charset 'iso-8859-1)

  (dolist (alias '(iso-8859-1 latin-1))
    (define-coding-system-alias alias 'iso-latin-1))
  ;; Coding systems that do not belong to a particular language
  ;; environment.

  (define-coding-system 'emacs-mule
    "Emacs 21 internal format used in buffer and string."
    :coding-type 'emacs-mule
    :charset-list 'emacs-mule
    :mnemonic ?M)
  ;; UTF-8 in three variants that differ only in their :bom property:
  ;; none given, t, or a pair of coding systems for auto-detection.
  (define-coding-system 'utf-8
    "UTF-8 (no signature (BOM))"
    :coding-type 'utf-8
    :mnemonic ?U
    :charset-list '(unicode)
    :mime-charset 'utf-8)

  (define-coding-system 'utf-8-with-signature
    "UTF-8 (with signature (BOM))"
    :coding-type 'utf-8
    :mnemonic ?U
    :charset-list '(unicode)
    :bom t)

  (define-coding-system 'utf-8-auto
    "UTF-8 (auto-detect signature (BOM))"
    :coding-type 'utf-8
    :mnemonic ?U
    :charset-list '(unicode)
    :bom '(utf-8-with-signature . utf-8))

  ;; `mule-utf-8' is another name for `utf-8'.
  (define-coding-system-alias 'mule-utf-8 'utf-8)
  ;; UTF-8 type coding system over the `emacs' charset.
  (define-coding-system 'utf-8-emacs
    "Support for all Emacs characters (including non-Unicode characters)."
    :coding-type 'utf-8
    :mnemonic ?U
    :charset-list '(emacs))

  ;; `emacs-internal' is the encoding used internally.  It is meant to
  ;; be able to save any multibyte buffer without losing information.
  ;; It can change between Emacs releases, though, so it should only be
  ;; used for internal files.
  (define-coding-system-alias 'emacs-internal 'utf-8-emacs-unix)
  ;; UTF-16 family.  The variants differ in their :endian and :bom
  ;; properties; all of them are marked :mime-text-unsuitable.
  (define-coding-system 'utf-16le
    "UTF-16LE (little endian, no signature (BOM))."
    :coding-type 'utf-16
    :mnemonic ?U
    :charset-list '(unicode)
    :endian 'little
    :mime-text-unsuitable t
    :mime-charset 'utf-16le)

  (define-coding-system 'utf-16be
    "UTF-16BE (big endian, no signature (BOM))."
    :coding-type 'utf-16
    :mnemonic ?U
    :charset-list '(unicode)
    :endian 'big
    :mime-text-unsuitable t
    :mime-charset 'utf-16be)

  (define-coding-system 'utf-16le-with-signature
    "UTF-16 (little endian, with signature (BOM))."
    :coding-type 'utf-16
    :mnemonic ?U
    :charset-list '(unicode)
    :bom t
    :endian 'little
    :mime-text-unsuitable t
    :mime-charset 'utf-16)

  (define-coding-system 'utf-16be-with-signature
    "UTF-16 (big endian, with signature (BOM))."
    :coding-type 'utf-16
    :mnemonic ?U
    :charset-list '(unicode)
    :bom t
    :endian 'big
    :mime-text-unsuitable t
    :mime-charset 'utf-16)

  (define-coding-system 'utf-16
    "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)."
    :coding-type 'utf-16
    :mnemonic ?U
    :charset-list '(unicode)
    :bom '(utf-16le-with-signature . utf-16be-with-signature)
    :endian 'big
    :mime-text-unsuitable t
    :mime-charset 'utf-16)

  ;; Old names kept for backwards compatibility (they are also used by
  ;; Mule-UCS).  The MIME names are preferred.
  (define-coding-system-alias 'utf-16-le 'utf-16le-with-signature)
  (define-coding-system-alias 'utf-16-be 'utf-16be-with-signature)
  ;; Generic ISO 2022 based coding systems.  They differ in their
  ;; initial designations (the :designation vector) and in their
  ;; :flags.
  (define-coding-system 'iso-2022-7bit
    "ISO 2022 based 7-bit encoding using only G0."
    :coding-type 'iso-2022
    :mnemonic ?J
    :charset-list 'iso-2022
    :designation [(ascii t) nil nil nil]
    :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition))

  (define-coding-system 'iso-2022-7bit-ss2
    "ISO 2022 based 7-bit encoding using SS2 for 96-charset."
    :coding-type 'iso-2022
    :mnemonic ?$
    :charset-list 'iso-2022
    :designation [(ascii 94) nil (nil 96) nil]
    :flags '(short ascii-at-eol ascii-at-cntl 7-bit
             designation single-shift composition))

  (define-coding-system 'iso-2022-7bit-lock
    "ISO-2022 coding system using Locking-Shift for 96-charset."
    :coding-type 'iso-2022
    :mnemonic ?&
    :charset-list 'iso-2022
    :designation [(ascii 94) (nil 96) nil nil]
    :flags '(ascii-at-eol ascii-at-cntl 7-bit
             designation locking-shift composition))
  (define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock)

  (define-coding-system 'iso-2022-7bit-lock-ss2
    "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN."
    :coding-type 'iso-2022
    :mnemonic ?i
    :charset-list '(ascii
                    japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201
                    korean-ksc5601
                    chinese-gb2312
                    chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
                    chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
                    chinese-cns11643-7)
    :designation [(ascii 94)
                  (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96)
                  (nil chinese-cns11643-2)
                  (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
                       chinese-cns11643-6 chinese-cns11643-7)]
    :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift
             single-shift init-bol))
  (define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2)

  (define-coding-system 'iso-2022-8bit-ss2
    "ISO 2022 based 8-bit encoding using SS2 for 96-charset."
    :coding-type 'iso-2022
    :mnemonic ?@
    :charset-list 'iso-2022
    :designation [(ascii 94) nil (nil 96) nil]
    :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
  ;; Compound text and its variants.  All three share the same
  ;; :designation vector and differ in :flags and in the conversion
  ;; functions attached.
  (define-coding-system 'compound-text
    "Compound text based generic encoding.
This coding system is an extension of X's \"Compound Text Encoding\".
It encodes many characters using the normal ISO-2022 designation sequences,
but it doesn't support extended segments of CTEXT."
    :coding-type 'iso-2022
    :mnemonic ?x
    :charset-list 'iso-2022
    :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
    :flags '(ascii-at-eol ascii-at-cntl long-form
             designation locking-shift single-shift composition)
    ;; Fixme: this isn't a valid MIME charset and has to be
    ;; special-cased elsewhere -- fx
    :mime-charset 'x-ctext)

  (define-coding-system-alias 'x-ctext 'compound-text)
  (define-coding-system-alias 'ctext 'compound-text)

  ;; Same as compound-text, except that no composition escape sequences
  ;; are produced.  Used in the post-read and pre-write conversions of
  ;; compound-text-with-extensions, see mule.el.  This coding system
  ;; should not have a mime-charset property, so that it does not show
  ;; up close to the beginning of coding systems ordered by priority.
  (define-coding-system 'ctext-no-compositions
    "Compound text based generic encoding.
Like `compound-text', but does not produce escape sequences for compositions."
    :coding-type 'iso-2022
    :mnemonic ?x
    :charset-list 'iso-2022
    :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
    :flags '(ascii-at-eol ascii-at-cntl
             designation locking-shift single-shift))

  (define-coding-system 'compound-text-with-extensions
    "Compound text encoding with ICCCM Extended Segment extensions.
See the variables `ctext-standard-encodings' and
`ctext-non-standard-encodings-alist' for the detail about how
extended segments are handled.
This coding system should be used only for X selections. It is inappropriate
for decoding and encoding files, process I/O, etc."
    :coding-type 'iso-2022
    :mnemonic ?x
    :charset-list 'iso-2022
    :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil]
    :flags '(ascii-at-eol ascii-at-cntl long-form
             designation locking-shift single-shift)
    :post-read-conversion 'ctext-post-read-conversion
    :pre-write-conversion 'ctext-pre-write-conversion)

  (define-coding-system-alias
    'x-ctext-with-extensions 'compound-text-with-extensions)
  (define-coding-system-alias
    'ctext-with-extensions 'compound-text-with-extensions)
  ;; Plain ASCII as a `charset' type coding system; `?' is the
  ;; :default-char.  `iso-safe' is another name for it.
  (define-coding-system 'us-ascii
    "Encode ASCII as-is and encode non-ASCII characters to `?'."
    :coding-type 'charset
    :mnemonic ?-
    :charset-list '(ascii)
    :default-char ??
    :mime-charset 'us-ascii)

  (define-coding-system-alias 'iso-safe 'us-ascii)
  ;; UTF-7 and its IMAP variant.  Both are declared with coding type
  ;; `utf-8' and name pre-write / post-read conversion functions.
  (define-coding-system 'utf-7
    "UTF-7 encoding of Unicode (RFC 2152)."
    :coding-type 'utf-8
    :mnemonic ?U
    :mime-charset 'utf-7
    :charset-list '(unicode)
    :pre-write-conversion 'utf-7-pre-write-conversion
    :post-read-conversion 'utf-7-post-read-conversion)

  (define-coding-system 'utf-7-imap
    "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
    :coding-type 'utf-8
    :mnemonic ?u
    :charset-list '(unicode)
    :pre-write-conversion 'utf-7-imap-pre-write-conversion
    :post-read-conversion 'utf-7-imap-post-read-conversion)
  ;; Terminal output falls back to us-ascii unless some other coding
  ;; system is specified explicitly.
  (set-safe-terminal-coding-system-internal 'us-ascii)
  1298. ;; The other coding-systems are defined in each language specific
  1299. ;; files under lisp/language.
  ;; Normally, set coding system to `undecided' before reading a file.
  ;; Compiled Emacs Lisp files (*.elc) are not decoded at all,
  ;; but we regard them as containing multibyte characters.
  ;; Tar files are not decoded at all, but we treat them as raw bytes.
  ;;
  ;; Each entry is (REGEXP . VALUE); the regexp strings are passed
  ;; through `purecopy'.  The dot in the loaddefs.el pattern is escaped
  ;; so that it matches only a literal "." (it used to match any
  ;; character).
  (setq file-coding-system-alist
        (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
                '(("\\.elc\\'" . utf-8-emacs)
                  ("\\.utf\\(-8\\)?\\'" . utf-8)
                  ("\\.xml\\'" . xml-find-file-coding-system)
                  ;; We use raw-text for reading loaddefs.el so that if it
                  ;; happens to have DOS or Mac EOLs, they are converted to
                  ;; newlines.  This is required to make the special treatment
                  ;; of the "\ newline" combination in loaddefs.el, which marks
                  ;; the beginning of a doc string, work.
                  ("\\(\\`\\|/\\)loaddefs\\.el\\'" . (raw-text . raw-text-unix))
                  ("\\.tar\\'" . (no-conversion . no-conversion))
                  ("\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system)
                  ("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system)
                  ("" . (undecided . nil)))))
  ;;; Setting coding categories and their priorities.

  ;; These priorities only serve to read Emacs Lisp source files that
  ;; contain multilingual text while Emacs is being dumped.  The
  ;; command `set-language-environment' sets more appropriate values
  ;; for each language environment.
  (set-coding-system-priority 'iso-latin-1 'utf-8 'iso-2022-7bit)
  ;;; Miscellaneous settings.

  ;; Bind every character from 128 up to `max-char' to
  ;; `self-insert-command' in the global map, so that all multibyte
  ;; characters self-insert.
  (let ((multibyte-range (cons 128 (max-char))))
    (set-char-table-range (nth 1 global-map)
                          multibyte-range
                          'self-insert-command))
  ;; Set the `latin-extra-code-table' entries for the codes
  ;; ?\221..?\226 to t.
  (dolist (code '(?\221 ?\222 ?\223 ?\224 ?\225 ?\226))
    (aset latin-extra-code-table code t))
  ;; The coding systems based on the charsets defined in this file make
  ;; the old code-pages library obsolete.  User code might still
  ;; require it, so the feature is provided here.
  (provide 'code-pages)
  1344. ;;; mule-conf.el ends here