ArabicShaping.txt 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. # ArabicShaping-6.0.0.txt
  2. # Date: 2010-04-30, 13:47:00 PDT [KW]
  3. #
  4. # This file is a normative contributory data file in the
  5. # Unicode Character Database.
  6. #
  7. # Copyright (c) 1991-2010 Unicode, Inc.
  8. # For terms of use, see http://www.unicode.org/terms_of_use.html
  9. #
  10. # This file defines the shaping classes for Arabic, Syriac, and N'Ko
  11. # positional shaping, repeating in machine readable form the
  12. # information exemplified in Tables 8-3, 8-7, 8-8, 8-11, 8-12,
  13. # 8-13, and 13-5 of The Unicode Standard, Version 6.0.
  14. #
  15. # See sections 8.2, 8.3, and 13.5 of The Unicode Standard, Version 6.0
  16. # for more information.
  17. #
  18. # Each line contains four fields, separated by semicolons.
  19. #
  20. # Field 0: the code point, in 4-digit hexadecimal
  21. # form, of an Arabic, Syriac, or N'Ko character.
  22. #
  23. # Field 1: gives a short schematic name for that character,
  24. # abbreviated from the normative Unicode character name.
  25. # Note that this schematic name is considered a comment,
  26. # and does not constitute a formal property value.
  27. #
  28. # Field 2: defines the joining type (property name: Joining_Type)
  29. # R Right_Joining
  30. # L Left_Joining
  31. # D Dual_Joining
  32. # C Join_Causing
  33. # U Non_Joining
  34. # T Transparent
  35. # See Section 8.2, Arabic for more information on these types.
  36. #
  37. # Field 3: defines the joining group (property name: Joining_Group)
  38. #
  39. # The values of the joining group are based schematically on character
  40. # names. Where a schematic character name consists of two or more parts separated
  41. # by spaces, the formal Joining_Group property value, as specified in
  42. # PropertyValueAliases.txt, consists of the same name parts joined by
  43. # underscores. Hence, the entry:
  44. #
  45. # 0629; TEH MARBUTA; R; TEH MARBUTA
  46. #
  47. # corresponds to [Joining_Group = Teh_Marbuta].
  48. #
  49. # Note: The property value now designated [Joining_Group = Teh_Marbuta_Goal]
  50. # used to apply to both of the following characters
  51. # in earlier versions of the standard:
  52. #
  53. # U+06C2 ARABIC LETTER HEH GOAL WITH HAMZA ABOVE
  54. # U+06C3 ARABIC LETTER TEH MARBUTA GOAL
  55. #
  56. # However, it currently applies only to U+06C3, and *not* to U+06C2.
  57. # To avoid destabilizing existing Joining_Group property aliases, the
  58. # prior Joining_Group value for U+06C3 (Hamza_On_Heh_Goal) has been
  59. # retained as a property value alias, despite the fact that it
  60. # no longer applies to its namesake character, U+06C2.
  61. # See PropertyValueAliases.txt.
  62. #
  63. # When other cursive scripts are added to the Unicode Standard in
  64. # the future, the joining group value of all their letters will default
  65. # to jg=No_Joining_Group in this data file. Other, more specific
  66. # joining group values will be defined only if an explicit proposal
  67. # to define those values exactly has been approved by the UTC. This
  68. # is the convention exemplified by the N'Ko script. Only the Arabic
  69. # and Syriac scripts currently have explicit joining group values defined.
  70. #
  71. # Note: Code points that are not explicitly listed in this file are
  72. # either of joining type T or U:
  73. #
  74. # - Those that are not explicitly listed and that are of General Category Mn, Me, or Cf
  75. # have joining type T.
  76. # - All others not explicitly listed have joining type U.
  77. #
  78. # For an explicit listing of characters of joining type T, see
  79. # the derived property file DerivedJoiningType.txt.
  80. #
  81. # There are currently no characters of joining type L defined in Unicode.
  82. #
  83. # #############################################################
  84. # Unicode; Schematic Name; Joining Type; Joining Group
  85. # Arabic characters
  86. 0600; ARABIC NUMBER SIGN; U; No_Joining_Group
  87. 0601; ARABIC SIGN SANAH; U; No_Joining_Group
  88. 0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group
  89. 0603; ARABIC SIGN SAFHA; U; No_Joining_Group
  90. 0608; ARABIC RAY; U; No_Joining_Group
  91. 060B; AFGHANI SIGN; U; No_Joining_Group
  92. 0620; YEH WITH RING; D; YEH
  93. 0621; HAMZA; U; No_Joining_Group
  94. 0622; MADDA ON ALEF; R; ALEF
  95. 0623; HAMZA ON ALEF; R; ALEF
  96. 0624; HAMZA ON WAW; R; WAW
  97. 0625; HAMZA UNDER ALEF; R; ALEF
  98. 0626; HAMZA ON YEH; D; YEH
  99. 0627; ALEF; R; ALEF
  100. 0628; BEH; D; BEH
  101. 0629; TEH MARBUTA; R; TEH MARBUTA
  102. 062A; TEH; D; BEH
  103. 062B; THEH; D; BEH
  104. 062C; JEEM; D; HAH
  105. 062D; HAH; D; HAH
  106. 062E; KHAH; D; HAH
  107. 062F; DAL; R; DAL
  108. 0630; THAL; R; DAL
  109. 0631; REH; R; REH
  110. 0632; ZAIN; R; REH
  111. 0633; SEEN; D; SEEN
  112. 0634; SHEEN; D; SEEN
  113. 0635; SAD; D; SAD
  114. 0636; DAD; D; SAD
  115. 0637; TAH; D; TAH
  116. 0638; ZAH; D; TAH
  117. 0639; AIN; D; AIN
  118. 063A; GHAIN; D; AIN
  119. 063B; KEHEH WITH 2 DOTS ABOVE; D; GAF
  120. 063C; KEHEH WITH 3 DOTS BELOW; D; GAF
  121. 063D; FARSI YEH WITH INVERTED V; D; FARSI YEH
  122. 063E; FARSI YEH WITH 2 DOTS ABOVE; D; FARSI YEH
  123. 063F; FARSI YEH WITH 3 DOTS ABOVE; D; FARSI YEH
  124. 0640; TATWEEL; C; No_Joining_Group
  125. 0641; FEH; D; FEH
  126. 0642; QAF; D; QAF
  127. 0643; KAF; D; KAF
  128. 0644; LAM; D; LAM
  129. 0645; MEEM; D; MEEM
  130. 0646; NOON; D; NOON
  131. 0647; HEH; D; HEH
  132. 0648; WAW; R; WAW
  133. 0649; ALEF MAKSURA; D; YEH
  134. 064A; YEH; D; YEH
  135. 066E; DOTLESS BEH; D; BEH
  136. 066F; DOTLESS QAF; D; QAF
  137. 0671; HAMZAT WASL ON ALEF; R; ALEF
  138. 0672; WAVY HAMZA ON ALEF; R; ALEF
  139. 0673; WAVY HAMZA UNDER ALEF; R; ALEF
  140. 0674; HIGH HAMZA; U; No_Joining_Group
  141. 0675; HIGH HAMZA ALEF; R; ALEF
  142. 0676; HIGH HAMZA WAW; R; WAW
  143. 0677; HIGH HAMZA WAW WITH DAMMA; R; WAW
  144. 0678; HIGH HAMZA YEH; D; YEH
  145. 0679; TEH WITH SMALL TAH; D; BEH
  146. 067A; TEH WITH 2 DOTS VERTICAL ABOVE; D; BEH
  147. 067B; BEH WITH 2 DOTS VERTICAL BELOW; D; BEH
  148. 067C; TEH WITH RING; D; BEH
  149. 067D; TEH WITH 3 DOTS ABOVE DOWNWARD; D; BEH
  150. 067E; TEH WITH 3 DOTS BELOW; D; BEH
  151. 067F; TEH WITH 4 DOTS ABOVE; D; BEH
  152. 0680; BEH WITH 4 DOTS BELOW; D; BEH
  153. 0681; HAMZA ON HAH; D; HAH
  154. 0682; HAH WITH 2 DOTS VERTICAL ABOVE; D; HAH
  155. 0683; HAH WITH MIDDLE 2 DOTS; D; HAH
  156. 0684; HAH WITH MIDDLE 2 DOTS VERTICAL; D; HAH
  157. 0685; HAH WITH 3 DOTS ABOVE; D; HAH
  158. 0686; HAH WITH MIDDLE 3 DOTS DOWNWARD; D; HAH
  159. 0687; HAH WITH MIDDLE 4 DOTS; D; HAH
  160. 0688; DAL WITH SMALL TAH; R; DAL
  161. 0689; DAL WITH RING; R; DAL
  162. 068A; DAL WITH DOT BELOW; R; DAL
  163. 068B; DAL WITH DOT BELOW AND SMALL TAH; R; DAL
  164. 068C; DAL WITH 2 DOTS ABOVE; R; DAL
  165. 068D; DAL WITH 2 DOTS BELOW; R; DAL
  166. 068E; DAL WITH 3 DOTS ABOVE; R; DAL
  167. 068F; DAL WITH 3 DOTS ABOVE DOWNWARD; R; DAL
  168. 0690; DAL WITH 4 DOTS ABOVE; R; DAL
  169. 0691; REH WITH SMALL TAH; R; REH
  170. 0692; REH WITH SMALL V; R; REH
  171. 0693; REH WITH RING; R; REH
  172. 0694; REH WITH DOT BELOW; R; REH
  173. 0695; REH WITH SMALL V BELOW; R; REH
  174. 0696; REH WITH DOT BELOW AND DOT ABOVE; R; REH
  175. 0697; REH WITH 2 DOTS ABOVE; R; REH
  176. 0698; REH WITH 3 DOTS ABOVE; R; REH
  177. 0699; REH WITH 4 DOTS ABOVE; R; REH
  178. 069A; SEEN WITH DOT BELOW AND DOT ABOVE; D; SEEN
  179. 069B; SEEN WITH 3 DOTS BELOW; D; SEEN
  180. 069C; SEEN WITH 3 DOTS BELOW AND 3 DOTS ABOVE; D; SEEN
  181. 069D; SAD WITH 2 DOTS BELOW; D; SAD
  182. 069E; SAD WITH 3 DOTS ABOVE; D; SAD
  183. 069F; TAH WITH 3 DOTS ABOVE; D; TAH
  184. 06A0; AIN WITH 3 DOTS ABOVE; D; AIN
  185. 06A1; DOTLESS FEH; D; FEH
  186. 06A2; FEH WITH DOT MOVED BELOW; D; FEH
  187. 06A3; FEH WITH DOT BELOW; D; FEH
  188. 06A4; FEH WITH 3 DOTS ABOVE; D; FEH
  189. 06A5; FEH WITH 3 DOTS BELOW; D; FEH
  190. 06A6; FEH WITH 4 DOTS ABOVE; D; FEH
  191. 06A7; QAF WITH DOT ABOVE; D; QAF
  192. 06A8; QAF WITH 3 DOTS ABOVE; D; QAF
  193. 06A9; KEHEH; D; GAF
  194. 06AA; SWASH KAF; D; SWASH KAF
  195. 06AB; KAF WITH RING; D; GAF
  196. 06AC; KAF WITH DOT ABOVE; D; KAF
  197. 06AD; KAF WITH 3 DOTS ABOVE; D; KAF
  198. 06AE; KAF WITH 3 DOTS BELOW; D; KAF
  199. 06AF; GAF; D; GAF
  200. 06B0; GAF WITH RING; D; GAF
  201. 06B1; GAF WITH 2 DOTS ABOVE; D; GAF
  202. 06B2; GAF WITH 2 DOTS BELOW; D; GAF
  203. 06B3; GAF WITH 2 DOTS VERTICAL BELOW; D; GAF
  204. 06B4; GAF WITH 3 DOTS ABOVE; D; GAF
  205. 06B5; LAM WITH SMALL V; D; LAM
  206. 06B6; LAM WITH DOT ABOVE; D; LAM
  207. 06B7; LAM WITH 3 DOTS ABOVE; D; LAM
  208. 06B8; LAM WITH 3 DOTS BELOW; D; LAM
  209. 06B9; NOON WITH DOT BELOW; D; NOON
  210. 06BA; DOTLESS NOON; D; NOON
  211. 06BB; DOTLESS NOON WITH SMALL TAH; D; NOON
  212. 06BC; NOON WITH RING; D; NOON
  213. 06BD; NYA; D; NYA
  214. 06BE; KNOTTED HEH; D; KNOTTED HEH
  215. 06BF; HAH WITH MIDDLE 3 DOTS DOWNWARD AND DOT ABOVE; D; HAH
  216. 06C0; HAMZA ON HEH; R; TEH MARBUTA
  217. 06C1; HEH GOAL; D; HEH GOAL
  218. 06C2; HAMZA ON HEH GOAL; D; HEH GOAL
  219. 06C3; TEH MARBUTA GOAL; R; TEH MARBUTA GOAL
  220. 06C4; WAW WITH RING; R; WAW
  221. 06C5; WAW WITH BAR; R; WAW
  222. 06C6; WAW WITH SMALL V; R; WAW
  223. 06C7; WAW WITH DAMMA; R; WAW
  224. 06C8; WAW WITH ALEF ABOVE; R; WAW
  225. 06C9; WAW WITH INVERTED SMALL V; R; WAW
  226. 06CA; WAW WITH 2 DOTS ABOVE; R; WAW
  227. 06CB; WAW WITH 3 DOTS ABOVE; R; WAW
  228. 06CC; FARSI YEH; D; FARSI YEH
  229. 06CD; YEH WITH TAIL; R; YEH WITH TAIL
  230. 06CE; FARSI YEH WITH SMALL V; D; FARSI YEH
  231. 06CF; WAW WITH DOT ABOVE; R; WAW
  232. 06D0; YEH WITH 2 DOTS VERTICAL BELOW; D; YEH
  233. 06D1; YEH WITH 3 DOTS BELOW; D; YEH
  234. 06D2; YEH BARREE; R; YEH BARREE
  235. 06D3; HAMZA ON YEH BARREE; R; YEH BARREE
  236. 06D5; AE; R; TEH MARBUTA
  237. 06DD; ARABIC END OF AYAH; U; No_Joining_Group
  238. 06EE; DAL WITH INVERTED V; R; DAL
  239. 06EF; REH WITH INVERTED V; R; REH
  240. 06FA; SEEN WITH DOT BELOW AND 3 DOTS ABOVE; D; SEEN
  241. 06FB; DAD WITH DOT BELOW; D; SAD
  242. 06FC; GHAIN WITH DOT BELOW; D; AIN
  243. 06FF; HEH WITH INVERTED V; D; KNOTTED HEH
  244. # Syriac characters
  245. 0710; ALAPH; R; ALAPH
  246. 0712; BETH; D; BETH
  247. 0713; GAMAL; D; GAMAL
  248. 0714; GAMAL GARSHUNI; D; GAMAL
  249. 0715; DALATH; R; DALATH RISH
  250. 0716; DOTLESS DALATH RISH; R; DALATH RISH
  251. 0717; HE; R; HE
  252. 0718; WAW; R; SYRIAC WAW
  253. 0719; ZAIN; R; ZAIN
  254. 071A; HETH; D; HETH
  255. 071B; TETH; D; TETH
  256. 071C; TETH GARSHUNI; D; TETH
  257. 071D; YUDH; D; YUDH
  258. 071E; YUDH HE; R; YUDH HE
  259. 071F; KAPH; D; KAPH
  260. 0720; LAMADH; D; LAMADH
  261. 0721; MIM; D; MIM
  262. 0722; NUN; D; NUN
  263. 0723; SEMKATH; D; SEMKATH
  264. 0724; FINAL SEMKATH; D; FINAL SEMKATH
  265. 0725; E; D; E
  266. 0726; PE; D; PE
  267. 0727; REVERSED PE; D; REVERSED PE
  268. 0728; SADHE; R; SADHE
  269. 0729; QAPH; D; QAPH
  270. 072A; RISH; R; DALATH RISH
  271. 072B; SHIN; D; SHIN
  272. 072C; TAW; R; TAW
  273. 072D; PERSIAN BHETH; D; BETH
  274. 072E; PERSIAN GHAMAL; D; GAMAL
  275. 072F; PERSIAN DHALATH; R; DALATH RISH
  276. 074D; SOGDIAN ZHAIN; R; ZHAIN
  277. 074E; SOGDIAN KHAPH; D; KHAPH
  278. 074F; SOGDIAN FE; D; FE
  279. # Arabic supplement characters
  280. 0750; BEH WITH 3 DOTS HORIZONTALLY BELOW; D; BEH
  281. 0751; BEH WITH DOT BELOW AND 3 DOTS ABOVE; D; BEH
  282. 0752; BEH WITH 3 DOTS POINTING UPWARDS BELOW; D; BEH
  283. 0753; BEH WITH 3 DOTS POINTING UPWARDS BELOW AND 2 DOTS ABOVE; D; BEH
  284. 0754; BEH WITH 2 DOTS BELOW AND DOT ABOVE; D; BEH
  285. 0755; BEH WITH INVERTED SMALL V BELOW; D; BEH
  286. 0756; BEH WITH SMALL V; D; BEH
  287. 0757; HAH WITH 2 DOTS ABOVE; D; HAH
  288. 0758; HAH WITH 3 DOTS POINTING UPWARDS BELOW; D; HAH
  289. 0759; DAL WITH 2 DOTS VERTICALLY BELOW AND SMALL TAH; R; DAL
  290. 075A; DAL WITH INVERTED SMALL V BELOW; R; DAL
  291. 075B; REH WITH STROKE; R; REH
  292. 075C; SEEN WITH 4 DOTS ABOVE; D; SEEN
  293. 075D; AIN WITH 2 DOTS ABOVE; D; AIN
  294. 075E; AIN WITH 3 DOTS POINTING DOWNWARDS ABOVE; D; AIN
  295. 075F; AIN WITH 2 DOTS VERTICALLY ABOVE; D; AIN
  296. 0760; FEH WITH 2 DOTS BELOW; D; FEH
  297. 0761; FEH WITH 3 DOTS POINTING UPWARDS BELOW; D; FEH
  298. 0762; KEHEH WITH DOT ABOVE; D; GAF
  299. 0763; KEHEH WITH 3 DOTS ABOVE; D; GAF
  300. 0764; KEHEH WITH 3 DOTS POINTING UPWARDS BELOW; D; GAF
  301. 0765; MEEM WITH DOT ABOVE; D; MEEM
  302. 0766; MEEM WITH DOT BELOW; D; MEEM
  303. 0767; NOON WITH 2 DOTS BELOW; D; NOON
  304. 0768; NOON WITH SMALL TAH; D; NOON
  305. 0769; NOON WITH SMALL V; D; NOON
  306. 076A; LAM WITH BAR; D; LAM
  307. 076B; REH WITH 2 DOTS VERTICALLY ABOVE; R; REH
  308. 076C; REH WITH HAMZA ABOVE; R; REH
  309. 076D; SEEN WITH 2 DOTS VERTICALLY ABOVE; D; SEEN
  310. 076E; HAH WITH SMALL TAH BELOW; D; HAH
  311. 076F; HAH WITH SMALL TAH AND 2 DOTS; D; HAH
  312. 0770; SEEN WITH SMALL TAH AND 2 DOTS; D; SEEN
  313. 0771; REH WITH SMALL TAH AND 2 DOTS; R; REH
  314. 0772; HAH WITH SMALL TAH ABOVE; D; HAH
  315. 0773; ALEF WITH DIGIT TWO ABOVE; R; ALEF
  316. 0774; ALEF WITH DIGIT THREE ABOVE; R; ALEF
  317. 0775; FARSI YEH WITH DIGIT TWO ABOVE; D; FARSI YEH
  318. 0776; FARSI YEH WITH DIGIT THREE ABOVE; D; FARSI YEH
  319. 0777; YEH WITH DIGIT FOUR BELOW; D; YEH
  320. 0778; WAW WITH DIGIT TWO ABOVE; R; WAW
  321. 0779; WAW WITH DIGIT THREE ABOVE; R; WAW
  322. 077A; YEH BARREE WITH DIGIT TWO ABOVE; D; BURUSHASKI YEH BARREE
  323. 077B; YEH BARREE WITH DIGIT THREE ABOVE; D; BURUSHASKI YEH BARREE
  324. 077C; HAH WITH DIGIT FOUR BELOW; D; HAH
  325. 077D; SEEN WITH DIGIT FOUR ABOVE; D; SEEN
  326. 077E; SEEN WITH INVERTED V; D; SEEN
  327. 077F; KAF WITH 2 DOTS ABOVE; D; KAF
  328. # N'Ko Characters
  329. 07CA; NKO A; D; No_Joining_Group
  330. 07CB; NKO EE; D; No_Joining_Group
  331. 07CC; NKO I; D; No_Joining_Group
  332. 07CD; NKO E; D; No_Joining_Group
  333. 07CE; NKO U; D; No_Joining_Group
  334. 07CF; NKO OO; D; No_Joining_Group
  335. 07D0; NKO O; D; No_Joining_Group
  336. 07D1; NKO DAGBASINNA; D; No_Joining_Group
  337. 07D2; NKO N; D; No_Joining_Group
  338. 07D3; NKO BA; D; No_Joining_Group
  339. 07D4; NKO PA; D; No_Joining_Group
  340. 07D5; NKO TA; D; No_Joining_Group
  341. 07D6; NKO JA; D; No_Joining_Group
  342. 07D7; NKO CHA; D; No_Joining_Group
  343. 07D8; NKO DA; D; No_Joining_Group
  344. 07D9; NKO RA; D; No_Joining_Group
  345. 07DA; NKO RRA; D; No_Joining_Group
  346. 07DB; NKO SA; D; No_Joining_Group
  347. 07DC; NKO GBA; D; No_Joining_Group
  348. 07DD; NKO FA; D; No_Joining_Group
  349. 07DE; NKO KA; D; No_Joining_Group
  350. 07DF; NKO LA; D; No_Joining_Group
  351. 07E0; NKO NA WOLOSO; D; No_Joining_Group
  352. 07E1; NKO MA; D; No_Joining_Group
  353. 07E2; NKO NYA; D; No_Joining_Group
  354. 07E3; NKO NA; D; No_Joining_Group
  355. 07E4; NKO HA; D; No_Joining_Group
  356. 07E5; NKO WA; D; No_Joining_Group
  357. 07E6; NKO YA; D; No_Joining_Group
  358. 07E7; NKO NYA WOLOSO; D; No_Joining_Group
  359. 07E8; NKO JONA JA; D; No_Joining_Group
  360. 07E9; NKO JONA CHA; D; No_Joining_Group
  361. 07EA; NKO JONA RA; D; No_Joining_Group
  362. 07FA; NKO LAJANYALAN; C; No_Joining_Group
  363. # Other
  364. 200C; ZERO WIDTH NON-JOINER; U; No_Joining_Group
  365. 200D; ZERO WIDTH JOINER; C; No_Joining_Group
  366. # EOF