CompositionExclusions.txt 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. # CompositionExclusions-5.0.0.txt
  2. # Date: 2006-05-23, 12:42:00 PST [KW]
  3. #
  4. # This file lists the characters for the Composition Exclusion Table
  5. # defined in UAX #15, Unicode Normalization Forms.
  6. #
  7. # This file is a normative contributory data file in the
  8. # Unicode Character Database.
  9. #
  10. # Copyright (c) 1991-2006 Unicode, Inc.
  11. # For terms of use, see http://www.unicode.org/terms_of_use.html
  12. #
  13. # For more information, see
  14. # http://www.unicode.org/unicode/reports/tr15/#Primary_Exclusion_List_Table
  15. #
  16. # For a full derivation of composition exclusions, see the derived property
  17. # Full_Composition_Exclusion in DerivedNormalizationProps.txt
  18. #
  19. # ================================================
  20. # (1) Script Specifics
  21. #
  22. # This list of characters cannot be derived from the UnicodeData.txt file.
  23. # ================================================
  24. 0958 # DEVANAGARI LETTER QA
  25. 0959 # DEVANAGARI LETTER KHHA
  26. 095A # DEVANAGARI LETTER GHHA
  27. 095B # DEVANAGARI LETTER ZA
  28. 095C # DEVANAGARI LETTER DDDHA
  29. 095D # DEVANAGARI LETTER RHA
  30. 095E # DEVANAGARI LETTER FA
  31. 095F # DEVANAGARI LETTER YYA
  32. 09DC # BENGALI LETTER RRA
  33. 09DD # BENGALI LETTER RHA
  34. 09DF # BENGALI LETTER YYA
  35. 0A33 # GURMUKHI LETTER LLA
  36. 0A36 # GURMUKHI LETTER SHA
  37. 0A59 # GURMUKHI LETTER KHHA
  38. 0A5A # GURMUKHI LETTER GHHA
  39. 0A5B # GURMUKHI LETTER ZA
  40. 0A5E # GURMUKHI LETTER FA
  41. 0B5C # ORIYA LETTER RRA
  42. 0B5D # ORIYA LETTER RHA
  43. 0F43 # TIBETAN LETTER GHA
  44. 0F4D # TIBETAN LETTER DDHA
  45. 0F52 # TIBETAN LETTER DHA
  46. 0F57 # TIBETAN LETTER BHA
  47. 0F5C # TIBETAN LETTER DZHA
  48. 0F69 # TIBETAN LETTER KSSA
  49. 0F76 # TIBETAN VOWEL SIGN VOCALIC R
  50. 0F78 # TIBETAN VOWEL SIGN VOCALIC L
  51. 0F93 # TIBETAN SUBJOINED LETTER GHA
  52. 0F9D # TIBETAN SUBJOINED LETTER DDHA
  53. 0FA2 # TIBETAN SUBJOINED LETTER DHA
  54. 0FA7 # TIBETAN SUBJOINED LETTER BHA
  55. 0FAC # TIBETAN SUBJOINED LETTER DZHA
  56. 0FB9 # TIBETAN SUBJOINED LETTER KSSA
  57. FB1D # HEBREW LETTER YOD WITH HIRIQ
  58. FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH
  59. FB2A # HEBREW LETTER SHIN WITH SHIN DOT
  60. FB2B # HEBREW LETTER SHIN WITH SIN DOT
  61. FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
  62. FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
  63. FB2E # HEBREW LETTER ALEF WITH PATAH
  64. FB2F # HEBREW LETTER ALEF WITH QAMATS
  65. FB30 # HEBREW LETTER ALEF WITH MAPIQ
  66. FB31 # HEBREW LETTER BET WITH DAGESH
  67. FB32 # HEBREW LETTER GIMEL WITH DAGESH
  68. FB33 # HEBREW LETTER DALET WITH DAGESH
  69. FB34 # HEBREW LETTER HE WITH MAPIQ
  70. FB35 # HEBREW LETTER VAV WITH DAGESH
  71. FB36 # HEBREW LETTER ZAYIN WITH DAGESH
  72. FB38 # HEBREW LETTER TET WITH DAGESH
  73. FB39 # HEBREW LETTER YOD WITH DAGESH
  74. FB3A # HEBREW LETTER FINAL KAF WITH DAGESH
  75. FB3B # HEBREW LETTER KAF WITH DAGESH
  76. FB3C # HEBREW LETTER LAMED WITH DAGESH
  77. FB3E # HEBREW LETTER MEM WITH DAGESH
  78. FB40 # HEBREW LETTER NUN WITH DAGESH
  79. FB41 # HEBREW LETTER SAMEKH WITH DAGESH
  80. FB43 # HEBREW LETTER FINAL PE WITH DAGESH
  81. FB44 # HEBREW LETTER PE WITH DAGESH
  82. FB46 # HEBREW LETTER TSADI WITH DAGESH
  83. FB47 # HEBREW LETTER QOF WITH DAGESH
  84. FB48 # HEBREW LETTER RESH WITH DAGESH
  85. FB49 # HEBREW LETTER SHIN WITH DAGESH
  86. FB4A # HEBREW LETTER TAV WITH DAGESH
  87. FB4B # HEBREW LETTER VAV WITH HOLAM
  88. FB4C # HEBREW LETTER BET WITH RAFE
  89. FB4D # HEBREW LETTER KAF WITH RAFE
  90. FB4E # HEBREW LETTER PE WITH RAFE
  91. # Total code points: 67
  92. # ================================================
  93. # (2) Post Composition Version precomposed characters
  94. #
  95. # These characters cannot be derived solely from the UnicodeData.txt file
  96. # in this version of Unicode.
  97. #
  98. # Note that characters added to the standard after the
  99. # Composition Version and which have canonical decomposition mappings
  100. # are not automatically added to this list of Post Composition
  101. # Version precomposed characters.
  102. # ================================================
  103. 2ADC # FORKING
  104. 1D15E # MUSICAL SYMBOL HALF NOTE
  105. 1D15F # MUSICAL SYMBOL QUARTER NOTE
  106. 1D160 # MUSICAL SYMBOL EIGHTH NOTE
  107. 1D161 # MUSICAL SYMBOL SIXTEENTH NOTE
  108. 1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE
  109. 1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE
  110. 1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
  111. 1D1BB # MUSICAL SYMBOL MINIMA
  112. 1D1BC # MUSICAL SYMBOL MINIMA BLACK
  113. 1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE
  114. 1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK
  115. 1D1BF # MUSICAL SYMBOL FUSA WHITE
  116. 1D1C0 # MUSICAL SYMBOL FUSA BLACK
  117. # Total code points: 14
  118. # ================================================
  119. # (3) Singleton Decompositions
  120. #
  121. # These characters can be derived from the UnicodeData.txt file
  122. # by including all characters whose canonical decomposition
  123. # consists of a single character.
  124. #
  125. # These characters are simply quoted here for reference.
  126. # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
  127. # ================================================
  128. # 0340..0341 [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
  129. # 0343 COMBINING GREEK KORONIS
  130. # 0374 GREEK NUMERAL SIGN
  131. # 037E GREEK QUESTION MARK
  132. # 0387 GREEK ANO TELEIA
  133. # 1F71 GREEK SMALL LETTER ALPHA WITH OXIA
  134. # 1F73 GREEK SMALL LETTER EPSILON WITH OXIA
  135. # 1F75 GREEK SMALL LETTER ETA WITH OXIA
  136. # 1F77 GREEK SMALL LETTER IOTA WITH OXIA
  137. # 1F79 GREEK SMALL LETTER OMICRON WITH OXIA
  138. # 1F7B GREEK SMALL LETTER UPSILON WITH OXIA
  139. # 1F7D GREEK SMALL LETTER OMEGA WITH OXIA
  140. # 1FBB GREEK CAPITAL LETTER ALPHA WITH OXIA
  141. # 1FBE GREEK PROSGEGRAMMENI
  142. # 1FC9 GREEK CAPITAL LETTER EPSILON WITH OXIA
  143. # 1FCB GREEK CAPITAL LETTER ETA WITH OXIA
  144. # 1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
  145. # 1FDB GREEK CAPITAL LETTER IOTA WITH OXIA
  146. # 1FE3 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
  147. # 1FEB GREEK CAPITAL LETTER UPSILON WITH OXIA
  148. # 1FEE..1FEF [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA
  149. # 1FF9 GREEK CAPITAL LETTER OMICRON WITH OXIA
  150. # 1FFB GREEK CAPITAL LETTER OMEGA WITH OXIA
  151. # 1FFD GREEK OXIA
  152. # 2000..2001 [2] EN QUAD..EM QUAD
  153. # 2126 OHM SIGN
  154. # 212A..212B [2] KELVIN SIGN..ANGSTROM SIGN
  155. # 2329 LEFT-POINTING ANGLE BRACKET
  156. # 232A RIGHT-POINTING ANGLE BRACKET
  157. # F900..FA0D [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
  158. # FA10 CJK COMPATIBILITY IDEOGRAPH-FA10
  159. # FA12 CJK COMPATIBILITY IDEOGRAPH-FA12
  160. # FA15..FA1E [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
  161. # FA20 CJK COMPATIBILITY IDEOGRAPH-FA20
  162. # FA22 CJK COMPATIBILITY IDEOGRAPH-FA22
  163. # FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
  164. # FA2A..FA2D [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D
  165. # FA30..FA6A [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
  166. # FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
  167. # 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
  168. # Total code points: 1030
  169. # ================================================
  170. # (4) Non-Starter Decompositions
  171. #
  172. # These characters can be derived from the UnicodeData.txt file
  173. # by including all characters whose canonical decomposition consists
  174. # of a sequence of characters, the first of which has a non-zero
  175. # combining class.
  176. #
  177. # These characters are simply quoted here for reference.
  178. # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
  179. # ================================================
  180. # 0344 COMBINING GREEK DIALYTIKA TONOS
  181. # 0F73 TIBETAN VOWEL SIGN II
  182. # 0F75 TIBETAN VOWEL SIGN UU
  183. # 0F81 TIBETAN VOWEL SIGN REVERSED II
  184. # Total code points: 4