const.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. UNICODE_RANGES = [
  2. [
  3. "0020-007F",
  4. "Basic Latin"
  5. ],
  6. [
  7. "2580-259F",
  8. "Block Elements"
  9. ],
  10. [
  11. "00A0-00FF",
  12. "Latin-1 Supplement"
  13. ],
  14. [
  15. "25A0-25FF",
  16. "Geometric Shapes"
  17. ],
  18. [
  19. "0100-017F",
  20. "Latin Extended-A"
  21. ],
  22. [
  23. "2600-26FF",
  24. "Miscellaneous Symbols"
  25. ],
  26. [
  27. "0180-024F",
  28. "Latin Extended-B"
  29. ],
  30. [
  31. "2700-27BF",
  32. "Dingbats"
  33. ],
  34. [
  35. "0250-02AF",
  36. "IPA Extensions"
  37. ],
  38. [
  39. "27C0-27EF",
  40. "Miscellaneous Mathematical Symbols-A"
  41. ],
  42. [
  43. "02B0-02FF",
  44. "Spacing Modifier Letters"
  45. ],
  46. [
  47. "27F0-27FF",
  48. "Supplemental Arrows-A"
  49. ],
  50. [
  51. "0300-036F",
  52. "Combining Diacritical Marks"
  53. ],
  54. [
  55. "2800-28FF",
  56. "Braille Patterns"
  57. ],
  58. [
  59. "0370-03FF",
  60. "Greek and Coptic"
  61. ],
  62. [
  63. "2900-297F",
  64. "Supplemental Arrows-B"
  65. ],
  66. [
  67. "0400-04FF",
  68. "Cyrillic"
  69. ],
  70. [
  71. "2980-29FF",
  72. "Miscellaneous Mathematical Symbols-B"
  73. ],
  74. [
  75. "0500-052F",
  76. "Cyrillic Supplementary"
  77. ],
  78. [
  79. "2A00-2AFF",
  80. "Supplemental Mathematical Operators"
  81. ],
  82. [
  83. "0530-058F",
  84. "Armenian"
  85. ],
  86. [
  87. "2B00-2BFF",
  88. "Miscellaneous Symbols and Arrows"
  89. ],
  90. [
  91. "0590-05FF",
  92. "Hebrew"
  93. ],
  94. [
  95. "2E80-2EFF",
  96. "CJK Radicals Supplement"
  97. ],
  98. [
  99. "0600-06FF",
  100. "Arabic"
  101. ],
  102. [
  103. "2F00-2FDF",
  104. "Kangxi Radicals"
  105. ],
  106. [
  107. "0700-074F",
  108. "Syriac"
  109. ],
  110. [
  111. "2FF0-2FFF",
  112. "Ideographic Description Characters"
  113. ],
  114. [
  115. "0780-07BF",
  116. "Thaana"
  117. ],
  118. [
  119. "3000-303F",
  120. "CJK Symbols and Punctuation"
  121. ],
  122. [
  123. "0900-097F",
  124. "Devanagari"
  125. ],
  126. [
  127. "3040-309F",
  128. "Hiragana"
  129. ],
  130. [
  131. "0980-09FF",
  132. "Bengali"
  133. ],
  134. [
  135. "30A0-30FF",
  136. "Katakana"
  137. ],
  138. [
  139. "0A00-0A7F",
  140. "Gurmukhi"
  141. ],
  142. [
  143. "3100-312F",
  144. "Bopomofo"
  145. ],
  146. [
  147. "0A80-0AFF",
  148. "Gujarati"
  149. ],
  150. [
  151. "3130-318F",
  152. "Hangul Compatibility Jamo"
  153. ],
  154. [
  155. "0B00-0B7F",
  156. "Oriya"
  157. ],
  158. [
  159. "3190-319F",
  160. "Kanbun"
  161. ],
  162. [
  163. "0B80-0BFF",
  164. "Tamil"
  165. ],
  166. [
  167. "31A0-31BF",
  168. "Bopomofo Extended"
  169. ],
  170. [
  171. "0C00-0C7F",
  172. "Telugu"
  173. ],
  174. [
  175. "31F0-31FF",
  176. "Katakana Phonetic Extensions"
  177. ],
  178. [
  179. "0C80-0CFF",
  180. "Kannada"
  181. ],
  182. [
  183. "3200-32FF",
  184. "Enclosed CJK Letters and Months"
  185. ],
  186. [
  187. "0D00-0D7F",
  188. "Malayalam"
  189. ],
  190. [
  191. "3300-33FF",
  192. "CJK Compatibility"
  193. ],
  194. [
  195. "0D80-0DFF",
  196. "Sinhala"
  197. ],
  198. [
  199. "3400-4DBF",
  200. "CJK Unified Ideographs Extension A"
  201. ],
  202. [
  203. "0E00-0E7F",
  204. "Thai"
  205. ],
  206. [
  207. "4DC0-4DFF",
  208. "Yijing Hexagram Symbols"
  209. ],
  210. [
  211. "0E80-0EFF",
  212. "Lao"
  213. ],
  214. [
  215. "4E00-9FFF",
  216. "CJK Unified Ideographs"
  217. ],
  218. [
  219. "0F00-0FFF",
  220. "Tibetan"
  221. ],
  222. [
  223. "A000-A48F",
  224. "Yi Syllables"
  225. ],
  226. [
  227. "1000-109F",
  228. "Myanmar"
  229. ],
  230. [
  231. "A490-A4CF",
  232. "Yi Radicals"
  233. ],
  234. [
  235. "10A0-10FF",
  236. "Georgian"
  237. ],
  238. [
  239. "AC00-D7AF",
  240. "Hangul Syllables"
  241. ],
  242. [
  243. "1100-11FF",
  244. "Hangul Jamo"
  245. ],
  246. [
  247. "D800-DB7F",
  248. "High Surrogates"
  249. ],
  250. [
  251. "1200-137F",
  252. "Ethiopic"
  253. ],
  254. [
  255. "DB80-DBFF",
  256. "High Private Use Surrogates"
  257. ],
  258. [
  259. "13A0-13FF",
  260. "Cherokee"
  261. ],
  262. [
  263. "DC00-DFFF",
  264. "Low Surrogates"
  265. ],
  266. [
  267. "1400-167F",
  268. "Unified Canadian Aboriginal Syllabics"
  269. ],
  270. [
  271. "E000-F8FF",
  272. "Private Use Area"
  273. ],
  274. [
  275. "1680-169F",
  276. "Ogham"
  277. ],
  278. [
  279. "F900-FAFF",
  280. "CJK Compatibility Ideographs"
  281. ],
  282. [
  283. "16A0-16FF",
  284. "Runic"
  285. ],
  286. [
  287. "FB00-FB4F",
  288. "Alphabetic Presentation Forms"
  289. ],
  290. [
  291. "1700-171F",
  292. "Tagalog"
  293. ],
  294. [
  295. "FB50-FDFF",
  296. "Arabic Presentation Forms-A"
  297. ],
  298. [
  299. "1720-173F",
  300. "Hanunoo"
  301. ],
  302. [
  303. "FE00-FE0F",
  304. "Variation Selectors"
  305. ],
  306. [
  307. "1740-175F",
  308. "Buhid"
  309. ],
  310. [
  311. "FE20-FE2F",
  312. "Combining Half Marks"
  313. ],
  314. [
  315. "1760-177F",
  316. "Tagbanwa"
  317. ],
  318. [
  319. "FE30-FE4F",
  320. "CJK Compatibility Forms"
  321. ],
  322. [
  323. "1780-17FF",
  324. "Khmer"
  325. ],
  326. [
  327. "FE50-FE6F",
  328. "Small Form Variants"
  329. ],
  330. [
  331. "1800-18AF",
  332. "Mongolian"
  333. ],
  334. [
  335. "FE70-FEFF",
  336. "Arabic Presentation Forms-B"
  337. ],
  338. [
  339. "1900-194F",
  340. "Limbu"
  341. ],
  342. [
  343. "FF00-FFEF",
  344. "Halfwidth and Fullwidth Forms"
  345. ],
  346. [
  347. "1950-197F",
  348. "Tai Le"
  349. ],
  350. [
  351. "FFF0-FFFF",
  352. "Specials"
  353. ],
  354. [
  355. "19E0-19FF",
  356. "Khmer Symbols"
  357. ],
  358. [
  359. "10000-1007F",
  360. "Linear B Syllabary"
  361. ],
  362. [
  363. "1D00-1D7F",
  364. "Phonetic Extensions"
  365. ],
  366. [
  367. "10080-100FF",
  368. "Linear B Ideograms"
  369. ],
  370. [
  371. "1E00-1EFF",
  372. "Latin Extended Additional"
  373. ],
  374. [
  375. "10100-1013F",
  376. "Aegean Numbers"
  377. ],
  378. [
  379. "1F00-1FFF",
  380. "Greek Extended"
  381. ],
  382. [
  383. "10300-1032F",
  384. "Old Italic"
  385. ],
  386. [
  387. "2000-206F",
  388. "General Punctuation"
  389. ],
  390. [
  391. "10330-1034F",
  392. "Gothic"
  393. ],
  394. [
  395. "2070-209F",
  396. "Superscripts and Subscripts"
  397. ],
  398. [
  399. "10380-1039F",
  400. "Ugaritic"
  401. ],
  402. [
  403. "20A0-20CF",
  404. "Currency Symbols"
  405. ],
  406. [
  407. "10400-1044F",
  408. "Deseret"
  409. ],
  410. [
  411. "20D0-20FF",
  412. "Combining Diacritical Marks for Symbols"
  413. ],
  414. [
  415. "10450-1047F",
  416. "Shavian"
  417. ],
  418. [
  419. "2100-214F",
  420. "Letterlike Symbols"
  421. ],
  422. [
  423. "10480-104AF",
  424. "Osmanya"
  425. ],
  426. [
  427. "2150-218F",
  428. "Number Forms"
  429. ],
  430. [
  431. "10800-1083F",
  432. "Cypriot Syllabary"
  433. ],
  434. [
  435. "2190-21FF",
  436. "Arrows"
  437. ],
  438. [
  439. "1D000-1D0FF",
  440. "Byzantine Musical Symbols"
  441. ],
  442. [
  443. "2200-22FF",
  444. "Mathematical Operators"
  445. ],
  446. [
  447. "1D100-1D1FF",
  448. "Musical Symbols"
  449. ],
  450. [
  451. "2300-23FF",
  452. "Miscellaneous Technical"
  453. ],
  454. [
  455. "1D300-1D35F",
  456. "Tai Xuan Jing Symbols"
  457. ],
  458. [
  459. "2400-243F",
  460. "Control Pictures"
  461. ],
  462. [
  463. "1D400-1D7FF",
  464. "Mathematical Alphanumeric Symbols"
  465. ],
  466. [
  467. "2440-245F",
  468. "Optical Character Recognition"
  469. ],
  470. [
  471. "20000-2A6DF",
  472. "CJK Unified Ideographs Extension B"
  473. ],
  474. [
  475. "2460-24FF",
  476. "Enclosed Alphanumerics"
  477. ],
  478. [
  479. "2F800-2FA1F",
  480. "CJK Compatibility Ideographs Supplement"
  481. ],
  482. [
  483. "2500-257F",
  484. "Box Drawing"
  485. ],
  486. [
  487. "E0000-E007F",
  488. "Tags"
  489. ]
  490. ]