fts5_unicode2.c 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782
  1. /*
  2. ** 2012-05-25
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. ******************************************************************************
  12. */
  13. /*
  14. ** DO NOT EDIT THIS MACHINE GENERATED FILE.
  15. */
  16. #include <assert.h>
  17. /*
  18. ** If the argument is a codepoint corresponding to a lowercase letter
  19. ** in the ASCII range with a diacritic added, return the codepoint
  20. ** of the ASCII letter only. For example, if passed 235 - "LATIN
  21. ** SMALL LETTER E WITH DIAERESIS" - return 101 (0x65, "LATIN SMALL
  22. ** LETTER E"). The results of passing a codepoint that corresponds to an
  23. ** uppercase letter are undefined.
  24. */
  static int fts5_remove_diacritic(int c, int bComplex){
    /*
    ** Parameters:
    **   c        - codepoint to examine.
    **   bComplex - if zero, codepoints whose aChar[] entry carries the HIBIT
    **              flag are returned unchanged; if non-zero they are mapped
    **              like every other entry.
    **
    ** Returns the replacement stored in aChar[] (with HIBIT masked off) when
    ** c falls inside the range selected from aDia[], otherwise c itself.
    ** Ranges whose aChar[] entry is '\0' therefore yield 0.
    **
    ** Both lookup tables are read-only, so they are declared "static const":
    ** they are emitted once as constant data instead of being re-created on
    ** the stack each time the function is called.
    */

    /* Sorted range table. Each entry packs (first codepoint << 3) together
    ** with, in the low 3 bits, the number of additional codepoints that
    ** follow the first one in the same range. */
    static const unsigned short aDia[] = {
      0, 1797, 1848, 1859, 1891, 1928, 1940, 1995,
      2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286,
      2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732,
      2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336,
      3456, 3696, 3712, 3728, 3744, 3766, 3832, 3896,
      3912, 3928, 3944, 3968, 4008, 4040, 4056, 4106,
      4138, 4170, 4202, 4234, 4266, 4296, 4312, 4344,
      4408, 4424, 4442, 4472, 4488, 4504, 6148, 6198,
      6264, 6280, 6360, 6429, 6505, 6529, 61448, 61468,
      61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704,
      61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914,
      61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218,
      62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554,
      62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766,
      62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118,
      63182, 63242, 63274, 63310, 63368, 63390,
    };
  #define HIBIT ((unsigned char)0x80)
    /* Replacement letter for the aDia[] entry at the same index. HIBIT marks
    ** entries that are only applied when bComplex is non-zero. */
    static const unsigned char aChar[] = {
      '\0', 'a', 'c', 'e', 'i', 'n',
      'o', 'u', 'y', 'y', 'a', 'c',
      'd', 'e', 'e', 'g', 'h', 'i',
      'j', 'k', 'l', 'n', 'o', 'r',
      's', 't', 'u', 'u', 'w', 'y',
      'z', 'o', 'u', 'a', 'i', 'o',
      'u', 'u'|HIBIT, 'a'|HIBIT, 'g', 'k', 'o',
      'o'|HIBIT, 'j', 'g', 'n', 'a'|HIBIT, 'a',
      'e', 'i', 'o', 'r', 'u', 's',
      't', 'h', 'a', 'e', 'o'|HIBIT, 'o',
      'o'|HIBIT, 'y', '\0', '\0', '\0', '\0',
      '\0', '\0', '\0', '\0', 'a', 'b',
      'c'|HIBIT, 'd', 'd', 'e'|HIBIT, 'e', 'e'|HIBIT,
      'f', 'g', 'h', 'h', 'i', 'i'|HIBIT,
      'k', 'l', 'l'|HIBIT, 'l', 'm', 'n',
      'o'|HIBIT, 'p', 'r', 'r'|HIBIT, 'r', 's',
      's'|HIBIT, 't', 'u', 'u'|HIBIT, 'v', 'w',
      'w', 'x', 'y', 'z', 'h', 't',
      'w', 'y', 'a', 'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT,
      'e', 'e'|HIBIT, 'e'|HIBIT, 'i', 'o', 'o'|HIBIT,
      'o'|HIBIT, 'o'|HIBIT, 'u', 'u'|HIBIT, 'u'|HIBIT, 'y',
    };

    /* With the low three bits forced on, key compares >= every entry whose
    ** first codepoint is <= c, regardless of that entry's length bits. */
    const unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
    int iLo = 0;
    int iHi = (int)(sizeof(aDia)/sizeof(aDia[0])) - 1;
    int iRes = 0;
    int iLast;

    /* Binary search for the last entry that is <= key. */
    while( iLo<=iHi ){
      int iMid = (iLo + iHi) / 2;
      if( key<aDia[iMid] ){
        iHi = iMid - 1;
      }else{
        iRes = iMid;
        iLo = iMid + 1;
      }
    }
    assert( key>=aDia[iRes] );

    /* "Complex" mappings are skipped unless the caller asked for them. */
    if( bComplex==0 && (aChar[iRes] & 0x80) ) return c;

    /* Last codepoint covered by the selected range. */
    iLast = (aDia[iRes]>>3) + (aDia[iRes]&0x07);
    if( c>iLast ) return c;
    return (int)aChar[iRes] & 0x7F;
  }
  85. /*
  86. ** Return true if the argument interpreted as a unicode codepoint
  87. ** is a diacritical modifier character.
  88. */
  int sqlite3Fts5UnicodeIsdiacritic(int c){
    /* Codepoints 768..817 are covered by two 32-bit bitmaps: the first one
    ** describes 768..799, the second 800..817. The return value is the
    ** selected bit itself (zero or a non-zero power of two), not 0/1. */
    unsigned int uBits;
    int iBit;

    if( c<768 || c>817 ) return 0;

    iBit = c - 768;
    if( iBit<32 ){
      uBits = 0x08029FDF;
    }else{
      uBits = 0x000361F8;
      iBit -= 32;
    }
    return (int)(uBits & ((unsigned int)1 << iBit));
  }
  97. /*
  98. ** Interpret the argument as a unicode codepoint. If the codepoint
  99. ** is an upper case character that has a lower case equivalent,
  100. ** return the codepoint corresponding to the lower case version.
  101. ** Otherwise, return a copy of the argument.
  102. **
  103. ** The results are undefined if the value passed to this function
  104. ** is less than zero.
  105. */
  int sqlite3Fts5UnicodeFold(int c, int eRemoveDiacritic){
    /* Case-folding rules, sorted by iCode. A rule covers the nRange
    ** codepoints that begin at iCode.
    **
    ** Bit 0 of flags selects the stride: when clear, every codepoint in the
    ** range is folded; when set, only the codepoints that share the parity
    ** of iCode (i.e. every second one, beginning with iCode) are folded.
    **
    ** The remaining seven bits (flags>>1) index aiOff[]. A codepoint C that
    ** needs folding maps to ((C + aiOff[flags>>1]) & 0xFFFF), so the large
    ** aiOff[] values act as negative deltas modulo 65536.
    **
    ** The contents of this array are generated by parsing the CaseFolding.txt
    ** file distributed as part of the "Unicode Character Database". See
    ** http://www.unicode.org for details.
    */
    static const struct TableEntry {
      unsigned short iCode;
      unsigned char flags;
      unsigned char nRange;
    } aEntry[] = {
      {65, 14, 26}, {181, 64, 1}, {192, 14, 23},
      {216, 14, 7}, {256, 1, 48}, {306, 1, 6},
      {313, 1, 16}, {330, 1, 46}, {376, 116, 1},
      {377, 1, 6}, {383, 104, 1}, {385, 50, 1},
      {386, 1, 4}, {390, 44, 1}, {391, 0, 1},
      {393, 42, 2}, {395, 0, 1}, {398, 32, 1},
      {399, 38, 1}, {400, 40, 1}, {401, 0, 1},
      {403, 42, 1}, {404, 46, 1}, {406, 52, 1},
      {407, 48, 1}, {408, 0, 1}, {412, 52, 1},
      {413, 54, 1}, {415, 56, 1}, {416, 1, 6},
      {422, 60, 1}, {423, 0, 1}, {425, 60, 1},
      {428, 0, 1}, {430, 60, 1}, {431, 0, 1},
      {433, 58, 2}, {435, 1, 4}, {439, 62, 1},
      {440, 0, 1}, {444, 0, 1}, {452, 2, 1},
      {453, 0, 1}, {455, 2, 1}, {456, 0, 1},
      {458, 2, 1}, {459, 1, 18}, {478, 1, 18},
      {497, 2, 1}, {498, 1, 4}, {502, 122, 1},
      {503, 134, 1}, {504, 1, 40}, {544, 110, 1},
      {546, 1, 18}, {570, 70, 1}, {571, 0, 1},
      {573, 108, 1}, {574, 68, 1}, {577, 0, 1},
      {579, 106, 1}, {580, 28, 1}, {581, 30, 1},
      {582, 1, 10}, {837, 36, 1}, {880, 1, 4},
      {886, 0, 1}, {902, 18, 1}, {904, 16, 3},
      {908, 26, 1}, {910, 24, 2}, {913, 14, 17},
      {931, 14, 9}, {962, 0, 1}, {975, 4, 1},
      {976, 140, 1}, {977, 142, 1}, {981, 146, 1},
      {982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
      {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
      {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
      {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
      {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
      {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
      {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
      {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
      {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
      {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
      {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
      {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
      {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
      {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
      {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
      {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
      {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
      {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
      {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
      {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
      {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
      {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
      {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
      {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
      {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
      {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
      {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
      {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
      {65313, 14, 26},
    };
    static const unsigned short aiOff[] = {
      1, 2, 8, 15, 16, 26, 28, 32,
      37, 38, 40, 48, 63, 64, 69, 71,
      79, 80, 116, 202, 203, 205, 206, 207,
      209, 210, 211, 213, 214, 217, 218, 219,
      775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
      54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
      57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
      65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
      65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
      65514, 65521, 65527, 65528, 65529,
    };

    const struct TableEntry *pRule;
    int iFirst;
    int iLast;
    int iMatch;
    int folded;

    assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

    /* ASCII: only 'A'..'Z' change; diacritic removal is never applied. */
    if( c<128 ){
      return ( c>='A' && c<='Z' ) ? c + ('a' - 'A') : c;
    }

    /* Outside the 16-bit range only 66560..66599 are folded (by +40);
    ** diacritic removal is not applied here either. */
    if( c>=65536 ){
      return ( c>=66560 && c<66600 ) ? c + 40 : c;
    }

    /* 128..65535: locate the last rule whose iCode is <= c. */
    iFirst = 0;
    iLast = (int)(sizeof(aEntry)/sizeof(aEntry[0])) - 1;
    iMatch = -1;
    assert( c>aEntry[0].iCode );
    while( iFirst<=iLast ){
      int iMid = (iFirst + iLast) / 2;
      if( c<aEntry[iMid].iCode ){
        iLast = iMid - 1;
      }else{
        iMatch = iMid;
        iFirst = iMid + 1;
      }
    }
    assert( iMatch>=0 && c>=aEntry[iMatch].iCode );
    pRule = &aEntry[iMatch];

    /* Apply the rule if c is inside its range and, for stride-2 rules, has
    ** the same parity as the first codepoint of the range. */
    folded = c;
    if( c<(pRule->iCode + pRule->nRange)
     && 0==(0x01 & pRule->flags & (pRule->iCode ^ c))
    ){
      folded = (c + (aiOff[pRule->flags>>1])) & 0x0000FFFF;
      assert( folded>0 );
    }

    /* Any non-zero eRemoveDiacritic strips diacritics; the value 2 also
    ** enables the "complex" mappings. */
    if( eRemoveDiacritic ){
      folded = fts5_remove_diacritic(folded, eRemoveDiacritic==2);
    }
    return folded;
  }
  232. int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){
  233. aArray[0] = 1;
  234. switch( zCat[0] ){
  235. case 'C':
  236. switch( zCat[1] ){
  237. case 'c': aArray[1] = 1; break;
  238. case 'f': aArray[2] = 1; break;
  239. case 'n': aArray[3] = 1; break;
  240. case 's': aArray[4] = 1; break;
  241. case 'o': aArray[31] = 1; break;
  242. case '*':
  243. aArray[1] = 1;
  244. aArray[2] = 1;
  245. aArray[3] = 1;
  246. aArray[4] = 1;
  247. aArray[31] = 1;
  248. break;
  249. default: return 1; }
  250. break;
  251. case 'L':
  252. switch( zCat[1] ){
  253. case 'l': aArray[5] = 1; break;
  254. case 'm': aArray[6] = 1; break;
  255. case 'o': aArray[7] = 1; break;
  256. case 't': aArray[8] = 1; break;
  257. case 'u': aArray[9] = 1; break;
  258. case 'C': aArray[30] = 1; break;
  259. case '*':
  260. aArray[5] = 1;
  261. aArray[6] = 1;
  262. aArray[7] = 1;
  263. aArray[8] = 1;
  264. aArray[9] = 1;
  265. aArray[30] = 1;
  266. break;
  267. default: return 1; }
  268. break;
  269. case 'M':
  270. switch( zCat[1] ){
  271. case 'c': aArray[10] = 1; break;
  272. case 'e': aArray[11] = 1; break;
  273. case 'n': aArray[12] = 1; break;
  274. case '*':
  275. aArray[10] = 1;
  276. aArray[11] = 1;
  277. aArray[12] = 1;
  278. break;
  279. default: return 1; }
  280. break;
  281. case 'N':
  282. switch( zCat[1] ){
  283. case 'd': aArray[13] = 1; break;
  284. case 'l': aArray[14] = 1; break;
  285. case 'o': aArray[15] = 1; break;
  286. case '*':
  287. aArray[13] = 1;
  288. aArray[14] = 1;
  289. aArray[15] = 1;
  290. break;
  291. default: return 1; }
  292. break;
  293. case 'P':
  294. switch( zCat[1] ){
  295. case 'c': aArray[16] = 1; break;
  296. case 'd': aArray[17] = 1; break;
  297. case 'e': aArray[18] = 1; break;
  298. case 'f': aArray[19] = 1; break;
  299. case 'i': aArray[20] = 1; break;
  300. case 'o': aArray[21] = 1; break;
  301. case 's': aArray[22] = 1; break;
  302. case '*':
  303. aArray[16] = 1;
  304. aArray[17] = 1;
  305. aArray[18] = 1;
  306. aArray[19] = 1;
  307. aArray[20] = 1;
  308. aArray[21] = 1;
  309. aArray[22] = 1;
  310. break;
  311. default: return 1; }
  312. break;
  313. case 'S':
  314. switch( zCat[1] ){
  315. case 'c': aArray[23] = 1; break;
  316. case 'k': aArray[24] = 1; break;
  317. case 'm': aArray[25] = 1; break;
  318. case 'o': aArray[26] = 1; break;
  319. case '*':
  320. aArray[23] = 1;
  321. aArray[24] = 1;
  322. aArray[25] = 1;
  323. aArray[26] = 1;
  324. break;
  325. default: return 1; }
  326. break;
  327. case 'Z':
  328. switch( zCat[1] ){
  329. case 'l': aArray[27] = 1; break;
  330. case 'p': aArray[28] = 1; break;
  331. case 's': aArray[29] = 1; break;
  332. case '*':
  333. aArray[27] = 1;
  334. aArray[28] = 1;
  335. aArray[29] = 1;
  336. break;
  337. default: return 1; }
  338. break;
  339. default:
  340. return 1;
  341. }
  342. return 0;
  343. }
  /*
  ** Table of 17 non-decreasing element offsets into aFts5UnicodeMap[].
  ** The distinct values 0, 1471, 1753, 1760 and 1763 are the indexes at
  ** which the ascending runs stored in aFts5UnicodeMap[] start over, and
  ** the final value, 1765, is the number of elements in that array.
  **
  ** NOTE(review): the code that reads this table is outside this view.
  ** Presumably entry i is the start of the run for the i-th block of 65536
  ** codepoints and entry i+1 its end - confirm against the lookup routine.
  */
  344. static u16 aFts5UnicodeBlock[] = {
  345. 0, 1471, 1753, 1760, 1760, 1760, 1760, 1760, 1760, 1760,
  346. 1760, 1760, 1760, 1760, 1760, 1763, 1765,
  347. };
  /*
  ** Machine-generated lookup table (see the DO NOT EDIT notice at the top
  ** of this file). The values form several ascending runs; a new run begins
  ** at element indexes 0, 1471, 1753, 1760 and 1763, which are the distinct
  ** offsets listed in aFts5UnicodeBlock[]. The array has 1765 elements.
  **
  ** NOTE(review): the consumer of this table is outside this view.
  ** Presumably each element is the low 16 bits of the first codepoint of a
  ** range, paired index-for-index with aFts5UnicodeData[] - confirm against
  ** the lookup routine before relying on this.
  */
  348. static u16 aFts5UnicodeMap[] = {
  349. 0, 32, 33, 36, 37, 40, 41, 42, 43, 44,
  350. 45, 46, 48, 58, 60, 63, 65, 91, 92, 93,
  351. 94, 95, 96, 97, 123, 124, 125, 126, 127, 160,
  352. 161, 162, 166, 167, 168, 169, 170, 171, 172, 173,
  353. 174, 175, 176, 177, 178, 180, 181, 182, 184, 185,
  354. 186, 187, 188, 191, 192, 215, 216, 223, 247, 248,
  355. 256, 312, 313, 329, 330, 377, 383, 385, 387, 388,
  356. 391, 394, 396, 398, 402, 403, 405, 406, 409, 412,
  357. 414, 415, 417, 418, 423, 427, 428, 431, 434, 436,
  358. 437, 440, 442, 443, 444, 446, 448, 452, 453, 454,
  359. 455, 456, 457, 458, 459, 460, 461, 477, 478, 496,
  360. 497, 498, 499, 500, 503, 505, 506, 564, 570, 572,
  361. 573, 575, 577, 580, 583, 584, 592, 660, 661, 688,
  362. 706, 710, 722, 736, 741, 748, 749, 750, 751, 768,
  363. 880, 884, 885, 886, 890, 891, 894, 900, 902, 903,
  364. 904, 908, 910, 912, 913, 931, 940, 975, 977, 978,
  365. 981, 984, 1008, 1012, 1014, 1015, 1018, 1020, 1021, 1072,
  366. 1120, 1154, 1155, 1160, 1162, 1217, 1231, 1232, 1329, 1369,
  367. 1370, 1377, 1417, 1418, 1423, 1425, 1470, 1471, 1472, 1473,
  368. 1475, 1476, 1478, 1479, 1488, 1520, 1523, 1536, 1542, 1545,
  369. 1547, 1548, 1550, 1552, 1563, 1566, 1568, 1600, 1601, 1611,
  370. 1632, 1642, 1646, 1648, 1649, 1748, 1749, 1750, 1757, 1758,
  371. 1759, 1765, 1767, 1769, 1770, 1774, 1776, 1786, 1789, 1791,
  372. 1792, 1807, 1808, 1809, 1810, 1840, 1869, 1958, 1969, 1984,
  373. 1994, 2027, 2036, 2038, 2039, 2042, 2048, 2070, 2074, 2075,
  374. 2084, 2085, 2088, 2089, 2096, 2112, 2137, 2142, 2208, 2210,
  375. 2276, 2304, 2307, 2308, 2362, 2363, 2364, 2365, 2366, 2369,
  376. 2377, 2381, 2382, 2384, 2385, 2392, 2402, 2404, 2406, 2416,
  377. 2417, 2418, 2425, 2433, 2434, 2437, 2447, 2451, 2474, 2482,
  378. 2486, 2492, 2493, 2494, 2497, 2503, 2507, 2509, 2510, 2519,
  379. 2524, 2527, 2530, 2534, 2544, 2546, 2548, 2554, 2555, 2561,
  380. 2563, 2565, 2575, 2579, 2602, 2610, 2613, 2616, 2620, 2622,
  381. 2625, 2631, 2635, 2641, 2649, 2654, 2662, 2672, 2674, 2677,
  382. 2689, 2691, 2693, 2703, 2707, 2730, 2738, 2741, 2748, 2749,
  383. 2750, 2753, 2759, 2761, 2763, 2765, 2768, 2784, 2786, 2790,
  384. 2800, 2801, 2817, 2818, 2821, 2831, 2835, 2858, 2866, 2869,
  385. 2876, 2877, 2878, 2879, 2880, 2881, 2887, 2891, 2893, 2902,
  386. 2903, 2908, 2911, 2914, 2918, 2928, 2929, 2930, 2946, 2947,
  387. 2949, 2958, 2962, 2969, 2972, 2974, 2979, 2984, 2990, 3006,
  388. 3008, 3009, 3014, 3018, 3021, 3024, 3031, 3046, 3056, 3059,
  389. 3065, 3066, 3073, 3077, 3086, 3090, 3114, 3125, 3133, 3134,
  390. 3137, 3142, 3146, 3157, 3160, 3168, 3170, 3174, 3192, 3199,
  391. 3202, 3205, 3214, 3218, 3242, 3253, 3260, 3261, 3262, 3263,
  392. 3264, 3270, 3271, 3274, 3276, 3285, 3294, 3296, 3298, 3302,
  393. 3313, 3330, 3333, 3342, 3346, 3389, 3390, 3393, 3398, 3402,
  394. 3405, 3406, 3415, 3424, 3426, 3430, 3440, 3449, 3450, 3458,
  395. 3461, 3482, 3507, 3517, 3520, 3530, 3535, 3538, 3542, 3544,
  396. 3570, 3572, 3585, 3633, 3634, 3636, 3647, 3648, 3654, 3655,
  397. 3663, 3664, 3674, 3713, 3716, 3719, 3722, 3725, 3732, 3737,
  398. 3745, 3749, 3751, 3754, 3757, 3761, 3762, 3764, 3771, 3773,
  399. 3776, 3782, 3784, 3792, 3804, 3840, 3841, 3844, 3859, 3860,
  400. 3861, 3864, 3866, 3872, 3882, 3892, 3893, 3894, 3895, 3896,
  401. 3897, 3898, 3899, 3900, 3901, 3902, 3904, 3913, 3953, 3967,
  402. 3968, 3973, 3974, 3976, 3981, 3993, 4030, 4038, 4039, 4046,
  403. 4048, 4053, 4057, 4096, 4139, 4141, 4145, 4146, 4152, 4153,
  404. 4155, 4157, 4159, 4160, 4170, 4176, 4182, 4184, 4186, 4190,
  405. 4193, 4194, 4197, 4199, 4206, 4209, 4213, 4226, 4227, 4229,
  406. 4231, 4237, 4238, 4239, 4240, 4250, 4253, 4254, 4256, 4295,
  407. 4301, 4304, 4347, 4348, 4349, 4682, 4688, 4696, 4698, 4704,
  408. 4746, 4752, 4786, 4792, 4800, 4802, 4808, 4824, 4882, 4888,
  409. 4957, 4960, 4969, 4992, 5008, 5024, 5120, 5121, 5741, 5743,
  410. 5760, 5761, 5787, 5788, 5792, 5867, 5870, 5888, 5902, 5906,
  411. 5920, 5938, 5941, 5952, 5970, 5984, 5998, 6002, 6016, 6068,
  412. 6070, 6071, 6078, 6086, 6087, 6089, 6100, 6103, 6104, 6107,
  413. 6108, 6109, 6112, 6128, 6144, 6150, 6151, 6155, 6158, 6160,
  414. 6176, 6211, 6212, 6272, 6313, 6314, 6320, 6400, 6432, 6435,
  415. 6439, 6441, 6448, 6450, 6451, 6457, 6464, 6468, 6470, 6480,
  416. 6512, 6528, 6576, 6593, 6600, 6608, 6618, 6622, 6656, 6679,
  417. 6681, 6686, 6688, 6741, 6742, 6743, 6744, 6752, 6753, 6754,
  418. 6755, 6757, 6765, 6771, 6783, 6784, 6800, 6816, 6823, 6824,
  419. 6912, 6916, 6917, 6964, 6965, 6966, 6971, 6972, 6973, 6978,
  420. 6979, 6981, 6992, 7002, 7009, 7019, 7028, 7040, 7042, 7043,
  421. 7073, 7074, 7078, 7080, 7082, 7083, 7084, 7086, 7088, 7098,
  422. 7142, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7164, 7168,
  423. 7204, 7212, 7220, 7222, 7227, 7232, 7245, 7248, 7258, 7288,
  424. 7294, 7360, 7376, 7379, 7380, 7393, 7394, 7401, 7405, 7406,
  425. 7410, 7412, 7413, 7424, 7468, 7531, 7544, 7545, 7579, 7616,
  426. 7676, 7680, 7830, 7838, 7936, 7944, 7952, 7960, 7968, 7976,
  427. 7984, 7992, 8000, 8008, 8016, 8025, 8027, 8029, 8031, 8033,
  428. 8040, 8048, 8064, 8072, 8080, 8088, 8096, 8104, 8112, 8118,
  429. 8120, 8124, 8125, 8126, 8127, 8130, 8134, 8136, 8140, 8141,
  430. 8144, 8150, 8152, 8157, 8160, 8168, 8173, 8178, 8182, 8184,
  431. 8188, 8189, 8192, 8203, 8208, 8214, 8216, 8217, 8218, 8219,
  432. 8221, 8222, 8223, 8224, 8232, 8233, 8234, 8239, 8240, 8249,
  433. 8250, 8251, 8255, 8257, 8260, 8261, 8262, 8263, 8274, 8275,
  434. 8276, 8277, 8287, 8288, 8298, 8304, 8305, 8308, 8314, 8317,
  435. 8318, 8319, 8320, 8330, 8333, 8334, 8336, 8352, 8400, 8413,
  436. 8417, 8418, 8421, 8448, 8450, 8451, 8455, 8456, 8458, 8459,
  437. 8462, 8464, 8467, 8468, 8469, 8470, 8472, 8473, 8478, 8484,
  438. 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8496, 8500,
  439. 8501, 8505, 8506, 8508, 8510, 8512, 8517, 8519, 8522, 8523,
  440. 8524, 8526, 8527, 8528, 8544, 8579, 8581, 8585, 8592, 8597,
  441. 8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623,
  442. 8654, 8656, 8658, 8659, 8660, 8661, 8692, 8960, 8968, 8972,
  443. 8992, 8994, 9001, 9002, 9003, 9084, 9085, 9115, 9140, 9180,
  444. 9186, 9216, 9280, 9312, 9372, 9450, 9472, 9655, 9656, 9665,
  445. 9666, 9720, 9728, 9839, 9840, 9985, 10088, 10089, 10090, 10091,
  446. 10092, 10093, 10094, 10095, 10096, 10097, 10098, 10099, 10100, 10101,
  447. 10102, 10132, 10176, 10181, 10182, 10183, 10214, 10215, 10216, 10217,
  448. 10218, 10219, 10220, 10221, 10222, 10223, 10224, 10240, 10496, 10627,
  449. 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637,
  450. 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647,
  451. 10648, 10649, 10712, 10713, 10714, 10715, 10716, 10748, 10749, 10750,
  452. 11008, 11056, 11077, 11079, 11088, 11264, 11312, 11360, 11363, 11365,
  453. 11367, 11374, 11377, 11378, 11380, 11381, 11383, 11388, 11390, 11393,
  454. 11394, 11492, 11493, 11499, 11503, 11506, 11513, 11517, 11518, 11520,
  455. 11559, 11565, 11568, 11631, 11632, 11647, 11648, 11680, 11688, 11696,
  456. 11704, 11712, 11720, 11728, 11736, 11744, 11776, 11778, 11779, 11780,
  457. 11781, 11782, 11785, 11786, 11787, 11788, 11789, 11790, 11799, 11800,
  458. 11802, 11803, 11804, 11805, 11806, 11808, 11809, 11810, 11811, 11812,
  459. 11813, 11814, 11815, 11816, 11817, 11818, 11823, 11824, 11834, 11904,
  460. 11931, 12032, 12272, 12288, 12289, 12292, 12293, 12294, 12295, 12296,
  461. 12297, 12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12306,
  462. 12308, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12316, 12317,
  463. 12318, 12320, 12321, 12330, 12334, 12336, 12337, 12342, 12344, 12347,
  464. 12348, 12349, 12350, 12353, 12441, 12443, 12445, 12447, 12448, 12449,
  465. 12539, 12540, 12543, 12549, 12593, 12688, 12690, 12694, 12704, 12736,
  466. 12784, 12800, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938,
  467. 12977, 12992, 13056, 13312, 19893, 19904, 19968, 40908, 40960, 40981,
  468. 40982, 42128, 42192, 42232, 42238, 42240, 42508, 42509, 42512, 42528,
  469. 42538, 42560, 42606, 42607, 42608, 42611, 42612, 42622, 42623, 42624,
  470. 42655, 42656, 42726, 42736, 42738, 42752, 42775, 42784, 42786, 42800,
  471. 42802, 42864, 42865, 42873, 42878, 42888, 42889, 42891, 42896, 42912,
  472. 43000, 43002, 43003, 43010, 43011, 43014, 43015, 43019, 43020, 43043,
  473. 43045, 43047, 43048, 43056, 43062, 43064, 43065, 43072, 43124, 43136,
  474. 43138, 43188, 43204, 43214, 43216, 43232, 43250, 43256, 43259, 43264,
  475. 43274, 43302, 43310, 43312, 43335, 43346, 43359, 43360, 43392, 43395,
  476. 43396, 43443, 43444, 43446, 43450, 43452, 43453, 43457, 43471, 43472,
  477. 43486, 43520, 43561, 43567, 43569, 43571, 43573, 43584, 43587, 43588,
  478. 43596, 43597, 43600, 43612, 43616, 43632, 43633, 43639, 43642, 43643,
  479. 43648, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713,
  480. 43714, 43739, 43741, 43742, 43744, 43755, 43756, 43758, 43760, 43762,
  481. 43763, 43765, 43766, 43777, 43785, 43793, 43808, 43816, 43968, 44003,
  482. 44005, 44006, 44008, 44009, 44011, 44012, 44013, 44016, 44032, 55203,
  483. 55216, 55243, 55296, 56191, 56319, 57343, 57344, 63743, 63744, 64112,
  484. 64256, 64275, 64285, 64286, 64287, 64297, 64298, 64312, 64318, 64320,
  485. 64323, 64326, 64434, 64467, 64830, 64831, 64848, 64914, 65008, 65020,
  486. 65021, 65024, 65040, 65047, 65048, 65049, 65056, 65072, 65073, 65075,
  487. 65077, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086,
  488. 65087, 65088, 65089, 65090, 65091, 65092, 65093, 65095, 65096, 65097,
  489. 65101, 65104, 65108, 65112, 65113, 65114, 65115, 65116, 65117, 65118,
  490. 65119, 65122, 65123, 65124, 65128, 65129, 65130, 65136, 65142, 65279,
  491. 65281, 65284, 65285, 65288, 65289, 65290, 65291, 65292, 65293, 65294,
  492. 65296, 65306, 65308, 65311, 65313, 65339, 65340, 65341, 65342, 65343,
  493. 65344, 65345, 65371, 65372, 65373, 65374, 65375, 65376, 65377, 65378,
  494. 65379, 65380, 65382, 65392, 65393, 65438, 65440, 65474, 65482, 65490,
  495. 65498, 65504, 65506, 65507, 65508, 65509, 65512, 65513, 65517, 65529,
  496. 65532, 0, 13, 40, 60, 63, 80, 128, 256, 263, /* run restarts at the 0 (index 1471) */
  497. 311, 320, 373, 377, 394, 400, 464, 509, 640, 672,
  498. 768, 800, 816, 833, 834, 842, 896, 927, 928, 968,
  499. 976, 977, 1024, 1064, 1104, 1184, 2048, 2056, 2058, 2103,
  500. 2108, 2111, 2135, 2136, 2304, 2326, 2335, 2336, 2367, 2432,
  501. 2494, 2560, 2561, 2565, 2572, 2576, 2581, 2585, 2616, 2623,
  502. 2624, 2640, 2656, 2685, 2687, 2816, 2873, 2880, 2904, 2912,
  503. 2936, 3072, 3680, 4096, 4097, 4098, 4099, 4152, 4167, 4178,
  504. 4198, 4224, 4226, 4227, 4272, 4275, 4279, 4281, 4283, 4285,
  505. 4286, 4304, 4336, 4352, 4355, 4391, 4396, 4397, 4406, 4416,
  506. 4480, 4482, 4483, 4531, 4534, 4543, 4545, 4549, 4560, 5760,
  507. 5803, 5804, 5805, 5806, 5808, 5814, 5815, 5824, 8192, 9216,
  508. 9328, 12288, 26624, 28416, 28496, 28497, 28559, 28563, 45056, 53248,
  509. 53504, 53545, 53605, 53607, 53610, 53613, 53619, 53627, 53635, 53637,
  510. 53644, 53674, 53678, 53760, 53826, 53829, 54016, 54112, 54272, 54298,
  511. 54324, 54350, 54358, 54376, 54402, 54428, 54430, 54434, 54437, 54441,
  512. 54446, 54454, 54459, 54461, 54469, 54480, 54506, 54532, 54535, 54541,
  513. 54550, 54558, 54584, 54587, 54592, 54598, 54602, 54610, 54636, 54662,
  514. 54688, 54714, 54740, 54766, 54792, 54818, 54844, 54870, 54896, 54922,
  515. 54952, 54977, 54978, 55003, 55004, 55010, 55035, 55036, 55061, 55062,
  516. 55068, 55093, 55094, 55119, 55120, 55126, 55151, 55152, 55177, 55178,
  517. 55184, 55209, 55210, 55235, 55236, 55242, 55246, 60928, 60933, 60961,
  518. 60964, 60967, 60969, 60980, 60985, 60987, 60994, 60999, 61001, 61003,
  519. 61005, 61009, 61012, 61015, 61017, 61019, 61021, 61023, 61025, 61028,
  520. 61031, 61036, 61044, 61049, 61054, 61056, 61067, 61089, 61093, 61099,
  521. 61168, 61440, 61488, 61600, 61617, 61633, 61649, 61696, 61712, 61744,
  522. 61808, 61926, 61968, 62016, 62032, 62208, 62256, 62263, 62336, 62368,
  523. 62406, 62432, 62464, 62528, 62530, 62713, 62720, 62784, 62800, 62971,
  524. 63045, 63104, 63232, 0, 42710, 42752, 46900, 46912, 47133, 63488, /* run restarts at the 0 (index 1753) */
  525. 1, 32, 256, 0, 65533, /* runs start at the 1 (index 1760) and at the 0 (index 1763) */
  526. };
  527. static u16 aFts5UnicodeData[] = {
  528. 1025, 61, 117, 55, 117, 54, 50, 53, 57, 53,
  529. 49, 85, 333, 85, 121, 85, 841, 54, 53, 50,
  530. 56, 48, 56, 837, 54, 57, 50, 57, 1057, 61,
  531. 53, 151, 58, 53, 56, 58, 39, 52, 57, 34,
  532. 58, 56, 58, 57, 79, 56, 37, 85, 56, 47,
  533. 39, 51, 111, 53, 745, 57, 233, 773, 57, 261,
  534. 1822, 37, 542, 37, 1534, 222, 69, 73, 37, 126,
  535. 126, 73, 69, 137, 37, 73, 37, 105, 101, 73,
  536. 37, 73, 37, 190, 158, 37, 126, 126, 73, 37,
  537. 126, 94, 37, 39, 94, 69, 135, 41, 40, 37,
  538. 41, 40, 37, 41, 40, 37, 542, 37, 606, 37,
  539. 41, 40, 37, 126, 73, 37, 1886, 197, 73, 37,
  540. 73, 69, 126, 105, 37, 286, 2181, 39, 869, 582,
  541. 152, 390, 472, 166, 248, 38, 56, 38, 568, 3596,
  542. 158, 38, 56, 94, 38, 101, 53, 88, 41, 53,
  543. 105, 41, 73, 37, 553, 297, 1125, 94, 37, 105,
  544. 101, 798, 133, 94, 57, 126, 94, 37, 1641, 1541,
  545. 1118, 58, 172, 75, 1790, 478, 37, 2846, 1225, 38,
  546. 213, 1253, 53, 49, 55, 1452, 49, 44, 53, 76,
  547. 53, 76, 53, 44, 871, 103, 85, 162, 121, 85,
  548. 55, 85, 90, 364, 53, 85, 1031, 38, 327, 684,
  549. 333, 149, 71, 44, 3175, 53, 39, 236, 34, 58,
  550. 204, 70, 76, 58, 140, 71, 333, 103, 90, 39,
  551. 469, 34, 39, 44, 967, 876, 2855, 364, 39, 333,
  552. 1063, 300, 70, 58, 117, 38, 711, 140, 38, 300,
  553. 38, 108, 38, 172, 501, 807, 108, 53, 39, 359,
  554. 876, 108, 42, 1735, 44, 42, 44, 39, 106, 268,
  555. 138, 44, 74, 39, 236, 327, 76, 85, 333, 53,
  556. 38, 199, 231, 44, 74, 263, 71, 711, 231, 39,
  557. 135, 44, 39, 106, 140, 74, 74, 44, 39, 42,
  558. 71, 103, 76, 333, 71, 87, 207, 58, 55, 76,
  559. 42, 199, 71, 711, 231, 71, 71, 71, 44, 106,
  560. 76, 76, 108, 44, 135, 39, 333, 76, 103, 44,
  561. 76, 42, 295, 103, 711, 231, 71, 167, 44, 39,
  562. 106, 172, 76, 42, 74, 44, 39, 71, 76, 333,
  563. 53, 55, 44, 74, 263, 71, 711, 231, 71, 167,
  564. 44, 39, 42, 44, 42, 140, 74, 74, 44, 44,
  565. 42, 71, 103, 76, 333, 58, 39, 207, 44, 39,
  566. 199, 103, 135, 71, 39, 71, 71, 103, 391, 74,
  567. 44, 74, 106, 106, 44, 39, 42, 333, 111, 218,
  568. 55, 58, 106, 263, 103, 743, 327, 167, 39, 108,
  569. 138, 108, 140, 76, 71, 71, 76, 333, 239, 58,
  570. 74, 263, 103, 743, 327, 167, 44, 39, 42, 44,
  571. 170, 44, 74, 74, 76, 74, 39, 71, 76, 333,
  572. 71, 74, 263, 103, 1319, 39, 106, 140, 106, 106,
  573. 44, 39, 42, 71, 76, 333, 207, 58, 199, 74,
  574. 583, 775, 295, 39, 231, 44, 106, 108, 44, 266,
  575. 74, 53, 1543, 44, 71, 236, 55, 199, 38, 268,
  576. 53, 333, 85, 71, 39, 71, 39, 39, 135, 231,
  577. 103, 39, 39, 71, 135, 44, 71, 204, 76, 39,
  578. 167, 38, 204, 333, 135, 39, 122, 501, 58, 53,
  579. 122, 76, 218, 333, 335, 58, 44, 58, 44, 58,
  580. 44, 54, 50, 54, 50, 74, 263, 1159, 460, 42,
  581. 172, 53, 76, 167, 364, 1164, 282, 44, 218, 90,
  582. 181, 154, 85, 1383, 74, 140, 42, 204, 42, 76,
  583. 74, 76, 39, 333, 213, 199, 74, 76, 135, 108,
  584. 39, 106, 71, 234, 103, 140, 423, 44, 74, 76,
  585. 202, 44, 39, 42, 333, 106, 44, 90, 1225, 41,
  586. 41, 1383, 53, 38, 10631, 135, 231, 39, 135, 1319,
  587. 135, 1063, 135, 231, 39, 135, 487, 1831, 135, 2151,
  588. 108, 309, 655, 519, 346, 2727, 49, 19847, 85, 551,
  589. 61, 839, 54, 50, 2407, 117, 110, 423, 135, 108,
  590. 583, 108, 85, 583, 76, 423, 103, 76, 1671, 76,
  591. 42, 236, 266, 44, 74, 364, 117, 38, 117, 55,
  592. 39, 44, 333, 335, 213, 49, 149, 108, 61, 333,
  593. 1127, 38, 1671, 1319, 44, 39, 2247, 935, 108, 138,
  594. 76, 106, 74, 44, 202, 108, 58, 85, 333, 967,
  595. 167, 1415, 554, 231, 74, 333, 47, 1114, 743, 76,
  596. 106, 85, 1703, 42, 44, 42, 236, 44, 42, 44,
  597. 74, 268, 202, 332, 44, 333, 333, 245, 38, 213,
  598. 140, 42, 1511, 44, 42, 172, 42, 44, 170, 44,
  599. 74, 231, 333, 245, 346, 300, 314, 76, 42, 967,
  600. 42, 140, 74, 76, 42, 44, 74, 71, 333, 1415,
  601. 44, 42, 76, 106, 44, 42, 108, 74, 149, 1159,
  602. 266, 268, 74, 76, 181, 333, 103, 333, 967, 198,
  603. 85, 277, 108, 53, 428, 42, 236, 135, 44, 135,
  604. 74, 44, 71, 1413, 2022, 421, 38, 1093, 1190, 1260,
  605. 140, 4830, 261, 3166, 261, 265, 197, 201, 261, 265,
  606. 261, 265, 197, 201, 261, 41, 41, 41, 94, 229,
  607. 265, 453, 261, 264, 261, 264, 261, 264, 165, 69,
  608. 137, 40, 56, 37, 120, 101, 69, 137, 40, 120,
  609. 133, 69, 137, 120, 261, 169, 120, 101, 69, 137,
  610. 40, 88, 381, 162, 209, 85, 52, 51, 54, 84,
  611. 51, 54, 52, 277, 59, 60, 162, 61, 309, 52,
  612. 51, 149, 80, 117, 57, 54, 50, 373, 57, 53,
  613. 48, 341, 61, 162, 194, 47, 38, 207, 121, 54,
  614. 50, 38, 335, 121, 54, 50, 422, 855, 428, 139,
  615. 44, 107, 396, 90, 41, 154, 41, 90, 37, 105,
  616. 69, 105, 37, 58, 41, 90, 57, 169, 218, 41,
  617. 58, 41, 58, 41, 58, 137, 58, 37, 137, 37,
  618. 135, 37, 90, 69, 73, 185, 94, 101, 58, 57,
  619. 90, 37, 58, 527, 1134, 94, 142, 47, 185, 186,
  620. 89, 154, 57, 90, 57, 90, 57, 250, 57, 1018,
  621. 89, 90, 57, 58, 57, 1018, 8601, 282, 153, 666,
  622. 89, 250, 54, 50, 2618, 57, 986, 825, 1306, 217,
  623. 602, 1274, 378, 1935, 2522, 719, 5882, 57, 314, 57,
  624. 1754, 281, 3578, 57, 4634, 3322, 54, 50, 54, 50,
  625. 54, 50, 54, 50, 54, 50, 54, 50, 54, 50,
  626. 975, 1434, 185, 54, 50, 1017, 54, 50, 54, 50,
  627. 54, 50, 54, 50, 54, 50, 537, 8218, 4217, 54,
  628. 50, 54, 50, 54, 50, 54, 50, 54, 50, 54,
  629. 50, 54, 50, 54, 50, 54, 50, 54, 50, 54,
  630. 50, 2041, 54, 50, 54, 50, 1049, 54, 50, 8281,
  631. 1562, 697, 90, 217, 346, 1513, 1509, 126, 73, 69,
  632. 254, 105, 37, 94, 37, 94, 165, 70, 105, 37,
  633. 3166, 37, 218, 158, 108, 94, 149, 47, 85, 1221,
  634. 37, 37, 1799, 38, 53, 44, 743, 231, 231, 231,
  635. 231, 231, 231, 231, 231, 1036, 85, 52, 51, 52,
  636. 51, 117, 52, 51, 53, 52, 51, 309, 49, 85,
  637. 49, 53, 52, 51, 85, 52, 51, 54, 50, 54,
  638. 50, 54, 50, 54, 50, 181, 38, 341, 81, 858,
  639. 2874, 6874, 410, 61, 117, 58, 38, 39, 46, 54,
  640. 50, 54, 50, 54, 50, 54, 50, 54, 50, 90,
  641. 54, 50, 54, 50, 54, 50, 54, 50, 49, 54,
  642. 82, 58, 302, 140, 74, 49, 166, 90, 110, 38,
  643. 39, 53, 90, 2759, 76, 88, 70, 39, 49, 2887,
  644. 53, 102, 39, 1319, 3015, 90, 143, 346, 871, 1178,
  645. 519, 1018, 335, 986, 271, 58, 495, 1050, 335, 1274,
  646. 495, 2042, 8218, 39, 39, 2074, 39, 39, 679, 38,
  647. 36583, 1786, 1287, 198, 85, 8583, 38, 117, 519, 333,
  648. 71, 1502, 39, 44, 107, 53, 332, 53, 38, 798,
  649. 44, 2247, 334, 76, 213, 760, 294, 88, 478, 69,
  650. 2014, 38, 261, 190, 350, 38, 88, 158, 158, 382,
  651. 70, 37, 231, 44, 103, 44, 135, 44, 743, 74,
  652. 76, 42, 154, 207, 90, 55, 58, 1671, 149, 74,
  653. 1607, 522, 44, 85, 333, 588, 199, 117, 39, 333,
  654. 903, 268, 85, 743, 364, 74, 53, 935, 108, 42,
  655. 1511, 44, 74, 140, 74, 44, 138, 437, 38, 333,
  656. 85, 1319, 204, 74, 76, 74, 76, 103, 44, 263,
  657. 44, 42, 333, 149, 519, 38, 199, 122, 39, 42,
  658. 1543, 44, 39, 108, 71, 76, 167, 76, 39, 44,
  659. 39, 71, 38, 85, 359, 42, 76, 74, 85, 39,
  660. 70, 42, 44, 199, 199, 199, 231, 231, 1127, 74,
  661. 44, 74, 44, 74, 53, 42, 44, 333, 39, 39,
  662. 743, 1575, 36, 68, 68, 36, 63, 63, 11719, 3399,
  663. 229, 165, 39, 44, 327, 57, 423, 167, 39, 71,
  664. 71, 3463, 536, 11623, 54, 50, 2055, 1735, 391, 55,
  665. 58, 524, 245, 54, 50, 53, 236, 53, 81, 80,
  666. 54, 50, 54, 50, 54, 50, 54, 50, 54, 50,
  667. 54, 50, 54, 50, 54, 50, 85, 54, 50, 149,
  668. 112, 117, 149, 49, 54, 50, 54, 50, 54, 50,
  669. 117, 57, 49, 121, 53, 55, 85, 167, 4327, 34,
  670. 117, 55, 117, 54, 50, 53, 57, 53, 49, 85,
  671. 333, 85, 121, 85, 841, 54, 53, 50, 56, 48,
  672. 56, 837, 54, 57, 50, 57, 54, 50, 53, 54,
  673. 50, 85, 327, 38, 1447, 70, 999, 199, 199, 199,
  674. 103, 87, 57, 56, 58, 87, 58, 153, 90, 98,
  675. 90, 391, 839, 615, 71, 487, 455, 3943, 117, 1455,
  676. 314, 1710, 143, 570, 47, 410, 1466, 44, 935, 1575,
  677. 999, 143, 551, 46, 263, 46, 967, 53, 1159, 263,
  678. 53, 174, 1289, 1285, 2503, 333, 199, 39, 1415, 71,
  679. 39, 743, 53, 271, 711, 207, 53, 839, 53, 1799,
  680. 71, 39, 108, 76, 140, 135, 103, 871, 108, 44,
  681. 271, 309, 935, 79, 53, 1735, 245, 711, 271, 615,
  682. 271, 2343, 1007, 42, 44, 42, 1703, 492, 245, 655,
  683. 333, 76, 42, 1447, 106, 140, 74, 76, 85, 34,
  684. 149, 807, 333, 108, 1159, 172, 42, 268, 333, 149,
  685. 76, 42, 1543, 106, 300, 74, 135, 149, 333, 1383,
  686. 44, 42, 44, 74, 204, 42, 44, 333, 28135, 3182,
  687. 149, 34279, 18215, 2215, 39, 1482, 140, 422, 71, 7898,
  688. 1274, 1946, 74, 108, 122, 202, 258, 268, 90, 236,
  689. 986, 140, 1562, 2138, 108, 58, 2810, 591, 841, 837,
  690. 841, 229, 581, 841, 837, 41, 73, 41, 73, 137,
  691. 265, 133, 37, 229, 357, 841, 837, 73, 137, 265,
  692. 233, 837, 73, 137, 169, 41, 233, 837, 841, 837,
  693. 841, 837, 841, 837, 841, 837, 841, 837, 841, 901,
  694. 809, 57, 805, 57, 197, 809, 57, 805, 57, 197,
  695. 809, 57, 805, 57, 197, 809, 57, 805, 57, 197,
  696. 809, 57, 805, 57, 197, 94, 1613, 135, 871, 71,
  697. 39, 39, 327, 135, 39, 39, 39, 39, 39, 39,
  698. 103, 71, 39, 39, 39, 39, 39, 39, 71, 39,
  699. 135, 231, 135, 135, 39, 327, 551, 103, 167, 551,
  700. 89, 1434, 3226, 506, 474, 506, 506, 367, 1018, 1946,
  701. 1402, 954, 1402, 314, 90, 1082, 218, 2266, 666, 1210,
  702. 186, 570, 2042, 58, 5850, 154, 2010, 154, 794, 2266,
  703. 378, 2266, 3738, 39, 39, 39, 39, 39, 39, 17351,
  704. 34, 3074, 7692, 63, 63,
  705. };
  706. int sqlite3Fts5UnicodeCategory(u32 iCode) {
  707. int iRes = -1;
  708. int iHi;
  709. int iLo;
  710. int ret;
  711. u16 iKey;
  712. if( iCode>=(1<<20) ){
  713. return 0;
  714. }
  715. iLo = aFts5UnicodeBlock[(iCode>>16)];
  716. iHi = aFts5UnicodeBlock[1+(iCode>>16)];
  717. iKey = (iCode & 0xFFFF);
  718. while( iHi>iLo ){
  719. int iTest = (iHi + iLo) / 2;
  720. assert( iTest>=iLo && iTest<iHi );
  721. if( iKey>=aFts5UnicodeMap[iTest] ){
  722. iRes = iTest;
  723. iLo = iTest+1;
  724. }else{
  725. iHi = iTest;
  726. }
  727. }
  728. if( iRes<0 ) return 0;
  729. if( iKey>=(aFts5UnicodeMap[iRes]+(aFts5UnicodeData[iRes]>>5)) ) return 0;
  730. ret = aFts5UnicodeData[iRes] & 0x1F;
  731. if( ret!=30 ) return ret;
  732. return ((iKey - aFts5UnicodeMap[iRes]) & 0x01) ? 5 : 9;
  733. }
  734. void sqlite3Fts5UnicodeAscii(u8 *aArray, u8 *aAscii){
  735. int i = 0;
  736. int iTbl = 0;
  737. while( i<128 ){
  738. int bToken = aArray[ aFts5UnicodeData[iTbl] & 0x1F ];
  739. int n = (aFts5UnicodeData[iTbl] >> 5) + i;
  740. for(; i<128 && i<n; i++){
  741. aAscii[i] = (u8)bToken;
  742. }
  743. iTbl++;
  744. }
  745. aAscii[0] = 0; /* 0x00 is never a token character */
  746. }