fts3_unicode2.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. /*
  2. ** 2012-05-25
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. ******************************************************************************
  12. */
  13. /*
  14. ** DO NOT EDIT THIS MACHINE GENERATED FILE.
  15. */
  16. #ifndef SQLITE_DISABLE_FTS3_UNICODE
  17. #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
  18. #include <assert.h>
  19. /*
  20. ** Return true if the argument corresponds to a unicode codepoint
  21. ** classified as either a letter or a number. Otherwise false.
  22. **
  23. ** The results are undefined if the value passed to this function
  24. ** is less than zero.
  25. */
  26. int sqlite3FtsUnicodeIsalnum(int c){
  27. /* Each unsigned integer in the following array corresponds to a contiguous
  28. ** range of unicode codepoints that are not either letters or numbers (i.e.
  29. ** codepoints for which this function should return 0).
  30. **
  31. ** The most significant 22 bits in each 32-bit value contain the first
  32. ** codepoint in the range. The least significant 10 bits are used to store
  33. ** the size of the range (always at least 1). In other words, the value
  34. ** ((C<<22) + N) represents a range of N codepoints starting with codepoint
  35. ** C. It is not possible to represent a range larger than 1023 codepoints
  36. ** using this format.
  37. */
  38. static const unsigned int aEntry[] = {
  39. 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
  40. 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
  41. 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
  42. 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
  43. 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
  44. 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
  45. 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
  46. 0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
  47. 0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
  48. 0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
  49. 0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
  50. 0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
  51. 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
  52. 0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
  53. 0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
  54. 0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
  55. 0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
  56. 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
  57. 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
  58. 0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
  59. 0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
  60. 0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
  61. 0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
  62. 0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
  63. 0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
  64. 0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
  65. 0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
  66. 0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
  67. 0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
  68. 0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
  69. 0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
  70. 0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
  71. 0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
  72. 0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
  73. 0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
  74. 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
  75. 0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
  76. 0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
  77. 0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
  78. 0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
  79. 0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
  80. 0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
  81. 0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
  82. 0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
  83. 0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
  84. 0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
  85. 0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
  86. 0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
  87. 0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
  88. 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
  89. 0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
  90. 0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
  91. 0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
  92. 0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
  93. 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
  94. 0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
  95. 0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
  96. 0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
  97. 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
  98. 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
  99. 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
  100. 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
  101. 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
  102. 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
  103. 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
  104. 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
  105. 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
  106. 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
  107. 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
  108. 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
  109. 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
  110. 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
  111. 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
  112. 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
  113. 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
  114. 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
  115. 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
  116. 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
  117. 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
  118. 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
  119. 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
  120. 0x380400F0,
  121. };
  122. static const unsigned int aAscii[4] = {
  123. 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
  124. };
  125. if( (unsigned int)c<128 ){
  126. return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
  127. }else if( (unsigned int)c<(1<<22) ){
  128. unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
  129. int iRes = 0;
  130. int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
  131. int iLo = 0;
  132. while( iHi>=iLo ){
  133. int iTest = (iHi + iLo) / 2;
  134. if( key >= aEntry[iTest] ){
  135. iRes = iTest;
  136. iLo = iTest+1;
  137. }else{
  138. iHi = iTest-1;
  139. }
  140. }
  141. assert( aEntry[0]<key );
  142. assert( key>=aEntry[iRes] );
  143. return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
  144. }
  145. return 1;
  146. }
  147. /*
  148. ** If the argument is a codepoint corresponding to a lowercase letter
  149. ** in the ASCII range with a diacritic added, return the codepoint
  150. ** of the ASCII letter only. For example, if passed 235 - "LATIN
  151. ** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER
  152. ** E"). The resuls of passing a codepoint that corresponds to an
  153. ** uppercase letter are undefined.
  154. */
  155. static int remove_diacritic(int c, int bComplex){
  156. unsigned short aDia[] = {
  157. 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995,
  158. 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286,
  159. 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732,
  160. 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336,
  161. 3456, 3696, 3712, 3728, 3744, 3766, 3832, 3896,
  162. 3912, 3928, 3944, 3968, 4008, 4040, 4056, 4106,
  163. 4138, 4170, 4202, 4234, 4266, 4296, 4312, 4344,
  164. 4408, 4424, 4442, 4472, 4488, 4504, 6148, 6198,
  165. 6264, 6280, 6360, 6429, 6505, 6529, 61448, 61468,
  166. 61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704,
  167. 61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914,
  168. 61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218,
  169. 62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554,
  170. 62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766,
  171. 62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118,
  172. 63182, 63242, 63274, 63310, 63368, 63390,
  173. };
  174. #define HIBIT ((unsigned char)0x80)
  175. unsigned char aChar[] = {
  176. '\0', 'a', 'c', 'e', 'i', 'n',
  177. 'o', 'u', 'y', 'y', 'a', 'c',
  178. 'd', 'e', 'e', 'g', 'h', 'i',
  179. 'j', 'k', 'l', 'n', 'o', 'r',
  180. 's', 't', 'u', 'u', 'w', 'y',
  181. 'z', 'o', 'u', 'a', 'i', 'o',
  182. 'u', 'u'|HIBIT, 'a'|HIBIT, 'g', 'k', 'o',
  183. 'o'|HIBIT, 'j', 'g', 'n', 'a'|HIBIT, 'a',
  184. 'e', 'i', 'o', 'r', 'u', 's',
  185. 't', 'h', 'a', 'e', 'o'|HIBIT, 'o',
  186. 'o'|HIBIT, 'y', '\0', '\0', '\0', '\0',
  187. '\0', '\0', '\0', '\0', 'a', 'b',
  188. 'c'|HIBIT, 'd', 'd', 'e'|HIBIT, 'e', 'e'|HIBIT,
  189. 'f', 'g', 'h', 'h', 'i', 'i'|HIBIT,
  190. 'k', 'l', 'l'|HIBIT, 'l', 'm', 'n',
  191. 'o'|HIBIT, 'p', 'r', 'r'|HIBIT, 'r', 's',
  192. 's'|HIBIT, 't', 'u', 'u'|HIBIT, 'v', 'w',
  193. 'w', 'x', 'y', 'z', 'h', 't',
  194. 'w', 'y', 'a', 'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT,
  195. 'e', 'e'|HIBIT, 'e'|HIBIT, 'i', 'o', 'o'|HIBIT,
  196. 'o'|HIBIT, 'o'|HIBIT, 'u', 'u'|HIBIT, 'u'|HIBIT, 'y',
  197. };
  198. unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  199. int iRes = 0;
  200. int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  201. int iLo = 0;
  202. while( iHi>=iLo ){
  203. int iTest = (iHi + iLo) / 2;
  204. if( key >= aDia[iTest] ){
  205. iRes = iTest;
  206. iLo = iTest+1;
  207. }else{
  208. iHi = iTest-1;
  209. }
  210. }
  211. assert( key>=aDia[iRes] );
  212. if( bComplex==0 && (aChar[iRes] & 0x80) ) return c;
  213. return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);
  214. }
  215. /*
  216. ** Return true if the argument interpreted as a unicode codepoint
  217. ** is a diacritical modifier character.
  218. */
  219. int sqlite3FtsUnicodeIsdiacritic(int c){
  220. unsigned int mask0 = 0x08029FDF;
  221. unsigned int mask1 = 0x000361F8;
  222. if( c<768 || c>817 ) return 0;
  223. return (c < 768+32) ?
  224. (mask0 & ((unsigned int)1 << (c-768))) :
  225. (mask1 & ((unsigned int)1 << (c-768-32)));
  226. }
  227. /*
  228. ** Interpret the argument as a unicode codepoint. If the codepoint
  229. ** is an upper case character that has a lower case equivalent,
  230. ** return the codepoint corresponding to the lower case version.
  231. ** Otherwise, return a copy of the argument.
  232. **
  233. ** The results are undefined if the value passed to this function
  234. ** is less than zero.
  235. */
  236. int sqlite3FtsUnicodeFold(int c, int eRemoveDiacritic){
  237. /* Each entry in the following array defines a rule for folding a range
  238. ** of codepoints to lower case. The rule applies to a range of nRange
  239. ** codepoints starting at codepoint iCode.
  240. **
  241. ** If the least significant bit in flags is clear, then the rule applies
  242. ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  243. ** need to be folded). Or, if it is set, then the rule only applies to
  244. ** every second codepoint in the range, starting with codepoint C.
  245. **
  246. ** The 7 most significant bits in flags are an index into the aiOff[]
  247. ** array. If a specific codepoint C does require folding, then its lower
  248. ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
  249. **
  250. ** The contents of this array are generated by parsing the CaseFolding.txt
  251. ** file distributed as part of the "Unicode Character Database". See
  252. ** http://www.unicode.org for details.
  253. */
  254. static const struct TableEntry {
  255. unsigned short iCode;
  256. unsigned char flags;
  257. unsigned char nRange;
  258. } aEntry[] = {
  259. {65, 14, 26}, {181, 64, 1}, {192, 14, 23},
  260. {216, 14, 7}, {256, 1, 48}, {306, 1, 6},
  261. {313, 1, 16}, {330, 1, 46}, {376, 116, 1},
  262. {377, 1, 6}, {383, 104, 1}, {385, 50, 1},
  263. {386, 1, 4}, {390, 44, 1}, {391, 0, 1},
  264. {393, 42, 2}, {395, 0, 1}, {398, 32, 1},
  265. {399, 38, 1}, {400, 40, 1}, {401, 0, 1},
  266. {403, 42, 1}, {404, 46, 1}, {406, 52, 1},
  267. {407, 48, 1}, {408, 0, 1}, {412, 52, 1},
  268. {413, 54, 1}, {415, 56, 1}, {416, 1, 6},
  269. {422, 60, 1}, {423, 0, 1}, {425, 60, 1},
  270. {428, 0, 1}, {430, 60, 1}, {431, 0, 1},
  271. {433, 58, 2}, {435, 1, 4}, {439, 62, 1},
  272. {440, 0, 1}, {444, 0, 1}, {452, 2, 1},
  273. {453, 0, 1}, {455, 2, 1}, {456, 0, 1},
  274. {458, 2, 1}, {459, 1, 18}, {478, 1, 18},
  275. {497, 2, 1}, {498, 1, 4}, {502, 122, 1},
  276. {503, 134, 1}, {504, 1, 40}, {544, 110, 1},
  277. {546, 1, 18}, {570, 70, 1}, {571, 0, 1},
  278. {573, 108, 1}, {574, 68, 1}, {577, 0, 1},
  279. {579, 106, 1}, {580, 28, 1}, {581, 30, 1},
  280. {582, 1, 10}, {837, 36, 1}, {880, 1, 4},
  281. {886, 0, 1}, {902, 18, 1}, {904, 16, 3},
  282. {908, 26, 1}, {910, 24, 2}, {913, 14, 17},
  283. {931, 14, 9}, {962, 0, 1}, {975, 4, 1},
  284. {976, 140, 1}, {977, 142, 1}, {981, 146, 1},
  285. {982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
  286. {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
  287. {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
  288. {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
  289. {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
  290. {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
  291. {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
  292. {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
  293. {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
  294. {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
  295. {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
  296. {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
  297. {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
  298. {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
  299. {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
  300. {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
  301. {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
  302. {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
  303. {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
  304. {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
  305. {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
  306. {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
  307. {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
  308. {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
  309. {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
  310. {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
  311. {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
  312. {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
  313. {65313, 14, 26},
  314. };
  315. static const unsigned short aiOff[] = {
  316. 1, 2, 8, 15, 16, 26, 28, 32,
  317. 37, 38, 40, 48, 63, 64, 69, 71,
  318. 79, 80, 116, 202, 203, 205, 206, 207,
  319. 209, 210, 211, 213, 214, 217, 218, 219,
  320. 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
  321. 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
  322. 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
  323. 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
  324. 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
  325. 65514, 65521, 65527, 65528, 65529,
  326. };
  327. int ret = c;
  328. assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
  329. if( c<128 ){
  330. if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
  331. }else if( c<65536 ){
  332. const struct TableEntry *p;
  333. int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
  334. int iLo = 0;
  335. int iRes = -1;
  336. assert( c>aEntry[0].iCode );
  337. while( iHi>=iLo ){
  338. int iTest = (iHi + iLo) / 2;
  339. int cmp = (c - aEntry[iTest].iCode);
  340. if( cmp>=0 ){
  341. iRes = iTest;
  342. iLo = iTest+1;
  343. }else{
  344. iHi = iTest-1;
  345. }
  346. }
  347. assert( iRes>=0 && c>=aEntry[iRes].iCode );
  348. p = &aEntry[iRes];
  349. if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
  350. ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
  351. assert( ret>0 );
  352. }
  353. if( eRemoveDiacritic ){
  354. ret = remove_diacritic(ret, eRemoveDiacritic==2);
  355. }
  356. }
  357. else if( c>=66560 && c<66600 ){
  358. ret = c + 40;
  359. }
  360. return ret;
  361. }
  362. #endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
  363. #endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */