/* mkkeywordhash.c */
  /*
  ** Compile and run this standalone program in order to generate code that
  ** implements a function that will translate alphabetic identifiers into
  ** parser token codes.
  */
  #include <stdio.h>
  #include <string.h>
  #include <stdlib.h>
  #include <assert.h>
  /*
  ** A header comment placed at the beginning of generated code.
  ** main() prints this text verbatim before it emits anything else.
  */
  static const char zHdr[] =
    "/***** This file contains automatically generated code ******\n"
    "**\n"
    "** The code in this file has been automatically generated by\n"
    "**\n"
    "** sqlite/tool/mkkeywordhash.c\n"
    "**\n"
    "** The code in this file implements a function that determines whether\n"
    "** or not a given identifier is really an SQL keyword. The same thing\n"
    "** might be implemented more directly using a hand-written hash table.\n"
    "** But by using this automatically generated code, the size of the code\n"
    "** is substantially reduced. This is important for embedded applications\n"
    "** on platforms with limited memory.\n"
    "*/\n"
  ;
  /*
  ** All the keywords of the SQL language are stored in a hash
  ** table composed of instances of the following structure.
  **
  ** Only the first four fields (zName, zTokenType, mask, priority) are
  ** given by the aKeywordTable[] initializer.  The remaining fields start
  ** out zero and are filled in by main() while it builds the tables.
  */
  typedef struct Keyword Keyword;
  struct Keyword {
    char *zName;         /* The keyword name */
    char *zTokenType;    /* Token value for this keyword */
    int mask;            /* Code this keyword if non-zero */
    int priority;        /* Put higher priorities earlier in the hash chain */
    int id;              /* Unique ID for this record */
    int hash;            /* Hash on the keyword */
    int offset;          /* Offset to start of name string */
    int len;             /* Length of this keyword, not counting final \000 */
    int prefix;          /* Number of characters in prefix */
    int longestSuffix;   /* Longest suffix that is a prefix on another word */
    int iNext;           /* Index in aKeywordTable[] of next with same hash */
    int substrId;        /* Id to another keyword this keyword is embedded in */
    int substrOffset;    /* Offset into substrId for start of this keyword */
    char zOrigName[20];  /* Original keyword name before processing */
  };
  /*
  ** Define masks used to determine which keywords are allowed.
  **
  ** Each mask is a distinct bit unless the matching feature is compiled
  ** out, in which case the mask is 0.  main() discards every keyword whose
  ** combined mask evaluates to 0.  ALWAYS is unconditional.  ORDERSET is
  ** the one opt-in mask: it is 0 unless
  ** SQLITE_ENABLE_ORDERED_SET_AGGREGATES is defined.
  */
  #if defined(SQLITE_OMIT_ALTERTABLE) || defined(SQLITE_OMIT_VIRTUALTABLE)
  # define ALTER 0
  #else
  # define ALTER 0x00000001
  #endif
  #define ALWAYS 0x00000002
  #ifdef SQLITE_OMIT_ANALYZE
  # define ANALYZE 0
  #else
  # define ANALYZE 0x00000004
  #endif
  #ifdef SQLITE_OMIT_ATTACH
  # define ATTACH 0
  #else
  # define ATTACH 0x00000008
  #endif
  #ifdef SQLITE_OMIT_AUTOINCREMENT
  # define AUTOINCR 0
  #else
  # define AUTOINCR 0x00000010
  #endif
  #ifdef SQLITE_OMIT_CAST
  # define CAST 0
  #else
  # define CAST 0x00000020
  #endif
  #ifdef SQLITE_OMIT_COMPOUND_SELECT
  # define COMPOUND 0
  #else
  # define COMPOUND 0x00000040
  #endif
  #ifdef SQLITE_OMIT_CONFLICT_CLAUSE
  # define CONFLICT 0
  #else
  # define CONFLICT 0x00000080
  #endif
  #ifdef SQLITE_OMIT_EXPLAIN
  # define EXPLAIN 0
  #else
  # define EXPLAIN 0x00000100
  #endif
  #ifdef SQLITE_OMIT_FOREIGN_KEY
  # define FKEY 0
  #else
  # define FKEY 0x00000200
  #endif
  #ifdef SQLITE_OMIT_PRAGMA
  # define PRAGMA 0
  #else
  # define PRAGMA 0x00000400
  #endif
  #ifdef SQLITE_OMIT_REINDEX
  # define REINDEX 0
  #else
  # define REINDEX 0x00000800
  #endif
  #ifdef SQLITE_OMIT_SUBQUERY
  # define SUBQUERY 0
  #else
  # define SUBQUERY 0x00001000
  #endif
  #ifdef SQLITE_OMIT_TRIGGER
  # define TRIGGER 0
  #else
  # define TRIGGER 0x00002000
  #endif
  #if defined(SQLITE_OMIT_AUTOVACUUM) && \
      (defined(SQLITE_OMIT_VACUUM) || defined(SQLITE_OMIT_ATTACH))
  # define VACUUM 0
  #else
  # define VACUUM 0x00004000
  #endif
  #ifdef SQLITE_OMIT_VIEW
  # define VIEW 0
  #else
  # define VIEW 0x00008000
  #endif
  #ifdef SQLITE_OMIT_VIRTUALTABLE
  # define VTAB 0
  #else
  # define VTAB 0x00010000
  #endif
  #ifdef SQLITE_OMIT_AUTOVACUUM
  # define AUTOVACUUM 0
  #else
  # define AUTOVACUUM 0x00020000
  #endif
  #ifdef SQLITE_OMIT_CTE
  # define CTE 0
  #else
  # define CTE 0x00040000
  #endif
  #ifdef SQLITE_OMIT_UPSERT
  # define UPSERT 0
  #else
  # define UPSERT 0x00080000
  #endif
  #ifdef SQLITE_OMIT_WINDOWFUNC
  # define WINDOWFUNC 0
  #else
  # define WINDOWFUNC 0x00100000
  #endif
  #ifdef SQLITE_OMIT_GENERATED_COLUMNS
  # define GENCOL 0
  #else
  # define GENCOL 0x00200000
  #endif
  #ifdef SQLITE_OMIT_RETURNING
  # define RETURNING 0
  #else
  # define RETURNING 0x00400000
  #endif
  #ifndef SQLITE_ENABLE_ORDERED_SET_AGGREGATES
  # define ORDERSET 0
  #else
  # define ORDERSET 0x00800000
  #endif
  169. /*
  170. ** These are the keywords
  171. */
  172. static Keyword aKeywordTable[] = {
  173. { "ABORT", "TK_ABORT", CONFLICT|TRIGGER, 0 },
  174. { "ACTION", "TK_ACTION", FKEY, 0 },
  175. { "ADD", "TK_ADD", ALTER, 1 },
  176. { "AFTER", "TK_AFTER", TRIGGER, 0 },
  177. { "ALL", "TK_ALL", ALWAYS, 0 },
  178. { "ALTER", "TK_ALTER", ALTER, 0 },
  179. { "ALWAYS", "TK_ALWAYS", GENCOL, 0 },
  180. { "ANALYZE", "TK_ANALYZE", ANALYZE, 0 },
  181. { "AND", "TK_AND", ALWAYS, 10 },
  182. { "AS", "TK_AS", ALWAYS, 10 },
  183. { "ASC", "TK_ASC", ALWAYS, 0 },
  184. { "ATTACH", "TK_ATTACH", ATTACH, 1 },
  185. { "AUTOINCREMENT", "TK_AUTOINCR", AUTOINCR, 0 },
  186. { "BEFORE", "TK_BEFORE", TRIGGER, 0 },
  187. { "BEGIN", "TK_BEGIN", ALWAYS, 1 },
  188. { "BETWEEN", "TK_BETWEEN", ALWAYS, 5 },
  189. { "BY", "TK_BY", ALWAYS, 10 },
  190. { "CASCADE", "TK_CASCADE", FKEY, 1 },
  191. { "CASE", "TK_CASE", ALWAYS, 5 },
  192. { "CAST", "TK_CAST", CAST, 5 },
  193. { "CHECK", "TK_CHECK", ALWAYS, 1 },
  194. { "COLLATE", "TK_COLLATE", ALWAYS, 1 },
  195. { "COLUMN", "TK_COLUMNKW", ALTER, 1 },
  196. { "COMMIT", "TK_COMMIT", ALWAYS, 1 },
  197. { "CONFLICT", "TK_CONFLICT", CONFLICT, 0 },
  198. { "CONSTRAINT", "TK_CONSTRAINT", ALWAYS, 1 },
  199. { "CREATE", "TK_CREATE", ALWAYS, 2 },
  200. { "CROSS", "TK_JOIN_KW", ALWAYS, 3 },
  201. { "CURRENT", "TK_CURRENT", WINDOWFUNC, 1 },
  202. { "CURRENT_DATE", "TK_CTIME_KW", ALWAYS, 1 },
  203. { "CURRENT_TIME", "TK_CTIME_KW", ALWAYS, 1 },
  204. { "CURRENT_TIMESTAMP","TK_CTIME_KW", ALWAYS, 1 },
  205. { "DATABASE", "TK_DATABASE", ATTACH, 0 },
  206. { "DEFAULT", "TK_DEFAULT", ALWAYS, 1 },
  207. { "DEFERRED", "TK_DEFERRED", ALWAYS, 1 },
  208. { "DEFERRABLE", "TK_DEFERRABLE", FKEY, 1 },
  209. { "DELETE", "TK_DELETE", ALWAYS, 10 },
  210. { "DESC", "TK_DESC", ALWAYS, 3 },
  211. { "DETACH", "TK_DETACH", ATTACH, 0 },
  212. { "DISTINCT", "TK_DISTINCT", ALWAYS, 5 },
  213. { "DO", "TK_DO", UPSERT, 2 },
  214. { "DROP", "TK_DROP", ALWAYS, 1 },
  215. { "END", "TK_END", ALWAYS, 1 },
  216. { "EACH", "TK_EACH", TRIGGER, 1 },
  217. { "ELSE", "TK_ELSE", ALWAYS, 2 },
  218. { "ESCAPE", "TK_ESCAPE", ALWAYS, 4 },
  219. { "EXCEPT", "TK_EXCEPT", COMPOUND, 4 },
  220. { "EXCLUSIVE", "TK_EXCLUSIVE", ALWAYS, 1 },
  221. { "EXCLUDE", "TK_EXCLUDE", WINDOWFUNC, 1 },
  222. { "EXISTS", "TK_EXISTS", ALWAYS, 4 },
  223. { "EXPLAIN", "TK_EXPLAIN", EXPLAIN, 1 },
  224. { "FAIL", "TK_FAIL", CONFLICT|TRIGGER, 1 },
  225. { "FILTER", "TK_FILTER", WINDOWFUNC, 4 },
  226. { "FIRST", "TK_FIRST", ALWAYS, 4 },
  227. { "FOLLOWING", "TK_FOLLOWING", WINDOWFUNC, 4 },
  228. { "FOR", "TK_FOR", TRIGGER, 2 },
  229. { "FOREIGN", "TK_FOREIGN", FKEY, 1 },
  230. { "FROM", "TK_FROM", ALWAYS, 10 },
  231. { "FULL", "TK_JOIN_KW", ALWAYS, 3 },
  232. { "GENERATED", "TK_GENERATED", ALWAYS, 1 },
  233. { "GLOB", "TK_LIKE_KW", ALWAYS, 3 },
  234. { "GROUP", "TK_GROUP", ALWAYS, 5 },
  235. { "GROUPS", "TK_GROUPS", WINDOWFUNC, 2 },
  236. { "HAVING", "TK_HAVING", ALWAYS, 5 },
  237. { "IF", "TK_IF", ALWAYS, 2 },
  238. { "IGNORE", "TK_IGNORE", CONFLICT|TRIGGER, 1 },
  239. { "IMMEDIATE", "TK_IMMEDIATE", ALWAYS, 1 },
  240. { "IN", "TK_IN", ALWAYS, 10 },
  241. { "INDEX", "TK_INDEX", ALWAYS, 1 },
  242. { "INDEXED", "TK_INDEXED", ALWAYS, 0 },
  243. { "INITIALLY", "TK_INITIALLY", FKEY, 1 },
  244. { "INNER", "TK_JOIN_KW", ALWAYS, 1 },
  245. { "INSERT", "TK_INSERT", ALWAYS, 10 },
  246. { "INSTEAD", "TK_INSTEAD", TRIGGER, 1 },
  247. { "INTERSECT", "TK_INTERSECT", COMPOUND, 5 },
  248. { "INTO", "TK_INTO", ALWAYS, 10 },
  249. { "IS", "TK_IS", ALWAYS, 5 },
  250. { "ISNULL", "TK_ISNULL", ALWAYS, 5 },
  251. { "JOIN", "TK_JOIN", ALWAYS, 5 },
  252. { "KEY", "TK_KEY", ALWAYS, 1 },
  253. { "LAST", "TK_LAST", ALWAYS, 4 },
  254. { "LEFT", "TK_JOIN_KW", ALWAYS, 5 },
  255. { "LIKE", "TK_LIKE_KW", ALWAYS, 5 },
  256. { "LIMIT", "TK_LIMIT", ALWAYS, 3 },
  257. { "MATCH", "TK_MATCH", ALWAYS, 2 },
  258. { "MATERIALIZED", "TK_MATERIALIZED", CTE, 12 },
  259. { "NATURAL", "TK_JOIN_KW", ALWAYS, 3 },
  260. { "NO", "TK_NO", FKEY|WINDOWFUNC, 2 },
  261. { "NOT", "TK_NOT", ALWAYS, 10 },
  262. { "NOTHING", "TK_NOTHING", UPSERT, 1 },
  263. { "NOTNULL", "TK_NOTNULL", ALWAYS, 3 },
  264. { "NULL", "TK_NULL", ALWAYS, 10 },
  265. { "NULLS", "TK_NULLS", ALWAYS, 3 },
  266. { "OF", "TK_OF", ALWAYS, 3 },
  267. { "OFFSET", "TK_OFFSET", ALWAYS, 1 },
  268. { "ON", "TK_ON", ALWAYS, 1 },
  269. { "OR", "TK_OR", ALWAYS, 9 },
  270. { "ORDER", "TK_ORDER", ALWAYS, 10 },
  271. { "OTHERS", "TK_OTHERS", WINDOWFUNC, 3 },
  272. { "OUTER", "TK_JOIN_KW", ALWAYS, 5 },
  273. { "OVER", "TK_OVER", WINDOWFUNC, 3 },
  274. { "PARTITION", "TK_PARTITION", WINDOWFUNC, 3 },
  275. { "PLAN", "TK_PLAN", EXPLAIN, 0 },
  276. { "PRAGMA", "TK_PRAGMA", PRAGMA, 0 },
  277. { "PRECEDING", "TK_PRECEDING", WINDOWFUNC, 3 },
  278. { "PRIMARY", "TK_PRIMARY", ALWAYS, 1 },
  279. { "QUERY", "TK_QUERY", EXPLAIN, 0 },
  280. { "RAISE", "TK_RAISE", TRIGGER, 1 },
  281. { "RANGE", "TK_RANGE", WINDOWFUNC, 3 },
  282. { "RECURSIVE", "TK_RECURSIVE", CTE, 3 },
  283. { "REFERENCES", "TK_REFERENCES", FKEY, 1 },
  284. { "REGEXP", "TK_LIKE_KW", ALWAYS, 3 },
  285. { "REINDEX", "TK_REINDEX", REINDEX, 1 },
  286. { "RELEASE", "TK_RELEASE", ALWAYS, 1 },
  287. { "RENAME", "TK_RENAME", ALTER, 1 },
  288. { "REPLACE", "TK_REPLACE", CONFLICT, 10 },
  289. { "RESTRICT", "TK_RESTRICT", FKEY, 1 },
  290. { "RETURNING", "TK_RETURNING", RETURNING, 10 },
  291. { "RIGHT", "TK_JOIN_KW", ALWAYS, 0 },
  292. { "ROLLBACK", "TK_ROLLBACK", ALWAYS, 1 },
  293. { "ROW", "TK_ROW", TRIGGER, 1 },
  294. { "ROWS", "TK_ROWS", ALWAYS, 1 },
  295. { "SAVEPOINT", "TK_SAVEPOINT", ALWAYS, 1 },
  296. { "SELECT", "TK_SELECT", ALWAYS, 10 },
  297. { "SET", "TK_SET", ALWAYS, 10 },
  298. { "TABLE", "TK_TABLE", ALWAYS, 1 },
  299. { "TEMP", "TK_TEMP", ALWAYS, 1 },
  300. { "TEMPORARY", "TK_TEMP", ALWAYS, 1 },
  301. { "THEN", "TK_THEN", ALWAYS, 3 },
  302. { "TIES", "TK_TIES", WINDOWFUNC, 3 },
  303. { "TO", "TK_TO", ALWAYS, 3 },
  304. { "TRANSACTION", "TK_TRANSACTION", ALWAYS, 1 },
  305. { "TRIGGER", "TK_TRIGGER", TRIGGER, 1 },
  306. { "UNBOUNDED", "TK_UNBOUNDED", WINDOWFUNC, 3 },
  307. { "UNION", "TK_UNION", COMPOUND, 3 },
  308. { "UNIQUE", "TK_UNIQUE", ALWAYS, 1 },
  309. { "UPDATE", "TK_UPDATE", ALWAYS, 10 },
  310. { "USING", "TK_USING", ALWAYS, 8 },
  311. { "VACUUM", "TK_VACUUM", VACUUM, 1 },
  312. { "VALUES", "TK_VALUES", ALWAYS, 10 },
  313. { "VIEW", "TK_VIEW", VIEW, 1 },
  314. { "VIRTUAL", "TK_VIRTUAL", VTAB, 1 },
  315. { "WHEN", "TK_WHEN", ALWAYS, 1 },
  316. { "WHERE", "TK_WHERE", ALWAYS, 10 },
  317. { "WINDOW", "TK_WINDOW", WINDOWFUNC, 3 },
  318. { "WITH", "TK_WITH", CTE, 4 },
  319. { "WITHIN", "TK_WITHIN", ORDERSET, 1 },
  320. { "WITHOUT", "TK_WITHOUT", ALWAYS, 1 },
  321. };
  322. /* Number of keywords */
  323. static int nKeyword = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0]));
  /* Map all alphabetic characters into lower-case for hashing. This is
  ** only valid for alphabetics. In particular it does not work for '_'
  ** and so the hash cannot be on a keyword position that might be an '_'.
  **
  ** The mapping simply sets bit 0x20 of X.
  */
  #define charMap(X) (0x20|(X))
  329. /*
  330. ** Comparision function for two Keyword records
  331. */
  332. static int keywordCompare1(const void *a, const void *b){
  333. const Keyword *pA = (Keyword*)a;
  334. const Keyword *pB = (Keyword*)b;
  335. int n = pA->len - pB->len;
  336. if( n==0 ){
  337. n = strcmp(pA->zName, pB->zName);
  338. }
  339. assert( n!=0 );
  340. return n;
  341. }
  342. static int keywordCompare2(const void *a, const void *b){
  343. const Keyword *pA = (Keyword*)a;
  344. const Keyword *pB = (Keyword*)b;
  345. int n = pB->longestSuffix - pA->longestSuffix;
  346. if( n==0 ){
  347. n = strcmp(pA->zName, pB->zName);
  348. }
  349. assert( n!=0 );
  350. return n;
  351. }
  352. static int keywordCompare3(const void *a, const void *b){
  353. const Keyword *pA = (Keyword*)a;
  354. const Keyword *pB = (Keyword*)b;
  355. int n = pA->offset - pB->offset;
  356. if( n==0 ) n = pB->id - pA->id;
  357. assert( n!=0 );
  358. return n;
  359. }
  360. /*
  361. ** Return a KeywordTable entry with the given id
  362. */
  363. static Keyword *findById(int id){
  364. int i;
  365. for(i=0; i<nKeyword; i++){
  366. if( aKeywordTable[i].id==id ) break;
  367. }
  368. return &aKeywordTable[i];
  369. }
  370. /*
  371. ** If aKeyword[*pFrom-1].iNext has a higher priority that aKeyword[*pFrom-1]
  372. ** itself, then swap them.
  373. */
  374. static void reorder(int *pFrom){
  375. int i = *pFrom - 1;
  376. int j;
  377. if( i<0 ) return;
  378. j = aKeywordTable[i].iNext;
  379. if( j==0 ) return;
  380. j--;
  381. if( aKeywordTable[i].priority >= aKeywordTable[j].priority ) return;
  382. aKeywordTable[i].iNext = aKeywordTable[j].iNext;
  383. aKeywordTable[j].iNext = i+1;
  384. *pFrom = j+1;
  385. reorder(&aKeywordTable[i].iNext);
  386. }
  /* Parameters to the hash function.
  **
  ** HASH_OP is the operator that combines the three hash terms when this
  ** program computes a hash.  HASH_CC is the same operator as a character,
  ** printed into the generated keywordCode() routine.  HASH_C0, HASH_C1
  ** and HASH_C2 multiply the first character, the last character and the
  ** keyword length respectively.
  */
  #define HASH_OP ^
  #define HASH_CC '^'
  #define HASH_C0 4
  #define HASH_C1 3
  #define HASH_C2 1
  394. /*
  395. ** This routine does the work. The generated code is printed on standard
  396. ** output.
  397. */
  398. int main(int argc, char **argv){
  399. int i, j, k, h;
  400. int bestSize, bestCount;
  401. int count;
  402. int nChar;
  403. int totalLen = 0;
  404. int aKWHash[1000]; /* 1000 is much bigger than nKeyword */
  405. char zKWText[2000];
  406. /* Remove entries from the list of keywords that have mask==0 */
  407. for(i=j=0; i<nKeyword; i++){
  408. if( aKeywordTable[i].mask==0 ) continue;
  409. if( j<i ){
  410. aKeywordTable[j] = aKeywordTable[i];
  411. }
  412. j++;
  413. }
  414. nKeyword = j;
  415. /* Fill in the lengths of strings and hashes for all entries. */
  416. for(i=0; i<nKeyword; i++){
  417. Keyword *p = &aKeywordTable[i];
  418. p->len = (int)strlen(p->zName);
  419. assert( p->len<sizeof(p->zOrigName) );
  420. memcpy(p->zOrigName, p->zName, p->len+1);
  421. totalLen += p->len;
  422. p->hash = (charMap(p->zName[0])*HASH_C0) HASH_OP
  423. (charMap(p->zName[p->len-1])*HASH_C1) HASH_OP
  424. (p->len*HASH_C2);
  425. p->id = i+1;
  426. }
  427. /* Sort the table from shortest to longest keyword */
  428. qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare1);
  429. /* Look for short keywords embedded in longer keywords */
  430. for(i=nKeyword-2; i>=0; i--){
  431. Keyword *p = &aKeywordTable[i];
  432. for(j=nKeyword-1; j>i && p->substrId==0; j--){
  433. Keyword *pOther = &aKeywordTable[j];
  434. if( pOther->substrId ) continue;
  435. if( pOther->len<=p->len ) continue;
  436. for(k=0; k<=pOther->len-p->len; k++){
  437. if( memcmp(p->zName, &pOther->zName[k], p->len)==0 ){
  438. p->substrId = pOther->id;
  439. p->substrOffset = k;
  440. break;
  441. }
  442. }
  443. }
  444. }
  445. /* Compute the longestSuffix value for every word */
  446. for(i=0; i<nKeyword; i++){
  447. Keyword *p = &aKeywordTable[i];
  448. if( p->substrId ) continue;
  449. for(j=0; j<nKeyword; j++){
  450. Keyword *pOther;
  451. if( j==i ) continue;
  452. pOther = &aKeywordTable[j];
  453. if( pOther->substrId ) continue;
  454. for(k=p->longestSuffix+1; k<p->len && k<pOther->len; k++){
  455. if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){
  456. p->longestSuffix = k;
  457. }
  458. }
  459. }
  460. }
  461. /* Sort the table into reverse order by length */
  462. qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare2);
  463. /* Fill in the offset for all entries */
  464. nChar = 0;
  465. for(i=0; i<nKeyword; i++){
  466. Keyword *p = &aKeywordTable[i];
  467. if( p->offset>0 || p->substrId ) continue;
  468. p->offset = nChar;
  469. nChar += p->len;
  470. for(k=p->len-1; k>=1; k--){
  471. for(j=i+1; j<nKeyword; j++){
  472. Keyword *pOther = &aKeywordTable[j];
  473. if( pOther->offset>0 || pOther->substrId ) continue;
  474. if( pOther->len<=k ) continue;
  475. if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){
  476. p = pOther;
  477. p->offset = nChar - k;
  478. nChar = p->offset + p->len;
  479. p->zName += k;
  480. p->len -= k;
  481. p->prefix = k;
  482. j = i;
  483. k = p->len;
  484. }
  485. }
  486. }
  487. }
  488. for(i=0; i<nKeyword; i++){
  489. Keyword *p = &aKeywordTable[i];
  490. if( p->substrId ){
  491. p->offset = findById(p->substrId)->offset + p->substrOffset;
  492. }
  493. }
  494. /* Sort the table by offset */
  495. qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare3);
  496. /* Figure out how big to make the hash table in order to minimize the
  497. ** number of collisions */
  498. bestSize = nKeyword;
  499. bestCount = nKeyword*nKeyword;
  500. for(i=nKeyword/2; i<=2*nKeyword; i++){
  501. if( i<=0 ) continue;
  502. for(j=0; j<i; j++) aKWHash[j] = 0;
  503. for(j=0; j<nKeyword; j++){
  504. h = aKeywordTable[j].hash % i;
  505. aKWHash[h] *= 2;
  506. aKWHash[h]++;
  507. }
  508. for(j=count=0; j<i; j++) count += aKWHash[j];
  509. if( count<bestCount ){
  510. bestCount = count;
  511. bestSize = i;
  512. }
  513. }
  514. /* Compute the hash */
  515. for(i=0; i<bestSize; i++) aKWHash[i] = 0;
  516. for(i=0; i<nKeyword; i++){
  517. h = aKeywordTable[i].hash % bestSize;
  518. aKeywordTable[i].iNext = aKWHash[h];
  519. aKWHash[h] = i+1;
  520. reorder(&aKWHash[h]);
  521. }
  522. /* Begin generating code */
  523. printf("%s", zHdr);
  524. printf("/* Hash score: %d */\n", bestCount);
  525. printf("/* zKWText[] encodes %d bytes of keyword text in %d bytes */\n",
  526. totalLen + nKeyword, nChar+1 );
  527. for(i=j=k=0; i<nKeyword; i++){
  528. Keyword *p = &aKeywordTable[i];
  529. if( p->substrId ) continue;
  530. memcpy(&zKWText[k], p->zName, p->len);
  531. k += p->len;
  532. if( j+p->len>70 ){
  533. printf("%*s */\n", 74-j, "");
  534. j = 0;
  535. }
  536. if( j==0 ){
  537. printf("/* ");
  538. j = 8;
  539. }
  540. printf("%s", p->zName);
  541. j += p->len;
  542. }
  543. if( j>0 ){
  544. printf("%*s */\n", 74-j, "");
  545. }
  546. printf("static const char zKWText[%d] = {\n", nChar);
  547. zKWText[nChar] = 0;
  548. for(i=j=0; i<k; i++){
  549. if( j==0 ){
  550. printf(" ");
  551. }
  552. if( zKWText[i]==0 ){
  553. printf("0");
  554. }else{
  555. printf("'%c',", zKWText[i]);
  556. }
  557. j += 4;
  558. if( j>68 ){
  559. printf("\n");
  560. j = 0;
  561. }
  562. }
  563. if( j>0 ) printf("\n");
  564. printf("};\n");
  565. printf("/* aKWHash[i] is the hash value for the i-th keyword */\n");
  566. printf("static const unsigned char aKWHash[%d] = {\n", bestSize);
  567. for(i=j=0; i<bestSize; i++){
  568. if( j==0 ) printf(" ");
  569. printf(" %3d,", aKWHash[i]);
  570. j++;
  571. if( j>12 ){
  572. printf("\n");
  573. j = 0;
  574. }
  575. }
  576. printf("%s};\n", j==0 ? "" : "\n");
  577. printf("/* aKWNext[] forms the hash collision chain. If aKWHash[i]==0\n");
  578. printf("** then the i-th keyword has no more hash collisions. Otherwise,\n");
  579. printf("** the next keyword with the same hash is aKWHash[i]-1. */\n");
  580. printf("static const unsigned char aKWNext[%d] = {0,\n", nKeyword+1);
  581. for(i=j=0; i<nKeyword; i++){
  582. if( j==0 ) printf(" ");
  583. printf(" %3d,", aKeywordTable[i].iNext);
  584. j++;
  585. if( j>12 ){
  586. printf("\n");
  587. j = 0;
  588. }
  589. }
  590. printf("%s};\n", j==0 ? "" : "\n");
  591. printf("/* aKWLen[i] is the length (in bytes) of the i-th keyword */\n");
  592. printf("static const unsigned char aKWLen[%d] = {0,\n", nKeyword+1);
  593. for(i=j=0; i<nKeyword; i++){
  594. if( j==0 ) printf(" ");
  595. printf(" %3d,", aKeywordTable[i].len+aKeywordTable[i].prefix);
  596. j++;
  597. if( j>12 ){
  598. printf("\n");
  599. j = 0;
  600. }
  601. }
  602. printf("%s};\n", j==0 ? "" : "\n");
  603. printf("/* aKWOffset[i] is the index into zKWText[] of the start of\n");
  604. printf("** the text for the i-th keyword. */\n");
  605. printf("static const unsigned short int aKWOffset[%d] = {0,\n", nKeyword+1);
  606. for(i=j=0; i<nKeyword; i++){
  607. if( j==0 ) printf(" ");
  608. printf(" %3d,", aKeywordTable[i].offset);
  609. j++;
  610. if( j>12 ){
  611. printf("\n");
  612. j = 0;
  613. }
  614. }
  615. printf("%s};\n", j==0 ? "" : "\n");
  616. printf("/* aKWCode[i] is the parser symbol code for the i-th keyword */\n");
  617. printf("static const unsigned char aKWCode[%d] = {0,\n", nKeyword+1);
  618. for(i=j=0; i<nKeyword; i++){
  619. char *zToken = aKeywordTable[i].zTokenType;
  620. if( j==0 ) printf(" ");
  621. printf("%s,%*s", zToken, (int)(14-strlen(zToken)), "");
  622. j++;
  623. if( j>=5 ){
  624. printf("\n");
  625. j = 0;
  626. }
  627. }
  628. printf("%s};\n", j==0 ? "" : "\n");
  629. printf("/* Hash table decoded:\n");
  630. for(i=0; i<bestSize; i++){
  631. j = aKWHash[i];
  632. printf("** %3d:", i);
  633. while( j ){
  634. printf(" %s", aKeywordTable[j-1].zOrigName);
  635. j = aKeywordTable[j-1].iNext;
  636. }
  637. printf("\n");
  638. }
  639. printf("*/\n");
  640. printf("/* Check to see if z[0..n-1] is a keyword. If it is, write the\n");
  641. printf("** parser symbol code for that keyword into *pType. Always\n");
  642. printf("** return the integer n (the length of the token). */\n");
  643. printf("static int keywordCode(const char *z, int n, int *pType){\n");
  644. printf(" int i, j;\n");
  645. printf(" const char *zKW;\n");
  646. printf(" assert( n>=2 );\n");
  647. printf(" i = ((charMap(z[0])*%d) %c", HASH_C0, HASH_CC);
  648. printf(" (charMap(z[n-1])*%d) %c", HASH_C1, HASH_CC);
  649. printf(" n*%d) %% %d;\n", HASH_C2, bestSize);
  650. printf(" for(i=(int)aKWHash[i]; i>0; i=aKWNext[i]){\n");
  651. printf(" if( aKWLen[i]!=n ) continue;\n");
  652. printf(" zKW = &zKWText[aKWOffset[i]];\n");
  653. printf("#ifdef SQLITE_ASCII\n");
  654. printf(" if( (z[0]&~0x20)!=zKW[0] ) continue;\n");
  655. printf(" if( (z[1]&~0x20)!=zKW[1] ) continue;\n");
  656. printf(" j = 2;\n");
  657. printf(" while( j<n && (z[j]&~0x20)==zKW[j] ){ j++; }\n");
  658. printf("#endif\n");
  659. printf("#ifdef SQLITE_EBCDIC\n");
  660. printf(" if( toupper(z[0])!=zKW[0] ) continue;\n");
  661. printf(" if( toupper(z[1])!=zKW[1] ) continue;\n");
  662. printf(" j = 2;\n");
  663. printf(" while( j<n && toupper(z[j])==zKW[j] ){ j++; }\n");
  664. printf("#endif\n");
  665. printf(" if( j<n ) continue;\n");
  666. for(i=0; i<nKeyword; i++){
  667. printf(" testcase( i==%d ); /* %s */\n",
  668. i+1, aKeywordTable[i].zOrigName);
  669. }
  670. printf(" *pType = aKWCode[i];\n");
  671. printf(" break;\n");
  672. printf(" }\n");
  673. printf(" return n;\n");
  674. printf("}\n");
  675. printf("int sqlite3KeywordCode(const unsigned char *z, int n){\n");
  676. printf(" int id = TK_ID;\n");
  677. printf(" if( n>=2 ) keywordCode((char*)z, n, &id);\n");
  678. printf(" return id;\n");
  679. printf("}\n");
  680. printf("#define SQLITE_N_KEYWORD %d\n", nKeyword);
  681. printf("int sqlite3_keyword_name(int i,const char **pzName,int *pnName){\n");
  682. printf(" if( i<0 || i>=SQLITE_N_KEYWORD ) return SQLITE_ERROR;\n");
  683. printf(" i++;\n");
  684. printf(" *pzName = zKWText + aKWOffset[i];\n");
  685. printf(" *pnName = aKWLen[i];\n");
  686. printf(" return SQLITE_OK;\n");
  687. printf("}\n");
  688. printf("int sqlite3_keyword_count(void){ return SQLITE_N_KEYWORD; }\n");
  689. printf("int sqlite3_keyword_check(const char *zName, int nName){\n");
  690. printf(" return TK_ID!=sqlite3KeywordCode((const u8*)zName, nName);\n");
  691. printf("}\n");
  692. return 0;
  693. }