fts5_config.c 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128
  1. /*
  2. ** 2014 Jun 09
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. ******************************************************************************
  12. **
  13. ** This is an SQLite module implementing full-text search.
  14. */
  15. #include "fts5Int.h"
  16. #define FTS5_DEFAULT_PAGE_SIZE 4050
  17. #define FTS5_DEFAULT_AUTOMERGE 4
  18. #define FTS5_DEFAULT_USERMERGE 4
  19. #define FTS5_DEFAULT_CRISISMERGE 16
  20. #define FTS5_DEFAULT_HASHSIZE (1024*1024)
  21. #define FTS5_DEFAULT_DELETE_AUTOMERGE 10 /* default 10% */
  22. /* Maximum allowed page size */
  23. #define FTS5_MAX_PAGE_SIZE (64*1024)
  24. static int fts5_iswhitespace(char x){
  25. return (x==' ');
  26. }
  27. static int fts5_isopenquote(char x){
  28. return (x=='"' || x=='\'' || x=='[' || x=='`');
  29. }
  30. /*
  31. ** Argument pIn points to a character that is part of a nul-terminated
  32. ** string. Return a pointer to the first character following *pIn in
  33. ** the string that is not a white-space character.
  34. */
  35. static const char *fts5ConfigSkipWhitespace(const char *pIn){
  36. const char *p = pIn;
  37. if( p ){
  38. while( fts5_iswhitespace(*p) ){ p++; }
  39. }
  40. return p;
  41. }
  42. /*
  43. ** Argument pIn points to a character that is part of a nul-terminated
  44. ** string. Return a pointer to the first character following *pIn in
  45. ** the string that is not a "bareword" character.
  46. */
  47. static const char *fts5ConfigSkipBareword(const char *pIn){
  48. const char *p = pIn;
  49. while ( sqlite3Fts5IsBareword(*p) ) p++;
  50. if( p==pIn ) p = 0;
  51. return p;
  52. }
  53. static int fts5_isdigit(char a){
  54. return (a>='0' && a<='9');
  55. }
  56. static const char *fts5ConfigSkipLiteral(const char *pIn){
  57. const char *p = pIn;
  58. switch( *p ){
  59. case 'n': case 'N':
  60. if( sqlite3_strnicmp("null", p, 4)==0 ){
  61. p = &p[4];
  62. }else{
  63. p = 0;
  64. }
  65. break;
  66. case 'x': case 'X':
  67. p++;
  68. if( *p=='\'' ){
  69. p++;
  70. while( (*p>='a' && *p<='f')
  71. || (*p>='A' && *p<='F')
  72. || (*p>='0' && *p<='9')
  73. ){
  74. p++;
  75. }
  76. if( *p=='\'' && 0==((p-pIn)%2) ){
  77. p++;
  78. }else{
  79. p = 0;
  80. }
  81. }else{
  82. p = 0;
  83. }
  84. break;
  85. case '\'':
  86. p++;
  87. while( p ){
  88. if( *p=='\'' ){
  89. p++;
  90. if( *p!='\'' ) break;
  91. }
  92. p++;
  93. if( *p==0 ) p = 0;
  94. }
  95. break;
  96. default:
  97. /* maybe a number */
  98. if( *p=='+' || *p=='-' ) p++;
  99. while( fts5_isdigit(*p) ) p++;
  100. /* At this point, if the literal was an integer, the parse is
  101. ** finished. Or, if it is a floating point value, it may continue
  102. ** with either a decimal point or an 'E' character. */
  103. if( *p=='.' && fts5_isdigit(p[1]) ){
  104. p += 2;
  105. while( fts5_isdigit(*p) ) p++;
  106. }
  107. if( p==pIn ) p = 0;
  108. break;
  109. }
  110. return p;
  111. }
  112. /*
  113. ** The first character of the string pointed to by argument z is guaranteed
  114. ** to be an open-quote character (see function fts5_isopenquote()).
  115. **
  116. ** This function searches for the corresponding close-quote character within
  117. ** the string and, if found, dequotes the string in place and adds a new
  118. ** nul-terminator byte.
  119. **
  120. ** If the close-quote is found, the value returned is the byte offset of
  121. ** the character immediately following it. Or, if the close-quote is not
  122. ** found, -1 is returned. If -1 is returned, the buffer is left in an
  123. ** undefined state.
  124. */
  125. static int fts5Dequote(char *z){
  126. char q;
  127. int iIn = 1;
  128. int iOut = 0;
  129. q = z[0];
  130. /* Set stack variable q to the close-quote character */
  131. assert( q=='[' || q=='\'' || q=='"' || q=='`' );
  132. if( q=='[' ) q = ']';
  133. while( z[iIn] ){
  134. if( z[iIn]==q ){
  135. if( z[iIn+1]!=q ){
  136. /* Character iIn was the close quote. */
  137. iIn++;
  138. break;
  139. }else{
  140. /* Character iIn and iIn+1 form an escaped quote character. Skip
  141. ** the input cursor past both and copy a single quote character
  142. ** to the output buffer. */
  143. iIn += 2;
  144. z[iOut++] = q;
  145. }
  146. }else{
  147. z[iOut++] = z[iIn++];
  148. }
  149. }
  150. z[iOut] = '\0';
  151. return iIn;
  152. }
  153. /*
  154. ** Convert an SQL-style quoted string into a normal string by removing
  155. ** the quote characters. The conversion is done in-place. If the
  156. ** input does not begin with a quote character, then this routine
  157. ** is a no-op.
  158. **
  159. ** Examples:
  160. **
  161. ** "abc" becomes abc
  162. ** 'xyz' becomes xyz
  163. ** [pqr] becomes pqr
  164. ** `mno` becomes mno
  165. */
  166. void sqlite3Fts5Dequote(char *z){
  167. char quote; /* Quote character (if any ) */
  168. assert( 0==fts5_iswhitespace(z[0]) );
  169. quote = z[0];
  170. if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){
  171. fts5Dequote(z);
  172. }
  173. }
  174. struct Fts5Enum {
  175. const char *zName;
  176. int eVal;
  177. };
  178. typedef struct Fts5Enum Fts5Enum;
  179. static int fts5ConfigSetEnum(
  180. const Fts5Enum *aEnum,
  181. const char *zEnum,
  182. int *peVal
  183. ){
  184. int nEnum = (int)strlen(zEnum);
  185. int i;
  186. int iVal = -1;
  187. for(i=0; aEnum[i].zName; i++){
  188. if( sqlite3_strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){
  189. if( iVal>=0 ) return SQLITE_ERROR;
  190. iVal = aEnum[i].eVal;
  191. }
  192. }
  193. *peVal = iVal;
  194. return iVal<0 ? SQLITE_ERROR : SQLITE_OK;
  195. }
  196. /*
  197. ** Parse a "special" CREATE VIRTUAL TABLE directive and update
  198. ** configuration object pConfig as appropriate.
  199. **
  200. ** If successful, object pConfig is updated and SQLITE_OK returned. If
  201. ** an error occurs, an SQLite error code is returned and an error message
  202. ** may be left in *pzErr. It is the responsibility of the caller to
  203. ** eventually free any such error message using sqlite3_free().
  204. */
  205. static int fts5ConfigParseSpecial(
  206. Fts5Config *pConfig, /* Configuration object to update */
  207. const char *zCmd, /* Special command to parse */
  208. const char *zArg, /* Argument to parse */
  209. char **pzErr /* OUT: Error message */
  210. ){
  211. int rc = SQLITE_OK;
  212. int nCmd = (int)strlen(zCmd);
  213. if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){
  214. const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES;
  215. const char *p;
  216. int bFirst = 1;
  217. if( pConfig->aPrefix==0 ){
  218. pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte);
  219. if( rc ) return rc;
  220. }
  221. p = zArg;
  222. while( 1 ){
  223. int nPre = 0;
  224. while( p[0]==' ' ) p++;
  225. if( bFirst==0 && p[0]==',' ){
  226. p++;
  227. while( p[0]==' ' ) p++;
  228. }else if( p[0]=='\0' ){
  229. break;
  230. }
  231. if( p[0]<'0' || p[0]>'9' ){
  232. *pzErr = sqlite3_mprintf("malformed prefix=... directive");
  233. rc = SQLITE_ERROR;
  234. break;
  235. }
  236. if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES ){
  237. *pzErr = sqlite3_mprintf(
  238. "too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES
  239. );
  240. rc = SQLITE_ERROR;
  241. break;
  242. }
  243. while( p[0]>='0' && p[0]<='9' && nPre<1000 ){
  244. nPre = nPre*10 + (p[0] - '0');
  245. p++;
  246. }
  247. if( nPre<=0 || nPre>=1000 ){
  248. *pzErr = sqlite3_mprintf("prefix length out of range (max 999)");
  249. rc = SQLITE_ERROR;
  250. break;
  251. }
  252. pConfig->aPrefix[pConfig->nPrefix] = nPre;
  253. pConfig->nPrefix++;
  254. bFirst = 0;
  255. }
  256. assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES );
  257. return rc;
  258. }
  259. if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){
  260. const char *p = (const char*)zArg;
  261. sqlite3_int64 nArg = strlen(zArg) + 1;
  262. char **azArg = sqlite3Fts5MallocZero(&rc, (sizeof(char*) + 2) * nArg);
  263. if( azArg ){
  264. char *pSpace = (char*)&azArg[nArg];
  265. if( pConfig->t.azArg ){
  266. *pzErr = sqlite3_mprintf("multiple tokenize=... directives");
  267. rc = SQLITE_ERROR;
  268. }else{
  269. for(nArg=0; p && *p; nArg++){
  270. const char *p2 = fts5ConfigSkipWhitespace(p);
  271. if( *p2=='\'' ){
  272. p = fts5ConfigSkipLiteral(p2);
  273. }else{
  274. p = fts5ConfigSkipBareword(p2);
  275. }
  276. if( p ){
  277. memcpy(pSpace, p2, p-p2);
  278. azArg[nArg] = pSpace;
  279. sqlite3Fts5Dequote(pSpace);
  280. pSpace += (p - p2) + 1;
  281. p = fts5ConfigSkipWhitespace(p);
  282. }
  283. }
  284. if( p==0 ){
  285. *pzErr = sqlite3_mprintf("parse error in tokenize directive");
  286. rc = SQLITE_ERROR;
  287. }else{
  288. pConfig->t.azArg = (const char**)azArg;
  289. pConfig->t.nArg = nArg;
  290. azArg = 0;
  291. }
  292. }
  293. }
  294. sqlite3_free(azArg);
  295. return rc;
  296. }
  297. if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){
  298. if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
  299. *pzErr = sqlite3_mprintf("multiple content=... directives");
  300. rc = SQLITE_ERROR;
  301. }else{
  302. if( zArg[0] ){
  303. pConfig->eContent = FTS5_CONTENT_EXTERNAL;
  304. pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg);
  305. }else{
  306. pConfig->eContent = FTS5_CONTENT_NONE;
  307. }
  308. }
  309. return rc;
  310. }
  311. if( sqlite3_strnicmp("contentless_delete", zCmd, nCmd)==0 ){
  312. if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
  313. *pzErr = sqlite3_mprintf("malformed contentless_delete=... directive");
  314. rc = SQLITE_ERROR;
  315. }else{
  316. pConfig->bContentlessDelete = (zArg[0]=='1');
  317. }
  318. return rc;
  319. }
  320. if( sqlite3_strnicmp("contentless_unindexed", zCmd, nCmd)==0 ){
  321. if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
  322. *pzErr = sqlite3_mprintf("malformed contentless_delete=... directive");
  323. rc = SQLITE_ERROR;
  324. }else{
  325. pConfig->bContentlessUnindexed = (zArg[0]=='1');
  326. }
  327. return rc;
  328. }
  329. if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){
  330. if( pConfig->zContentRowid ){
  331. *pzErr = sqlite3_mprintf("multiple content_rowid=... directives");
  332. rc = SQLITE_ERROR;
  333. }else{
  334. pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1);
  335. }
  336. return rc;
  337. }
  338. if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){
  339. if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
  340. *pzErr = sqlite3_mprintf("malformed columnsize=... directive");
  341. rc = SQLITE_ERROR;
  342. }else{
  343. pConfig->bColumnsize = (zArg[0]=='1');
  344. }
  345. return rc;
  346. }
  347. if( sqlite3_strnicmp("locale", zCmd, nCmd)==0 ){
  348. if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
  349. *pzErr = sqlite3_mprintf("malformed locale=... directive");
  350. rc = SQLITE_ERROR;
  351. }else{
  352. pConfig->bLocale = (zArg[0]=='1');
  353. }
  354. return rc;
  355. }
  356. if( sqlite3_strnicmp("detail", zCmd, nCmd)==0 ){
  357. const Fts5Enum aDetail[] = {
  358. { "none", FTS5_DETAIL_NONE },
  359. { "full", FTS5_DETAIL_FULL },
  360. { "columns", FTS5_DETAIL_COLUMNS },
  361. { 0, 0 }
  362. };
  363. if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){
  364. *pzErr = sqlite3_mprintf("malformed detail=... directive");
  365. }
  366. return rc;
  367. }
  368. if( sqlite3_strnicmp("tokendata", zCmd, nCmd)==0 ){
  369. if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
  370. *pzErr = sqlite3_mprintf("malformed tokendata=... directive");
  371. rc = SQLITE_ERROR;
  372. }else{
  373. pConfig->bTokendata = (zArg[0]=='1');
  374. }
  375. return rc;
  376. }
  377. *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
  378. return SQLITE_ERROR;
  379. }
  380. /*
  381. ** Gobble up the first bareword or quoted word from the input buffer zIn.
  382. ** Return a pointer to the character immediately following the last in
  383. ** the gobbled word if successful, or a NULL pointer otherwise (failed
  384. ** to find close-quote character).
  385. **
  386. ** Before returning, set pzOut to point to a new buffer containing a
  387. ** nul-terminated, dequoted copy of the gobbled word. If the word was
  388. ** quoted, *pbQuoted is also set to 1 before returning.
  389. **
  390. ** If *pRc is other than SQLITE_OK when this function is called, it is
  391. ** a no-op (NULL is returned). Otherwise, if an OOM occurs within this
  392. ** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not*
  393. ** set if a parse error (failed to find close quote) occurs.
  394. */
  395. static const char *fts5ConfigGobbleWord(
  396. int *pRc, /* IN/OUT: Error code */
  397. const char *zIn, /* Buffer to gobble string/bareword from */
  398. char **pzOut, /* OUT: malloc'd buffer containing str/bw */
  399. int *pbQuoted /* OUT: Set to true if dequoting required */
  400. ){
  401. const char *zRet = 0;
  402. sqlite3_int64 nIn = strlen(zIn);
  403. char *zOut = sqlite3_malloc64(nIn+1);
  404. assert( *pRc==SQLITE_OK );
  405. *pbQuoted = 0;
  406. *pzOut = 0;
  407. if( zOut==0 ){
  408. *pRc = SQLITE_NOMEM;
  409. }else{
  410. memcpy(zOut, zIn, (size_t)(nIn+1));
  411. if( fts5_isopenquote(zOut[0]) ){
  412. int ii = fts5Dequote(zOut);
  413. zRet = &zIn[ii];
  414. *pbQuoted = 1;
  415. }else{
  416. zRet = fts5ConfigSkipBareword(zIn);
  417. if( zRet ){
  418. zOut[zRet-zIn] = '\0';
  419. }
  420. }
  421. }
  422. if( zRet==0 ){
  423. sqlite3_free(zOut);
  424. }else{
  425. *pzOut = zOut;
  426. }
  427. return zRet;
  428. }
  429. static int fts5ConfigParseColumn(
  430. Fts5Config *p,
  431. char *zCol,
  432. char *zArg,
  433. char **pzErr,
  434. int *pbUnindexed
  435. ){
  436. int rc = SQLITE_OK;
  437. if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME)
  438. || 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME)
  439. ){
  440. *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol);
  441. rc = SQLITE_ERROR;
  442. }else if( zArg ){
  443. if( 0==sqlite3_stricmp(zArg, "unindexed") ){
  444. p->abUnindexed[p->nCol] = 1;
  445. *pbUnindexed = 1;
  446. }else{
  447. *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg);
  448. rc = SQLITE_ERROR;
  449. }
  450. }
  451. p->azCol[p->nCol++] = zCol;
  452. return rc;
  453. }
  454. /*
  455. ** Populate the Fts5Config.zContentExprlist string.
  456. */
  457. static int fts5ConfigMakeExprlist(Fts5Config *p){
  458. int i;
  459. int rc = SQLITE_OK;
  460. Fts5Buffer buf = {0, 0, 0};
  461. sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid);
  462. if( p->eContent!=FTS5_CONTENT_NONE ){
  463. assert( p->eContent==FTS5_CONTENT_EXTERNAL
  464. || p->eContent==FTS5_CONTENT_NORMAL
  465. || p->eContent==FTS5_CONTENT_UNINDEXED
  466. );
  467. for(i=0; i<p->nCol; i++){
  468. if( p->eContent==FTS5_CONTENT_EXTERNAL ){
  469. sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]);
  470. }else if( p->eContent==FTS5_CONTENT_NORMAL || p->abUnindexed[i] ){
  471. sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i);
  472. }else{
  473. sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL");
  474. }
  475. }
  476. }
  477. if( p->eContent==FTS5_CONTENT_NORMAL && p->bLocale ){
  478. for(i=0; i<p->nCol; i++){
  479. if( p->abUnindexed[i]==0 ){
  480. sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.l%d", i);
  481. }else{
  482. sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL");
  483. }
  484. }
  485. }
  486. assert( p->zContentExprlist==0 );
  487. p->zContentExprlist = (char*)buf.p;
  488. return rc;
  489. }
  490. /*
  491. ** Arguments nArg/azArg contain the string arguments passed to the xCreate
  492. ** or xConnect method of the virtual table. This function attempts to
  493. ** allocate an instance of Fts5Config containing the results of parsing
  494. ** those arguments.
  495. **
  496. ** If successful, SQLITE_OK is returned and *ppOut is set to point to the
  497. ** new Fts5Config object. If an error occurs, an SQLite error code is
  498. ** returned, *ppOut is set to NULL and an error message may be left in
  499. ** *pzErr. It is the responsibility of the caller to eventually free any
  500. ** such error message using sqlite3_free().
  501. */
  502. int sqlite3Fts5ConfigParse(
  503. Fts5Global *pGlobal,
  504. sqlite3 *db,
  505. int nArg, /* Number of arguments */
  506. const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */
  507. Fts5Config **ppOut, /* OUT: Results of parse */
  508. char **pzErr /* OUT: Error message */
  509. ){
  510. int rc = SQLITE_OK; /* Return code */
  511. Fts5Config *pRet; /* New object to return */
  512. int i;
  513. sqlite3_int64 nByte;
  514. int bUnindexed = 0; /* True if there are one or more UNINDEXED */
  515. *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
  516. if( pRet==0 ) return SQLITE_NOMEM;
  517. memset(pRet, 0, sizeof(Fts5Config));
  518. pRet->pGlobal = pGlobal;
  519. pRet->db = db;
  520. pRet->iCookie = -1;
  521. nByte = nArg * (sizeof(char*) + sizeof(u8));
  522. pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
  523. pRet->abUnindexed = pRet->azCol ? (u8*)&pRet->azCol[nArg] : 0;
  524. pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
  525. pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
  526. pRet->bColumnsize = 1;
  527. pRet->eDetail = FTS5_DETAIL_FULL;
  528. #ifdef SQLITE_DEBUG
  529. pRet->bPrefixIndex = 1;
  530. #endif
  531. if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
  532. *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName);
  533. rc = SQLITE_ERROR;
  534. }
  535. assert( (pRet->abUnindexed && pRet->azCol) || rc!=SQLITE_OK );
  536. for(i=3; rc==SQLITE_OK && i<nArg; i++){
  537. const char *zOrig = azArg[i];
  538. const char *z;
  539. char *zOne = 0;
  540. char *zTwo = 0;
  541. int bOption = 0;
  542. int bMustBeCol = 0;
  543. z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol);
  544. z = fts5ConfigSkipWhitespace(z);
  545. if( z && *z=='=' ){
  546. bOption = 1;
  547. assert( zOne!=0 );
  548. z++;
  549. if( bMustBeCol ) z = 0;
  550. }
  551. z = fts5ConfigSkipWhitespace(z);
  552. if( z && z[0] ){
  553. int bDummy;
  554. z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy);
  555. if( z && z[0] ) z = 0;
  556. }
  557. if( rc==SQLITE_OK ){
  558. if( z==0 ){
  559. *pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig);
  560. rc = SQLITE_ERROR;
  561. }else{
  562. if( bOption ){
  563. rc = fts5ConfigParseSpecial(pRet,
  564. ALWAYS(zOne)?zOne:"",
  565. zTwo?zTwo:"",
  566. pzErr
  567. );
  568. }else{
  569. rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr, &bUnindexed);
  570. zOne = 0;
  571. }
  572. }
  573. }
  574. sqlite3_free(zOne);
  575. sqlite3_free(zTwo);
  576. }
  577. /* We only allow contentless_delete=1 if the table is indeed contentless. */
  578. if( rc==SQLITE_OK
  579. && pRet->bContentlessDelete
  580. && pRet->eContent!=FTS5_CONTENT_NONE
  581. ){
  582. *pzErr = sqlite3_mprintf(
  583. "contentless_delete=1 requires a contentless table"
  584. );
  585. rc = SQLITE_ERROR;
  586. }
  587. /* We only allow contentless_delete=1 if columnsize=0 is not present.
  588. **
  589. ** This restriction may be removed at some point.
  590. */
  591. if( rc==SQLITE_OK && pRet->bContentlessDelete && pRet->bColumnsize==0 ){
  592. *pzErr = sqlite3_mprintf(
  593. "contentless_delete=1 is incompatible with columnsize=0"
  594. );
  595. rc = SQLITE_ERROR;
  596. }
  597. /* We only allow contentless_unindexed=1 if the table is actually a
  598. ** contentless one.
  599. */
  600. if( rc==SQLITE_OK
  601. && pRet->bContentlessUnindexed
  602. && pRet->eContent!=FTS5_CONTENT_NONE
  603. ){
  604. *pzErr = sqlite3_mprintf(
  605. "contentless_unindexed=1 requires a contentless table"
  606. );
  607. rc = SQLITE_ERROR;
  608. }
  609. /* If no zContent option was specified, fill in the default values. */
  610. if( rc==SQLITE_OK && pRet->zContent==0 ){
  611. const char *zTail = 0;
  612. assert( pRet->eContent==FTS5_CONTENT_NORMAL
  613. || pRet->eContent==FTS5_CONTENT_NONE
  614. );
  615. if( pRet->eContent==FTS5_CONTENT_NORMAL ){
  616. zTail = "content";
  617. }else if( bUnindexed && pRet->bContentlessUnindexed ){
  618. pRet->eContent = FTS5_CONTENT_UNINDEXED;
  619. zTail = "content";
  620. }else if( pRet->bColumnsize ){
  621. zTail = "docsize";
  622. }
  623. if( zTail ){
  624. pRet->zContent = sqlite3Fts5Mprintf(
  625. &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail
  626. );
  627. }
  628. }
  629. if( rc==SQLITE_OK && pRet->zContentRowid==0 ){
  630. pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1);
  631. }
  632. /* Formulate the zContentExprlist text */
  633. if( rc==SQLITE_OK ){
  634. rc = fts5ConfigMakeExprlist(pRet);
  635. }
  636. if( rc!=SQLITE_OK ){
  637. sqlite3Fts5ConfigFree(pRet);
  638. *ppOut = 0;
  639. }
  640. return rc;
  641. }
  642. /*
  643. ** Free the configuration object passed as the only argument.
  644. */
  645. void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
  646. if( pConfig ){
  647. int i;
  648. if( pConfig->t.pTok ){
  649. if( pConfig->t.pApi1 ){
  650. pConfig->t.pApi1->xDelete(pConfig->t.pTok);
  651. }else{
  652. pConfig->t.pApi2->xDelete(pConfig->t.pTok);
  653. }
  654. }
  655. sqlite3_free((char*)pConfig->t.azArg);
  656. sqlite3_free(pConfig->zDb);
  657. sqlite3_free(pConfig->zName);
  658. for(i=0; i<pConfig->nCol; i++){
  659. sqlite3_free(pConfig->azCol[i]);
  660. }
  661. sqlite3_free(pConfig->azCol);
  662. sqlite3_free(pConfig->aPrefix);
  663. sqlite3_free(pConfig->zRank);
  664. sqlite3_free(pConfig->zRankArgs);
  665. sqlite3_free(pConfig->zContent);
  666. sqlite3_free(pConfig->zContentRowid);
  667. sqlite3_free(pConfig->zContentExprlist);
  668. sqlite3_free(pConfig);
  669. }
  670. }
  671. /*
  672. ** Call sqlite3_declare_vtab() based on the contents of the configuration
  673. ** object passed as the only argument. Return SQLITE_OK if successful, or
  674. ** an SQLite error code if an error occurs.
  675. */
  676. int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){
  677. int i;
  678. int rc = SQLITE_OK;
  679. char *zSql;
  680. zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x(");
  681. for(i=0; zSql && i<pConfig->nCol; i++){
  682. const char *zSep = (i==0?"":", ");
  683. zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]);
  684. }
  685. zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)",
  686. zSql, pConfig->zName, FTS5_RANK_NAME
  687. );
  688. assert( zSql || rc==SQLITE_NOMEM );
  689. if( zSql ){
  690. rc = sqlite3_declare_vtab(pConfig->db, zSql);
  691. sqlite3_free(zSql);
  692. }
  693. return rc;
  694. }
  695. /*
  696. ** Tokenize the text passed via the second and third arguments.
  697. **
  698. ** The callback is invoked once for each token in the input text. The
  699. ** arguments passed to it are, in order:
  700. **
  701. ** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize()
  702. ** const char *pToken // Pointer to buffer containing token
  703. ** int nToken // Size of token in bytes
  704. ** int iStart // Byte offset of start of token within input text
  705. ** int iEnd // Byte offset of end of token within input text
  706. ** int iPos // Position of token in input (first token is 0)
  707. **
  708. ** If the callback returns a non-zero value the tokenization is abandoned
  709. ** and no further callbacks are issued.
  710. **
  711. ** This function returns SQLITE_OK if successful or an SQLite error code
  712. ** if an error occurs. If the tokenization was abandoned early because
  713. ** the callback returned SQLITE_DONE, this is not an error and this function
  714. ** still returns SQLITE_OK. Or, if the tokenization was abandoned early
  715. ** because the callback returned another non-zero value, it is assumed
  716. ** to be an SQLite error code and returned to the caller.
  717. */
  718. int sqlite3Fts5Tokenize(
  719. Fts5Config *pConfig, /* FTS5 Configuration object */
  720. int flags, /* FTS5_TOKENIZE_* flags */
  721. const char *pText, int nText, /* Text to tokenize */
  722. void *pCtx, /* Context passed to xToken() */
  723. int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
  724. ){
  725. int rc = SQLITE_OK;
  726. if( pText ){
  727. if( pConfig->t.pTok==0 ){
  728. rc = sqlite3Fts5LoadTokenizer(pConfig);
  729. }
  730. if( rc==SQLITE_OK ){
  731. if( pConfig->t.pApi1 ){
  732. rc = pConfig->t.pApi1->xTokenize(
  733. pConfig->t.pTok, pCtx, flags, pText, nText, xToken
  734. );
  735. }else{
  736. rc = pConfig->t.pApi2->xTokenize(pConfig->t.pTok, pCtx, flags,
  737. pText, nText, pConfig->t.pLocale, pConfig->t.nLocale, xToken
  738. );
  739. }
  740. }
  741. }
  742. return rc;
  743. }
  744. /*
  745. ** Argument pIn points to the first character in what is expected to be
  746. ** a comma-separated list of SQL literals followed by a ')' character.
  747. ** If it actually is this, return a pointer to the ')'. Otherwise, return
  748. ** NULL to indicate a parse error.
  749. */
  750. static const char *fts5ConfigSkipArgs(const char *pIn){
  751. const char *p = pIn;
  752. while( 1 ){
  753. p = fts5ConfigSkipWhitespace(p);
  754. p = fts5ConfigSkipLiteral(p);
  755. p = fts5ConfigSkipWhitespace(p);
  756. if( p==0 || *p==')' ) break;
  757. if( *p!=',' ){
  758. p = 0;
  759. break;
  760. }
  761. p++;
  762. }
  763. return p;
  764. }
  765. /*
  766. ** Parameter zIn contains a rank() function specification. The format of
  767. ** this is:
  768. **
  769. ** + Bareword (function name)
  770. ** + Open parenthesis - "("
  771. ** + Zero or more SQL literals in a comma separated list
  772. ** + Close parenthesis - ")"
  773. */
  774. int sqlite3Fts5ConfigParseRank(
  775. const char *zIn, /* Input string */
  776. char **pzRank, /* OUT: Rank function name */
  777. char **pzRankArgs /* OUT: Rank function arguments */
  778. ){
  779. const char *p = zIn;
  780. const char *pRank;
  781. char *zRank = 0;
  782. char *zRankArgs = 0;
  783. int rc = SQLITE_OK;
  784. *pzRank = 0;
  785. *pzRankArgs = 0;
  786. if( p==0 ){
  787. rc = SQLITE_ERROR;
  788. }else{
  789. p = fts5ConfigSkipWhitespace(p);
  790. pRank = p;
  791. p = fts5ConfigSkipBareword(p);
  792. if( p ){
  793. zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank);
  794. if( zRank ) memcpy(zRank, pRank, p-pRank);
  795. }else{
  796. rc = SQLITE_ERROR;
  797. }
  798. if( rc==SQLITE_OK ){
  799. p = fts5ConfigSkipWhitespace(p);
  800. if( *p!='(' ) rc = SQLITE_ERROR;
  801. p++;
  802. }
  803. if( rc==SQLITE_OK ){
  804. const char *pArgs;
  805. p = fts5ConfigSkipWhitespace(p);
  806. pArgs = p;
  807. if( *p!=')' ){
  808. p = fts5ConfigSkipArgs(p);
  809. if( p==0 ){
  810. rc = SQLITE_ERROR;
  811. }else{
  812. zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs);
  813. if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs);
  814. }
  815. }
  816. }
  817. }
  818. if( rc!=SQLITE_OK ){
  819. sqlite3_free(zRank);
  820. assert( zRankArgs==0 );
  821. }else{
  822. *pzRank = zRank;
  823. *pzRankArgs = zRankArgs;
  824. }
  825. return rc;
  826. }
  827. int sqlite3Fts5ConfigSetValue(
  828. Fts5Config *pConfig,
  829. const char *zKey,
  830. sqlite3_value *pVal,
  831. int *pbBadkey
  832. ){
  833. int rc = SQLITE_OK;
  834. if( 0==sqlite3_stricmp(zKey, "pgsz") ){
  835. int pgsz = 0;
  836. if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
  837. pgsz = sqlite3_value_int(pVal);
  838. }
  839. if( pgsz<32 || pgsz>FTS5_MAX_PAGE_SIZE ){
  840. *pbBadkey = 1;
  841. }else{
  842. pConfig->pgsz = pgsz;
  843. }
  844. }
  845. else if( 0==sqlite3_stricmp(zKey, "hashsize") ){
  846. int nHashSize = -1;
  847. if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
  848. nHashSize = sqlite3_value_int(pVal);
  849. }
  850. if( nHashSize<=0 ){
  851. *pbBadkey = 1;
  852. }else{
  853. pConfig->nHashSize = nHashSize;
  854. }
  855. }
  856. else if( 0==sqlite3_stricmp(zKey, "automerge") ){
  857. int nAutomerge = -1;
  858. if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
  859. nAutomerge = sqlite3_value_int(pVal);
  860. }
  861. if( nAutomerge<0 || nAutomerge>64 ){
  862. *pbBadkey = 1;
  863. }else{
  864. if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE;
  865. pConfig->nAutomerge = nAutomerge;
  866. }
  867. }
  868. else if( 0==sqlite3_stricmp(zKey, "usermerge") ){
  869. int nUsermerge = -1;
  870. if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
  871. nUsermerge = sqlite3_value_int(pVal);
  872. }
  873. if( nUsermerge<2 || nUsermerge>16 ){
  874. *pbBadkey = 1;
  875. }else{
  876. pConfig->nUsermerge = nUsermerge;
  877. }
  878. }
  879. else if( 0==sqlite3_stricmp(zKey, "crisismerge") ){
  880. int nCrisisMerge = -1;
  881. if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
  882. nCrisisMerge = sqlite3_value_int(pVal);
  883. }
  884. if( nCrisisMerge<0 ){
  885. *pbBadkey = 1;
  886. }else{
  887. if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
  888. if( nCrisisMerge>=FTS5_MAX_SEGMENT ) nCrisisMerge = FTS5_MAX_SEGMENT-1;
  889. pConfig->nCrisisMerge = nCrisisMerge;
  890. }
  891. }
  892. else if( 0==sqlite3_stricmp(zKey, "deletemerge") ){
  893. int nVal = -1;
  894. if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
  895. nVal = sqlite3_value_int(pVal);
  896. }else{
  897. *pbBadkey = 1;
  898. }
  899. if( nVal<0 ) nVal = FTS5_DEFAULT_DELETE_AUTOMERGE;
  900. if( nVal>100 ) nVal = 0;
  901. pConfig->nDeleteMerge = nVal;
  902. }
  903. else if( 0==sqlite3_stricmp(zKey, "rank") ){
  904. const char *zIn = (const char*)sqlite3_value_text(pVal);
  905. char *zRank;
  906. char *zRankArgs;
  907. rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs);
  908. if( rc==SQLITE_OK ){
  909. sqlite3_free(pConfig->zRank);
  910. sqlite3_free(pConfig->zRankArgs);
  911. pConfig->zRank = zRank;
  912. pConfig->zRankArgs = zRankArgs;
  913. }else if( rc==SQLITE_ERROR ){
  914. rc = SQLITE_OK;
  915. *pbBadkey = 1;
  916. }
  917. }
  918. else if( 0==sqlite3_stricmp(zKey, "secure-delete") ){
  919. int bVal = -1;
  920. if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
  921. bVal = sqlite3_value_int(pVal);
  922. }
  923. if( bVal<0 ){
  924. *pbBadkey = 1;
  925. }else{
  926. pConfig->bSecureDelete = (bVal ? 1 : 0);
  927. }
  928. }
  929. else if( 0==sqlite3_stricmp(zKey, "insttoken") ){
  930. int bVal = -1;
  931. if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
  932. bVal = sqlite3_value_int(pVal);
  933. }
  934. if( bVal<0 ){
  935. *pbBadkey = 1;
  936. }else{
  937. pConfig->bPrefixInsttoken = (bVal ? 1 : 0);
  938. }
  939. }else{
  940. *pbBadkey = 1;
  941. }
  942. return rc;
  943. }
  944. /*
  945. ** Load the contents of the %_config table into memory.
  946. */
  947. int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){
  948. const char *zSelect = "SELECT k, v FROM %Q.'%q_config'";
  949. char *zSql;
  950. sqlite3_stmt *p = 0;
  951. int rc = SQLITE_OK;
  952. int iVersion = 0;
  953. /* Set default values */
  954. pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE;
  955. pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE;
  956. pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE;
  957. pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
  958. pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE;
  959. pConfig->nDeleteMerge = FTS5_DEFAULT_DELETE_AUTOMERGE;
  960. zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName);
  961. if( zSql ){
  962. rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0);
  963. sqlite3_free(zSql);
  964. }
  965. assert( rc==SQLITE_OK || p==0 );
  966. if( rc==SQLITE_OK ){
  967. while( SQLITE_ROW==sqlite3_step(p) ){
  968. const char *zK = (const char*)sqlite3_column_text(p, 0);
  969. sqlite3_value *pVal = sqlite3_column_value(p, 1);
  970. if( 0==sqlite3_stricmp(zK, "version") ){
  971. iVersion = sqlite3_value_int(pVal);
  972. }else{
  973. int bDummy = 0;
  974. sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy);
  975. }
  976. }
  977. rc = sqlite3_finalize(p);
  978. }
  979. if( rc==SQLITE_OK
  980. && iVersion!=FTS5_CURRENT_VERSION
  981. && iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE
  982. ){
  983. rc = SQLITE_ERROR;
  984. sqlite3Fts5ConfigErrmsg(pConfig, "invalid fts5 file format "
  985. "(found %d, expected %d or %d) - run 'rebuild'",
  986. iVersion, FTS5_CURRENT_VERSION, FTS5_CURRENT_VERSION_SECUREDELETE
  987. );
  988. }else{
  989. pConfig->iVersion = iVersion;
  990. }
  991. if( rc==SQLITE_OK ){
  992. pConfig->iCookie = iCookie;
  993. }
  994. return rc;
  995. }
  996. /*
  997. ** Set (*pConfig->pzErrmsg) to point to an sqlite3_malloc()ed buffer
  998. ** containing the error message created using printf() style formatting
  999. ** string zFmt and its trailing arguments.
  1000. */
  1001. void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...){
  1002. va_list ap; /* ... printf arguments */
  1003. char *zMsg = 0;
  1004. va_start(ap, zFmt);
  1005. zMsg = sqlite3_vmprintf(zFmt, ap);
  1006. if( pConfig->pzErrmsg ){
  1007. assert( *pConfig->pzErrmsg==0 );
  1008. *pConfig->pzErrmsg = zMsg;
  1009. }else{
  1010. sqlite3_free(zMsg);
  1011. }
  1012. va_end(ap);
  1013. }