fts5Int.h 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947
  1. /*
  2. ** 2014 May 31
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. ******************************************************************************
  12. **
  13. */
  14. #ifndef _FTS5INT_H
  15. #define _FTS5INT_H
  16. #include "fts5.h"
  17. #include "sqlite3ext.h"
  18. SQLITE_EXTENSION_INIT1
  19. #include <string.h>
  20. #include <assert.h>
  21. #ifndef SQLITE_AMALGAMATION
  /* Shorthand integer typedefs used by the declarations in this header. They
  ** are only declared here when SQLITE_AMALGAMATION is not defined.
  ** NOTE(review): the names imply exact bit widths, but u32/u16/i16 are built
  ** on plain unsigned int/short, so the real widths depend on the platform. */
  22. typedef unsigned char u8;
  23. typedef unsigned int u32;
  24. typedef unsigned short u16;
  25. typedef short i16;
  26. typedef sqlite3_int64 i64;
  27. typedef sqlite3_uint64 u64;
  #if !defined(ArraySize)
  /* Number of elements in array x, as an int. x must be a real array (not a
  ** pointer), because the count is derived from sizeof. An existing
  ** definition of ArraySize takes precedence. */
  # define ArraySize(x) ((int)(sizeof(x) / sizeof(*(x))))
  #endif
  /* testcase() expands to nothing here. */
  #define testcase(x)

  /* Coverage and mutation test builds omit the auxiliary safety checks. */
  #if defined(SQLITE_MUTATION_TEST) || defined(SQLITE_COVERAGE_TEST)
  # define SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS 1
  #endif

  /*
  ** ALWAYS(X) wraps a condition expected to be true, NEVER(X) one expected to
  ** be false. Three variants:
  **
  **   safety checks omitted  - constants 1 and 0; X is not evaluated.
  **   NDEBUG not defined     - X reduced to 0/1, with assert(0) firing when
  **                            the expectation is violated.
  **   otherwise              - plain X.
  */
  #ifdef SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS
  # define ALWAYS(X) (1)
  # define NEVER(X) (0)
  #elif !defined(NDEBUG)
  # define ALWAYS(X) (!(X) ? (assert(0), 0) : 1)
  # define NEVER(X) (!(X) ? 0 : (assert(0), 1))
  #else
  # define ALWAYS(X) (X)
  # define NEVER(X) (X)
  #endif
  /* Smaller / larger of two values. When the comparison is false (including
  ** equal operands) the second argument is the result. Each argument may be
  ** evaluated twice, so avoid arguments with side effects. */
  #define MIN(x,y) (!((x) < (y)) ? (y) : (x))
  #define MAX(x,y) (!((x) > (y)) ? (y) : (x))
  /*
  ** Largest and smallest values of a signed 64-bit integer (i64).
  ** LARGEST_INT64 is assembled from a 31-bit high half and a 32-bit low half;
  ** SMALLEST_INT64 is derived from it.
  */
  # define LARGEST_INT64 ((((i64)0x7fffffff)<<32)|0xffffffff)
  # define SMALLEST_INT64 (-LARGEST_INT64 - (i64)1)
  52. /* The uptr type is an unsigned integer large enough to hold a pointer
  53. */
  54. #if defined(HAVE_STDINT_H)
  55. typedef uintptr_t uptr;
  56. #elif SQLITE_PTRSIZE==4
  57. typedef u32 uptr;
  58. #else
  59. typedef u64 uptr;
  60. #endif
  61. #ifdef SQLITE_4_BYTE_ALIGNED_MALLOC
  62. # define EIGHT_BYTE_ALIGNMENT(X) ((((uptr)(X) - (uptr)0)&3)==0)
  63. #else
  64. # define EIGHT_BYTE_ALIGNMENT(X) ((((uptr)(X) - (uptr)0)&7)==0)
  65. #endif
  66. #endif
  67. /* Truncate very long tokens to this many bytes. Hard limit is
  68. ** (65536-1-1-4-9)==65521 bytes. The limiting factor is the 16-bit offset
  69. ** field that occurs at the start of each leaf page (see fts5_index.c). */
  70. #define FTS5_MAX_TOKEN_SIZE 32768
  71. /*
  72. ** Maximum number of prefix indexes on single FTS5 table. This must be
  73. ** less than 32. If it is set to anything larger than that, an #error
  74. ** directive in fts5_index.c will cause the build to fail.
  75. */
  76. #define FTS5_MAX_PREFIX_INDEXES 31
  77. /*
  78. ** Maximum segments permitted in a single index
  79. */
  80. #define FTS5_MAX_SEGMENT 2000
  /* Compile-time defaults. NOTE(review): judging by the names these are the
  ** default NEAR() distance and the default rank function - confirm at the
  ** points of use. */
  81. #define FTS5_DEFAULT_NEARDIST 10
  82. #define FTS5_DEFAULT_RANK "bm25"
  83. /* Name of rank and rowid columns */
  84. #define FTS5_RANK_NAME "rank"
  85. #define FTS5_ROWID_NAME "rowid"
  /* FTS5_CORRUPT: without SQLITE_DEBUG this is the constant
  ** SQLITE_CORRUPT_VTAB. With SQLITE_DEBUG the value is obtained by calling
  ** sqlite3Fts5Corrupt() instead. */
  #if !defined(SQLITE_DEBUG)
  # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
  #else
  int sqlite3Fts5Corrupt(void);
  # define FTS5_CORRUPT sqlite3Fts5Corrupt()
  #endif
  92. /*
  93. ** The assert_nc() macro is similar to the assert() macro, except that it
  94. ** is used for assert() conditions that are true only if it can be
  95. ** guaranteed that the database is not corrupt.
  96. */
  #if !defined(SQLITE_DEBUG)
  /* Without SQLITE_DEBUG, assert_nc() is exactly assert(). */
  # define assert_nc(x) assert(x)
  #else
  /* With SQLITE_DEBUG the condition is only enforced while the global flag
  ** sqlite3_fts5_may_be_corrupt is zero. The flag is tested first, so x is
  ** not evaluated at all when the flag is set. */
  extern int sqlite3_fts5_may_be_corrupt;
  # define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
  #endif
  /*
  ** memcmp() wrapper that only calls memcmp() when n is positive. For n<=0 it
  ** evaluates to 0 without touching s1 or s2, which avoids asan errors when
  ** one of the pointers is NULL and there is nothing to compare.
  */
  #define fts5Memcmp(s1, s2, n) ((n)>0 ? memcmp((s1), (s2), (n)) : 0)
  /* Mark one (UNUSED_PARAM) or two (UNUSED_PARAM2) function parameters as
  ** deliberately unused, to suppress nuisance compiler warnings. Existing
  ** definitions of either macro take precedence. */
  #if !defined(UNUSED_PARAM)
  # define UNUSED_PARAM(X) ((void)(X))
  #endif
  #if !defined(UNUSED_PARAM2)
  # define UNUSED_PARAM2(X, Y) ((void)(X), (void)(Y))
  #endif
  typedef struct Fts5Global Fts5Global;
  typedef struct Fts5Colset Fts5Colset;

  /*
  ** Column set shared by fts5_expr.c and fts5_index.c. When a NEAR() clump or
  ** phrase may only match specific columns, an object of this type records
  ** them: every entry of aiCol[] is a column that may be matched.
  **
  ** NOTE(review): aiCol is declared with one element; nCol is presumably the
  ** number of valid entries, with larger sets over-allocated by whoever
  ** creates the object - confirm at the allocation sites before changing the
  ** array declaration, since that would alter sizeof(Fts5Colset).
  */
  struct Fts5Colset {
    int nCol;
    int aiCol[1];
  };
  128. /**************************************************************************
  129. ** Interface to code in fts5_config.c. fts5_config.c contains code
  130. ** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
  131. */
  132. typedef struct Fts5Config Fts5Config;
  133. typedef struct Fts5TokenizerConfig Fts5TokenizerConfig;
  134. struct Fts5TokenizerConfig {
  135. Fts5Tokenizer *pTok;
  136. fts5_tokenizer_v2 *pApi2;
  137. fts5_tokenizer *pApi1;
  138. const char **azArg;
  139. int nArg;
  140. int ePattern; /* FTS_PATTERN_XXX constant */
  141. const char *pLocale; /* Current locale to use */
  142. int nLocale; /* Size of pLocale in bytes */
  143. };
  144. /*
  145. ** An instance of the following structure encodes all information that can
  146. ** be gleaned from the CREATE VIRTUAL TABLE statement.
  147. **
  148. ** And all information loaded from the %_config table.
  149. **
  150. ** nAutomerge:
  151. ** The minimum number of segments that an auto-merge operation should
  152. ** attempt to merge together. A value of 1 sets the object to use the
  153. ** compile time default. Zero disables auto-merge altogether.
  154. **
  155. ** bContentlessDelete:
  156. ** True if the contentless_delete option was present in the CREATE
  157. ** VIRTUAL TABLE statement.
  158. **
  159. ** zContent:
  160. **
  161. ** zContentRowid:
  162. ** The value of the content_rowid= option, if one was specified. Or
  163. ** the string "rowid" otherwise. This text is not quoted - if it is
  164. ** used as part of an SQL statement it needs to be quoted appropriately.
  165. **
  166. ** zContentExprlist:
  167. **
  168. ** pzErrmsg:
  169. ** This exists in order to allow the fts5_index.c module to return a
  170. ** decent error message if it encounters a file-format version it does
  171. ** not understand.
  172. **
  173. ** bColumnsize:
  174. ** True if the %_docsize table is created.
  175. **
  176. ** bPrefixIndex:
  177. ** This is only used for debugging. If set to false, any prefix indexes
  178. ** are ignored. This value is configured using:
  179. **
  180. ** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
  181. **
  182. ** bLocale:
  183. ** Set to true if locale=1 was specified when the table was created.
  184. */
  185. struct Fts5Config {
  186. sqlite3 *db; /* Database handle */
  187. Fts5Global *pGlobal; /* Global fts5 object for handle db */
  188. char *zDb; /* Database holding FTS index (e.g. "main") */
  189. char *zName; /* Name of FTS index */
  190. int nCol; /* Number of columns */
  191. char **azCol; /* Column names */
  192. u8 *abUnindexed; /* True for unindexed columns */
  193. int nPrefix; /* Number of prefix indexes */
  194. int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */
  195. int eContent; /* An FTS5_CONTENT value */
  196. int bContentlessDelete; /* "contentless_delete=" option (dflt==0) */
  197. int bContentlessUnindexed; /* "contentless_unindexed=" option (dflt=0) */
  198. char *zContent; /* content table */
  199. char *zContentRowid; /* "content_rowid=" option value */
  200. int bColumnsize; /* "columnsize=" option value (dflt==1) */
  201. int bTokendata; /* "tokendata=" option value (dflt==0) */
  202. int bLocale; /* "locale=" option value (dflt==0) */
  203. int eDetail; /* FTS5_DETAIL_XXX value */
  204. char *zContentExprlist;
  205. Fts5TokenizerConfig t;
  206. int bLock; /* True when table is preparing statement */
  207. /* Values loaded from the %_config table */
  208. int iVersion; /* fts5 file format 'version' */
  209. int iCookie; /* Incremented when %_config is modified */
  210. int pgsz; /* Approximate page size used in %_data */
  211. int nAutomerge; /* 'automerge' setting */
  212. int nCrisisMerge; /* Maximum allowed segments per level */
  213. int nUsermerge; /* 'usermerge' setting */
  214. int nHashSize; /* Bytes of memory for in-memory hash */
  215. char *zRank; /* Name of rank function */
  216. char *zRankArgs; /* Arguments to rank function */
  217. int bSecureDelete; /* 'secure-delete' */
  218. int nDeleteMerge; /* 'deletemerge' */
  219. int bPrefixInsttoken; /* 'prefix-insttoken' */
  220. /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
  221. char **pzErrmsg;
  222. #ifdef SQLITE_DEBUG
  223. int bPrefixIndex; /* True to use prefix-indexes */
  224. #endif
  225. };
  226. /* Current expected value of %_config table 'version' field. And
  227. ** the expected version if the 'secure-delete' option has ever been
  228. ** set on the table. */
  229. #define FTS5_CURRENT_VERSION 4
  230. #define FTS5_CURRENT_VERSION_SECUREDELETE 5
  /* Values for Fts5Config.eContent. */
  231. #define FTS5_CONTENT_NORMAL 0
  232. #define FTS5_CONTENT_NONE 1
  233. #define FTS5_CONTENT_EXTERNAL 2
  234. #define FTS5_CONTENT_UNINDEXED 3
  /* Values for Fts5Config.eDetail. */
  235. #define FTS5_DETAIL_FULL 0
  236. #define FTS5_DETAIL_NONE 1
  237. #define FTS5_DETAIL_COLUMNS 2
  /* Values for Fts5TokenizerConfig.ePattern. */
  238. #define FTS5_PATTERN_NONE 0
  239. #define FTS5_PATTERN_LIKE 65 /* matches SQLITE_INDEX_CONSTRAINT_LIKE */
  240. #define FTS5_PATTERN_GLOB 66 /* matches SQLITE_INDEX_CONSTRAINT_GLOB */
  /* Create an Fts5Config, returned through the Fts5Config** argument.
  ** NOTE(review): the (int, const char**) pair is presumably the argc/argv of
  ** the CREATE VIRTUAL TABLE statement and the trailing char** an error
  ** message out-parameter - confirm in fts5_config.c. */
  241. int sqlite3Fts5ConfigParse(
  242. Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char**
  243. );
  244. void sqlite3Fts5ConfigFree(Fts5Config*);
  245. int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);
  /* Tokenize the nText bytes at pText, passing pCtx as the first argument of
  ** every xToken() callback. */
  246. int sqlite3Fts5Tokenize(
  247. Fts5Config *pConfig, /* FTS5 Configuration object */
  248. int flags, /* FTS5_TOKENIZE_* flags */
  249. const char *pText, int nText, /* Text to tokenize */
  250. void *pCtx, /* Context passed to xToken() */
  251. int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
  252. );
  /* Takes a writable string. NOTE(review): presumably removes quoting from z
  ** in place - confirm in fts5_config.c. */
  253. void sqlite3Fts5Dequote(char *z);
  254. /* Load the contents of the %_config table */
  255. int sqlite3Fts5ConfigLoad(Fts5Config*, int);
  256. /* Set the value of a single config attribute */
  257. int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*);
  /* NOTE(review): the two char** are presumably outputs for the rank function
  ** name and its arguments (compare Fts5Config.zRank / zRankArgs) - confirm. */
  258. int sqlite3Fts5ConfigParseRank(const char*, char**, char**);
  /* Takes a printf-style format string followed by variadic arguments. */
  259. void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...);
  260. /*
  261. ** End of interface to code in fts5_config.c.
  262. **************************************************************************/
  263. /**************************************************************************
  264. ** Interface to code in fts5_buffer.c.
  265. */
  266. /*
  267. ** Buffer object for the incremental building of string data.
  268. */
  269. typedef struct Fts5Buffer Fts5Buffer;
  270. struct Fts5Buffer {
  271. u8 *p;
  272. int n;
  273. int nSpace;
  274. };
  /* Buffer operations. NOTE(review): the leading int* taken by most of these
  ** is presumably an in/out result code (it is named pRc in
  ** sqlite3Fts5Mprintf() and fts5BufferGrow()) - confirm in fts5_buffer.c. */
  275. int sqlite3Fts5BufferSize(int*, Fts5Buffer*, u32);
  276. void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64);
  277. void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, u32, const u8*);
  278. void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*);
  279. void sqlite3Fts5BufferFree(Fts5Buffer*);
  280. void sqlite3Fts5BufferZero(Fts5Buffer*);
  281. void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*);
  /* NOTE(review): zFmt is declared char* here but const char* in
  ** sqlite3Fts5Mprintf() below; changing it requires the definition in
  ** fts5_buffer.c to change too. */
  282. void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...);
  283. char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...);
  /* Short aliases for the functions above. fts5BufferAppendVarint() casts
  ** with an unparenthesized "(i64)c", so for a compound argument the cast
  ** binds only to its first operand. */
  284. #define fts5BufferZero(x) sqlite3Fts5BufferZero(x)
  285. #define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,(i64)c)
  286. #define fts5BufferFree(a) sqlite3Fts5BufferFree(a)
  287. #define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d)
  288. #define fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d)
  /* fts5BufferGrow(): evaluates to 0, calling nothing, when n+nn (compared as
  ** u32) does not exceed nSpace. Otherwise it evaluates to the return value
  ** of sqlite3Fts5BufferSize() called with a requested size of nn+n. pBuf and
  ** nn are each expanded more than once. */
  289. #define fts5BufferGrow(pRc,pBuf,nn) ( \
  290. (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace) ? 0 : \
  291. sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) \
  292. )
  293. /* Write and decode big-endian 32-bit integer values */
  294. void sqlite3Fts5Put32(u8*, int);
  295. int sqlite3Fts5Get32(const u8*);
  /*
  ** Split a 64-bit position value of the form (iCol<<32) + iOff into its
  ** parts. FTS5_POS2COLUMN() yields bits 32..62 as an int, FTS5_POS2OFFSET()
  ** yields bits 0..30 as an int.
  **
  ** The argument is fully parenthesized so that compound expressions such as
  ** FTS5_POS2OFFSET(a|b) are split as a whole; without the parentheses the
  ** shift/mask would bind to only part of the expression.
  */
  #define FTS5_POS2COLUMN(iPos) ((int)(((iPos) >> 32) & 0x7FFFFFFF))
  #define FTS5_POS2OFFSET(iPos) ((int)((iPos) & 0x7FFFFFFF))
  298. typedef struct Fts5PoslistReader Fts5PoslistReader;
  299. struct Fts5PoslistReader {
  300. /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
  301. const u8 *a; /* Position list to iterate through */
  302. int n; /* Size of buffer at a[] in bytes */
  303. int i; /* Current offset in a[] */
  304. u8 bFlag; /* For client use (any custom purpose) */
  305. /* Output variables */
  306. u8 bEof; /* Set to true at EOF */
  307. i64 iPos; /* (iCol<<32) + iPos */
  308. };
  309. int sqlite3Fts5PoslistReaderInit(
  310. const u8 *a, int n, /* Poslist buffer to iterate through */
  311. Fts5PoslistReader *pIter /* Iterator object to initialize */
  312. );
  313. int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);
  314. typedef struct Fts5PoslistWriter Fts5PoslistWriter;
  315. struct Fts5PoslistWriter {
  316. i64 iPrev;
  317. };
  318. int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64);
  319. void sqlite3Fts5PoslistSafeAppend(Fts5Buffer*, i64*, i64);
  320. int sqlite3Fts5PoslistNext64(
  321. const u8 *a, int n, /* Buffer containing poslist */
  322. int *pi, /* IN/OUT: Offset within a[] */
  323. i64 *piOff /* IN/OUT: Current offset */
  324. );
  325. /* Malloc utilities. NOTE(review): pRc is presumably an in/out result code - confirm in fts5_buffer.c. */
  326. void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte);
  327. char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn);
  328. /* Character set tests (like isspace(), isalpha() etc.) */
  329. int sqlite3Fts5IsBareword(char t);
  330. /* Bucket of terms object used by the integrity-check in offsets=0 mode. */
  331. typedef struct Fts5Termset Fts5Termset; /* Opaque here; only handled by pointer */
  332. int sqlite3Fts5TermsetNew(Fts5Termset**); /* New object returned via the argument */
  333. int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent);
  334. void sqlite3Fts5TermsetFree(Fts5Termset*);
  335. /*
  336. ** End of interface to code in fts5_buffer.c.
  337. **************************************************************************/
  338. /**************************************************************************
  339. ** Interface to code in fts5_index.c. fts5_index.c contains code
  340. ** to access the data stored in the %_data table.
  341. */
  342. typedef struct Fts5Index Fts5Index;
  343. typedef struct Fts5IndexIter Fts5IndexIter;
  344. struct Fts5IndexIter {
  345. i64 iRowid;
  346. const u8 *pData;
  347. int nData;
  348. u8 bEof;
  349. };
  350. #define sqlite3Fts5IterEof(x) ((x)->bEof)
  351. /*
  352. ** Values used as part of the flags argument passed to IndexQuery().
  353. */
  /* Every flag below occupies its own bit, so they can be OR-ed together
  ** into the "flags" mask passed to sqlite3Fts5IndexQuery(). */
  354. #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */
  355. #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */
  356. #define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */
  357. #define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */
  358. /* The following are used internally by the fts5_index.c module. They are
  359. ** defined here only to make it easier to avoid clashes with the flags
  360. ** above. */
  361. #define FTS5INDEX_QUERY_SKIPEMPTY 0x0010
  362. #define FTS5INDEX_QUERY_NOOUTPUT 0x0020
  363. #define FTS5INDEX_QUERY_SKIPHASH 0x0040
  364. #define FTS5INDEX_QUERY_NOTOKENDATA 0x0080
  365. #define FTS5INDEX_QUERY_SCANONETERM 0x0100
  366. /*
  367. ** Create/destroy an Fts5Index object.
  368. */
  369. int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
  370. int sqlite3Fts5IndexClose(Fts5Index *p);
  371. /*
  372. ** Return a simple checksum value based on the arguments.
  373. */
  374. u64 sqlite3Fts5IndexEntryCksum(
  375. i64 iRowid,
  376. int iCol,
  377. int iPos,
  378. int iIdx,
  379. const char *pTerm,
  380. int nTerm
  381. );
  382. /*
  383. ** Argument p points to a buffer containing utf-8 text that is nByte bytes in
  384. ** size. Return the number of bytes in the nChar character prefix of the
  385. ** buffer, or 0 if there are less than nChar characters in total.
  386. */
  387. int sqlite3Fts5IndexCharlenToBytelen(
  388. const char *p,
  389. int nByte,
  390. int nChar
  391. );
  392. /*
  393. ** Open a new iterator to iterate through all rowids that match the
  394. ** specified token or token prefix.
  395. */
  396. int sqlite3Fts5IndexQuery(
  397. Fts5Index *p, /* FTS index to query */
  398. const char *pToken, int nToken, /* Token (or prefix) to query for */
  399. int flags, /* Mask of FTS5INDEX_QUERY_X flags */
  400. Fts5Colset *pColset, /* Match these columns only */
  401. Fts5IndexIter **ppIter /* OUT: New iterator object */
  402. );
  403. /*
  404. ** The various operations on open token or token prefix iterators opened
  405. ** using sqlite3Fts5IndexQuery().
  406. */
  407. int sqlite3Fts5IterNext(Fts5IndexIter*);
  408. int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
  409. /*
  410. ** Close an iterator opened by sqlite3Fts5IndexQuery().
  411. */
  412. void sqlite3Fts5IterClose(Fts5IndexIter*);
  413. /*
  414. ** Close the reader blob handle, if it is open.
  415. */
  416. void sqlite3Fts5IndexCloseReader(Fts5Index*);
  417. /*
  418. ** This interface is used by the fts5vocab module.
  419. */
  420. const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*);
  421. int sqlite3Fts5IterNextScan(Fts5IndexIter*);
  422. void *sqlite3Fts5StructureRef(Fts5Index*);
  423. void sqlite3Fts5StructureRelease(void*);
  424. int sqlite3Fts5StructureTest(Fts5Index*, void*);
  425. /*
  426. ** Used by xInstToken():
  427. */
  428. int sqlite3Fts5IterToken(
  429. Fts5IndexIter *pIndexIter,
  430. const char *pToken, int nToken,
  431. i64 iRowid,
  432. int iCol,
  433. int iOff,
  434. const char **ppOut, int *pnOut
  435. );
  436. /*
  437. ** Insert or remove data to or from the index. Each time a document is
  438. ** added to or removed from the index, this function is called one or more
  439. ** times.
  440. **
  441. ** For an insert, it must be called once for each token in the new document.
  442. ** If the operation is a delete, it must be called (at least) once for each
  443. ** unique token in the document with an iCol value less than zero. The iPos
  444. ** argument is ignored for a delete.
  445. */
  446. int sqlite3Fts5IndexWrite(
  447. Fts5Index *p, /* Index to write to */
  448. int iCol, /* Column token appears in (-ve -> delete) */
  449. int iPos, /* Position of token within column */
  450. const char *pToken, int nToken /* Token to add or remove to or from index */
  451. );
  452. /*
  453. ** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to
  454. ** document iDocid.
  455. */
  456. int sqlite3Fts5IndexBeginWrite(
  457. Fts5Index *p, /* Index to write to */
  458. int bDelete, /* True if current operation is a delete */
  459. i64 iDocid /* Docid to add or remove data from */
  460. );
  461. /*
  462. ** Flush any data stored in the in-memory hash tables to the database.
  463. ** Also close any open blob handles.
  464. */
  465. int sqlite3Fts5IndexSync(Fts5Index *p);
  466. /*
  467. ** Discard any data stored in the in-memory hash tables. Do not write it
  468. ** to the database. Additionally, assume that the contents of the %_data
  469. ** table may have changed on disk. So any in-memory caches of %_data
  470. ** records must be invalidated.
  471. */
  472. int sqlite3Fts5IndexRollback(Fts5Index *p);
  473. /*
  474. ** Get or set the "averages" values.
  475. */
  476. int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize);
  477. int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);
  478. /*
  479. ** Functions called by the storage module as part of integrity-check.
  480. */
  481. int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum, int bUseCksum);
  482. /*
  483. ** Called during virtual module initialization to register UDF
  484. ** fts5_decode() with SQLite
  485. */
  486. int sqlite3Fts5IndexInit(sqlite3*);
  487. int sqlite3Fts5IndexSetCookie(Fts5Index*, int);
  488. /*
  489. ** Return the total number of entries read from the %_data table by
  490. ** this connection since it was created.
  491. */
  492. int sqlite3Fts5IndexReads(Fts5Index *p);
  /* Further operations on an open Fts5Index. NOTE(review): none of these is
  ** documented in this header; take their semantics from fts5_index.c. The
  ** int return values are presumably SQLite result codes - confirm. */
  493. int sqlite3Fts5IndexReinit(Fts5Index *p);
  494. int sqlite3Fts5IndexOptimize(Fts5Index *p);
  495. int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge);
  496. int sqlite3Fts5IndexReset(Fts5Index *p);
  497. int sqlite3Fts5IndexLoadConfig(Fts5Index *p);
  498. int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin); /* Result written to *piOrigin */
  499. int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid);
  500. void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter*);
  501. /* Used to populate hash tables for xInstToken in detail=none/column mode. */
  502. int sqlite3Fts5IndexIterWriteTokendata(
  503. Fts5IndexIter*, const char*, int, i64 iRowid, int iCol, int iOff
  504. );
  505. /*
  506. ** End of interface to code in fts5_index.c.
  507. **************************************************************************/
  508. /**************************************************************************
  509. ** Interface to code in fts5_varint.c.
  510. */
  511. int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v);
  512. int sqlite3Fts5GetVarintLen(u32 iVal);
  513. u8 sqlite3Fts5GetVarint(const unsigned char*, u64*);
  514. int sqlite3Fts5PutVarint(unsigned char *p, u64 v);
  /* fts5GetVarint32(a,b): b must be an lvalue. Its address is cast to u32*,
  ** so b is written as though it were a u32 whatever its declared type. */
  515. #define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&(b))
  516. #define fts5GetVarint sqlite3Fts5GetVarint
  /* fts5FastGetVarint32(a, iOff, nVal): stores the byte a[iOff] in nVal and
  ** advances iOff by one. If bit 0x80 of that byte is set, iOff is stepped
  ** back and the value is re-read with fts5GetVarint32(), whose return value
  ** is then added to iOff. iOff and nVal must be lvalues and are expanded
  ** several times.
  ** NOTE(review): this expands to a bare { } block rather than
  ** do{ }while(0), so "if(c) fts5FastGetVarint32(a,i,n); else ..." does not
  ** compile. Converting it is only safe if every caller ends the invocation
  ** with a semicolon - check the callers first. */
  517. #define fts5FastGetVarint32(a, iOff, nVal) { \
  518. nVal = (a)[iOff++]; \
  519. if( nVal & 0x80 ){ \
  520. iOff--; \
  521. iOff += fts5GetVarint32(&(a)[iOff], nVal); \
  522. } \
  523. }
  524. /*
  525. ** End of interface to code in fts5_varint.c.
  526. **************************************************************************/
  527. /**************************************************************************
  528. ** Interface to code in fts5_main.c.
  529. */
  530. /*
  531. ** Virtual-table object.
  532. */
  533. typedef struct Fts5Table Fts5Table;
  534. struct Fts5Table {
  535. sqlite3_vtab base; /* Base class used by SQLite core */
  536. Fts5Config *pConfig; /* Virtual table configuration */
  537. Fts5Index *pIndex; /* Full-text index */
  538. };
  539. int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig);
  540. Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64);
  541. int sqlite3Fts5FlushToDisk(Fts5Table*);
  /* Locale helpers. NOTE(review): presumably these maintain the pLocale /
  ** nLocale fields of Fts5Config.t - confirm in fts5_main.c. */
  542. void sqlite3Fts5ClearLocale(Fts5Config *pConfig);
  543. void sqlite3Fts5SetLocale(Fts5Config *pConfig, const char *pLoc, int nLoc);
  544. int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal);
  /* Takes pVal and four out-parameters: a text pointer and size
  ** (ppText/pnText) and a locale pointer and size (ppLoc/pnLoc). */
  545. int sqlite3Fts5DecodeLocaleValue(sqlite3_value *pVal,
  546. const char **ppText, int *pnText, const char **ppLoc, int *pnLoc
  547. );
  548. /*
  549. ** End of interface to code in fts5_main.c.
  550. **************************************************************************/
  551. /**************************************************************************
  552. ** Interface to code in fts5_hash.c.
  553. */
  554. typedef struct Fts5Hash Fts5Hash;
  555. /*
  556. ** Create a hash table, free a hash table.
  557. */
  558. int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize);
  559. void sqlite3Fts5HashFree(Fts5Hash*);
  560. int sqlite3Fts5HashWrite(
  561. Fts5Hash*,
  562. i64 iRowid, /* Rowid for this entry */
  563. int iCol, /* Column token appears in (-ve -> delete) */
  564. int iPos, /* Position of token within column */
  565. char bByte, /* NOTE(review): undocumented here - see fts5_hash.c */
  566. const char *pToken, int nToken /* Token to add or remove to or from index */
  567. );
  568. /*
  569. ** Empty (but do not delete) a hash table.
  570. */
  571. void sqlite3Fts5HashClear(Fts5Hash*);
  572. /*
  573. ** Return true if the hash is empty, false otherwise.
  574. */
  575. int sqlite3Fts5HashIsEmpty(Fts5Hash*);
  576. int sqlite3Fts5HashQuery(
  577. Fts5Hash*, /* Hash table to query */
  578. int nPre, /* NOTE(review): undocumented here - see fts5_hash.c */
  579. const char *pTerm, int nTerm, /* Query term */
  580. void **ppObj, /* OUT: Pointer to doclist for pTerm */
  581. int *pnDoclist /* OUT: Size of doclist in bytes */
  582. );
  583. int sqlite3Fts5HashScanInit(
  584. Fts5Hash*, /* Hash table to query */
  585. const char *pTerm, int nTerm /* Query prefix */
  586. );
  587. void sqlite3Fts5HashScanNext(Fts5Hash*);
  588. int sqlite3Fts5HashScanEof(Fts5Hash*);
  589. void sqlite3Fts5HashScanEntry(Fts5Hash *,
  590. const char **pzTerm, /* OUT: term (nul-terminated) */
  591. int *pnTerm, /* OUT: Size of term in bytes */
  592. const u8 **ppDoclist, /* OUT: pointer to doclist */
  593. int *pnDoclist /* OUT: size of doclist in bytes */
  594. );
  595. /*
  596. ** End of interface to code in fts5_hash.c.
  597. **************************************************************************/
  598. /**************************************************************************
  599. ** Interface to code in fts5_storage.c. fts5_storage.c contains
  600. ** code to access the data stored in the %_content and %_docsize tables.
  601. */
  /* Statement identifiers. NOTE(review): presumably the eStmt values accepted
  ** by sqlite3Fts5StorageStmt() / sqlite3Fts5StorageStmtRelease() - confirm
  ** in fts5_storage.c. */
  602. #define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */
  603. #define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */
  604. #define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? */
  /* Opaque storage handle. The functions below are not documented in this
  ** header; take their semantics from fts5_storage.c. */
  605. typedef struct Fts5Storage Fts5Storage;
  606. int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**);
  607. int sqlite3Fts5StorageClose(Fts5Storage *p);
  608. int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName);
  609. int sqlite3Fts5DropAll(Fts5Config*);
  610. int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **);
  611. int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**, int);
  612. int sqlite3Fts5StorageContentInsert(Fts5Storage *p, int, sqlite3_value**, i64*);
  613. int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64);
  614. int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg);
  /* A statement is handed out through the sqlite3_stmt** argument of
  ** sqlite3Fts5StorageStmt() and given back with
  ** sqlite3Fts5StorageStmtRelease(), using the same eStmt value. */
  615. int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**);
  616. void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*);
  617. int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol);
  618. int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg);
  619. int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow);
  620. int sqlite3Fts5StorageSync(Fts5Storage *p);
  621. int sqlite3Fts5StorageRollback(Fts5Storage *p);
  622. int sqlite3Fts5StorageConfigValue(
  623. Fts5Storage *p, const char*, sqlite3_value*, int
  624. );
  625. int sqlite3Fts5StorageDeleteAll(Fts5Storage *p);
  626. int sqlite3Fts5StorageRebuild(Fts5Storage *p);
  627. int sqlite3Fts5StorageOptimize(Fts5Storage *p);
  628. int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge);
  629. int sqlite3Fts5StorageReset(Fts5Storage *p);
  630. void sqlite3Fts5StorageReleaseDeleteRow(Fts5Storage*);
  631. int sqlite3Fts5StorageFindDeleteRow(Fts5Storage *p, i64 iDel);
  632. /*
  633. ** End of interface to code in fts5_storage.c.
  634. **************************************************************************/
  635. /**************************************************************************
  636. ** Interface to code in fts5_expr.c.
  637. */
  /* Opaque types of the expression module, plus the token passed between the
  ** parser and fts5_expr.c. */
  typedef struct Fts5Expr Fts5Expr;
  typedef struct Fts5ExprNode Fts5ExprNode;
  typedef struct Fts5Parse Fts5Parse;
  typedef struct Fts5Token Fts5Token;
  typedef struct Fts5ExprPhrase Fts5ExprPhrase;
  typedef struct Fts5ExprNearset Fts5ExprNearset;

  /* A token is a (pointer, length) pair; the text is not nul-terminated, so
  ** always use n rather than strlen(). */
  struct Fts5Token {
    const char *p;     /* Token text (not NULL terminated) */
    int n;             /* Size of buffer p in bytes */
  };
  648. /* Parse a MATCH expression. */
  649. int sqlite3Fts5ExprNew(
  650. Fts5Config *pConfig,
  651. int bPhraseToAnd,
  652. int iCol, /* Column on LHS of MATCH operator */
  653. const char *zExpr,
  654. Fts5Expr **ppNew,
  655. char **pzErr
  656. );
  657. int sqlite3Fts5ExprPattern(
  658. Fts5Config *pConfig,
  659. int bGlob,
  660. int iCol,
  661. const char *zText,
  662. Fts5Expr **pp
  663. );
  664. /*
  665. ** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, iMin, bDesc);
  666. ** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr);
  667. ** rc = sqlite3Fts5ExprNext(pExpr, iMax)
  668. ** ){
  669. ** // The document with rowid iRowid matches the expression!
  670. ** i64 iRowid = sqlite3Fts5ExprRowid(pExpr);
  671. ** }
  672. */
  673. int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc);
  674. int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax);
  675. int sqlite3Fts5ExprEof(Fts5Expr*);
  676. i64 sqlite3Fts5ExprRowid(Fts5Expr*);
  677. void sqlite3Fts5ExprFree(Fts5Expr*);
  678. int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2);
  679. /* Called during startup to register a UDF with SQLite */
  680. int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);
  681. int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
  682. int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
  683. int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);
  684. typedef struct Fts5PoslistPopulator Fts5PoslistPopulator;
  685. Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr*, int);
  686. int sqlite3Fts5ExprPopulatePoslists(
  687. Fts5Config*, Fts5Expr*, Fts5PoslistPopulator*, int, const char*, int
  688. );
  689. void sqlite3Fts5ExprCheckPoslists(Fts5Expr*, i64);
  690. int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**);
  691. int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *);
  692. int sqlite3Fts5ExprQueryToken(Fts5Expr*, int, int, const char**, int*);
  693. int sqlite3Fts5ExprInstToken(Fts5Expr*, i64, int, int, int, int, const char**, int*);
  694. void sqlite3Fts5ExprClearTokens(Fts5Expr*);
  695. /*******************************************
  696. ** The fts5_expr.c API above this point is used by the other hand-written
  697. ** C code in this module. The interfaces below this point are called by
  698. ** the parser code in fts5parse.y. */
  699. void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);
  700. Fts5ExprNode *sqlite3Fts5ParseNode(
  701. Fts5Parse *pParse,
  702. int eType,
  703. Fts5ExprNode *pLeft,
  704. Fts5ExprNode *pRight,
  705. Fts5ExprNearset *pNear
  706. );
  707. Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
  708. Fts5Parse *pParse,
  709. Fts5ExprNode *pLeft,
  710. Fts5ExprNode *pRight
  711. );
  712. Fts5ExprPhrase *sqlite3Fts5ParseTerm(
  713. Fts5Parse *pParse,
  714. Fts5ExprPhrase *pPhrase,
  715. Fts5Token *pToken,
  716. int bPrefix
  717. );
  718. void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase*);
  719. Fts5ExprNearset *sqlite3Fts5ParseNearset(
  720. Fts5Parse*,
  721. Fts5ExprNearset*,
  722. Fts5ExprPhrase*
  723. );
  724. Fts5Colset *sqlite3Fts5ParseColset(
  725. Fts5Parse*,
  726. Fts5Colset*,
  727. Fts5Token *
  728. );
  729. void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*);
  730. void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*);
  731. void sqlite3Fts5ParseNodeFree(Fts5ExprNode*);
  732. void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*);
  733. void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNode*, Fts5Colset*);
  734. Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse*, Fts5Colset*);
  735. void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p);
  736. void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*);
  737. /*
  738. ** End of interface to code in fts5_expr.c.
  739. **************************************************************************/
  740. /**************************************************************************
  741. ** Interface to code in fts5_aux.c.
  742. */
  743. int sqlite3Fts5AuxInit(fts5_api*);
  744. /*
  745. ** End of interface to code in fts5_aux.c.
  746. **************************************************************************/
  747. /**************************************************************************
  748. ** Interface to code in fts5_tokenizer.c.
  749. */
  750. int sqlite3Fts5TokenizerInit(fts5_api*);
  751. int sqlite3Fts5TokenizerPattern(
  752. int (*xCreate)(void*, const char**, int, Fts5Tokenizer**),
  753. Fts5Tokenizer *pTok
  754. );
  755. int sqlite3Fts5TokenizerPreload(Fts5TokenizerConfig*);
  756. /*
  757. ** End of interface to code in fts5_tokenizer.c.
  758. **************************************************************************/
  759. /**************************************************************************
  760. ** Interface to code in fts5_vocab.c.
  761. */
  762. int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*);
  763. /*
  764. ** End of interface to code in fts5_vocab.c.
  765. **************************************************************************/
  766. /**************************************************************************
  767. ** Interface to automatically generated code in fts5_unicode2.c.
  768. */
  769. int sqlite3Fts5UnicodeIsdiacritic(int c);
  770. int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);
  771. int sqlite3Fts5UnicodeCatParse(const char*, u8*);
  772. int sqlite3Fts5UnicodeCategory(u32 iCode);
  773. void sqlite3Fts5UnicodeAscii(u8*, u8*);
  774. /*
  775. ** End of interface to code in fts5_unicode2.c.
  776. **************************************************************************/
  777. #endif