fts5_index.c 290 KB


  1. /*
  2. ** 2014 May 31
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. ******************************************************************************
  12. **
  13. ** Low level access to the FTS index stored in the database file. The
  14. ** routines in this file implement all read and write access to the
  15. ** %_data table. Other parts of the system access this functionality via
  16. ** the interface defined in fts5Int.h.
  17. */
  18. #include "fts5Int.h"
  19. /*
  20. ** Overview:
  21. **
  22. ** The %_data table contains all the FTS indexes for an FTS5 virtual table.
  23. ** As well as the main term index, there may be up to 31 prefix indexes.
  24. ** The format is similar to FTS3/4, except that:
  25. **
  26. ** * all segment b-tree leaf data is stored in fixed size page records
  27. ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
  28. ** taken to ensure it is possible to iterate in either direction through
  29. ** the entries in a doclist, or to seek to a specific entry within a
  30. ** doclist, without loading it into memory.
  31. **
  32. ** * large doclists that span many pages have associated "doclist index"
  33. ** records that contain a copy of the first rowid on each page spanned by
  34. ** the doclist. This is used to speed up seek operations, and merges of
  35. ** large doclists with very small doclists.
  36. **
  37. ** * extra fields in the "structure record" record the state of ongoing
  38. ** incremental merge operations.
  39. **
  40. */
  /* Tuning constants for merge/optimize work and doclist-index creation. */
  #define FTS5_OPT_WORK_UNIT  1000  /* Leaf pages handled per optimize step */
  #define FTS5_WORK_UNIT      64    /* Leaf pages that make up one unit of work */
  #define FTS5_MIN_DLIDX_SIZE 4     /* Add a dlidx after this many empty pages */

  #define FTS5_MAIN_PREFIX    '0'

  /* Refuse to build if more prefix indexes are configured than supported. */
  #if FTS5_MAX_PREFIX_INDEXES > 31
  # error "FTS5_MAX_PREFIX_INDEXES is too large"
  #endif

  #define FTS5_MAX_LEVEL      64
  49. /*
  50. ** There are two versions of the format used for the structure record:
  51. **
  52. ** 1. the legacy format, that may be read by all fts5 versions, and
  53. **
  54. ** 2. the V2 format, which is used by contentless_delete=1 databases.
  55. **
  56. ** Both begin with a 4-byte "configuration cookie" value. Then, a legacy
  57. ** format structure record contains a varint - the number of levels in
  58. ** the structure. Whereas a V2 structure record contains the constant
  59. ** 4 bytes [0xff 0x00 0x00 0x01]. This is unambiguous as the value of a
  60. ** varint has to be at least 16256 to begin with "0xFF". And the default
  61. ** maximum number of levels is 64.
  62. **
  63. ** See below for more on structure record formats.
  64. */
  /* The 4 constant bytes [0xFF 0x00 0x00 0x01] that mark a V2 structure record. */
  #define FTS5_STRUCTURE_V2 "\xFF\x00\x00\x01"
  66. /*
  67. ** Details:
  68. **
  69. ** The %_data table managed by this module,
  70. **
  71. ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
  72. **
  73. ** , contains the following 6 types of records. See the comments surrounding
  74. ** the FTS5_*_ROWID macros below for a description of how %_data rowids are
  75. ** assigned to each of them.
  76. **
  77. ** 1. Structure Records:
  78. **
  79. ** The set of segments that make up an index - the index structure - are
  80. ** recorded in a single record within the %_data table. The record consists
  81. ** of a single 32-bit configuration cookie value followed by a list of
  82. ** SQLite varints.
  83. **
  84. ** If the structure record is a V2 record, the configuration cookie is
  85. ** followed by the following 4 bytes: [0xFF 0x00 0x00 0x01].
  86. **
  87. ** Next, the record continues with three varints:
  88. **
  89. ** + number of levels,
  90. ** + total number of segments on all levels,
  91. ** + value of write counter.
  92. **
  93. ** Then, for each level from 0 to nMax:
  94. **
  95. ** + number of input segments in ongoing merge.
  96. ** + total number of segments in level.
  97. ** + for each segment from oldest to newest:
  98. ** + segment id (always > 0)
  99. ** + first leaf page number (often 1, always greater than 0)
  100. ** + final leaf page number
  101. **
  102. ** Then, for V2 structures only:
  103. **
  104. ** + lower origin counter value,
  105. ** + upper origin counter value,
  106. ** + the number of tombstone hash pages.
  107. **
  108. ** 2. The Averages Record:
  109. **
  110. ** A single record within the %_data table. The data is a list of varints.
  111. ** The first value is the number of rows in the index. Then, for each column
  112. ** from left to right, the total number of tokens in the column for all
  113. ** rows of the table.
  114. **
  115. ** 3. Segment leaves:
  116. **
  117. ** TERM/DOCLIST FORMAT:
  118. **
  119. ** Most of each segment leaf is taken up by term/doclist data. The
  120. ** general format of term/doclist, starting with the first term
  121. ** on the leaf page, is:
  122. **
  123. ** varint : size of first term
  124. ** blob: first term data
  125. ** doclist: first doclist
  126. ** zero-or-more {
  127. ** varint: number of bytes in common with previous term
  128. ** varint: number of bytes of new term data (nNew)
  129. ** blob: nNew bytes of new term data
  130. ** doclist: next doclist
  131. ** }
  132. **
  133. ** doclist format:
  134. **
  135. ** varint: first rowid
  136. ** poslist: first poslist
  137. ** zero-or-more {
  138. ** varint: rowid delta (always > 0)
  139. ** poslist: next poslist
  140. ** }
  141. **
  142. ** poslist format:
  143. **
  144. ** varint: size of poslist in bytes multiplied by 2, not including
  145. ** this field. Plus 1 if this entry carries the "delete" flag.
  146. ** collist: collist for column 0
  147. ** zero-or-more {
  148. ** 0x01 byte
  149. ** varint: column number (I)
  150. ** collist: collist for column I
  151. ** }
  152. **
  153. ** collist format:
  154. **
  155. ** varint: first offset + 2
  156. ** zero-or-more {
  157. ** varint: offset delta + 2
  158. ** }
  159. **
  160. ** PAGE FORMAT
  161. **
  162. ** Each leaf page begins with a 4-byte header containing 2 16-bit
  163. ** unsigned integer fields in big-endian format. They are:
  164. **
  165. ** * The byte offset of the first rowid on the page, if it exists
  166. ** and occurs before the first term (otherwise 0).
  167. **
  168. ** * The byte offset of the start of the page footer. If the page
  169. ** footer is 0 bytes in size, then this field is the same as the
  170. ** size of the leaf page in bytes.
  171. **
  172. ** The page footer consists of a single varint for each term located
  173. ** on the page. Each varint is the byte offset of the current term
  174. ** within the page, delta-compressed against the previous value. In
  175. ** other words, the first varint in the footer is the byte offset of
  176. ** the first term, the second is the byte offset of the second less that
  177. ** of the first, and so on.
  178. **
  179. ** The term/doclist format described above is accurate if the entire
  180. ** term/doclist data fits on a single leaf page. If this is not the case,
  181. ** the format is changed in two ways:
  182. **
  183. ** + if the first rowid on a page occurs before the first term, it
  184. ** is stored as a literal value:
  185. **
  186. ** varint: first rowid
  187. **
  188. ** + the first term on each page is stored in the same way as the
  189. ** very first term of the segment:
  190. **
  191. ** varint : size of first term
  192. ** blob: first term data
  193. **
  194. ** 5. Segment doclist indexes:
  195. **
  196. ** Doclist indexes are themselves b-trees, however they usually consist of
  197. ** a single leaf record only. The format of each doclist index leaf page
  198. ** is:
  199. **
  200. ** * Flags byte. Bits are:
  201. ** 0x01: Clear if leaf is also the root page, otherwise set.
  202. **
  203. ** * Page number of fts index leaf page. As a varint.
  204. **
  205. ** * First rowid on page indicated by previous field. As a varint.
  206. **
  207. ** * A list of varints, one for each subsequent termless page. A
  208. ** positive delta if the termless page contains at least one rowid,
  209. ** or an 0x00 byte otherwise.
  210. **
  211. ** Internal doclist index nodes are:
  212. **
  213. ** * Flags byte. Bits are:
  214. ** 0x01: Clear for root page, otherwise set.
  215. **
  216. ** * Page number of first child page. As a varint.
  217. **
  218. ** * Copy of first rowid on page indicated by previous field. As a varint.
  219. **
  220. ** * A list of delta-encoded varints - the first rowid on each subsequent
  221. ** child page.
  222. **
  223. ** 6. Tombstone Hash Page
  224. **
  225. ** These records are only ever present in contentless_delete=1 tables.
  226. ** There are zero or more of these associated with each segment. They
  227. ** are used to store the tombstone rowids for rows contained in the
  228. ** associated segments.
  229. **
  230. ** The set of nHashPg tombstone hash pages associated with a single
  231. ** segment together form a single hash table containing tombstone rowids.
  232. ** To find the page of the hash on which a key might be stored:
  233. **
  234. ** iPg = (rowid % nHashPg)
  235. **
  236. ** Then, within page iPg, which has nSlot slots:
  237. **
  238. ** iSlot = (rowid / nHashPg) % nSlot
  239. **
  240. ** Each tombstone hash page begins with an 8 byte header:
  241. **
  242. ** 1-byte: Key-size (the size in bytes of each slot). Either 4 or 8.
  243. ** 1-byte: rowid-0-tombstone flag. This flag is only valid on the
  244. ** first tombstone hash page for each segment (iPg=0). If set,
  245. ** the hash table contains rowid 0. If clear, it does not.
  246. ** Rowid 0 is handled specially.
  247. ** 2-bytes: unused.
  248. ** 4-bytes: Big-endian integer containing number of entries on page.
  249. **
  250. ** Following this are nSlot 4 or 8 byte slots (depending on the key-size
  251. ** in the first byte of the page header). The number of slots may be
  252. ** determined based on the size of the page record and the key-size:
  253. **
  254. ** nSlot = (nByte - 8) / key-size
  255. */
  /*
  ** Fixed %_data rowids at which the averages record and the structure
  ** record are stored.
  */
  #define FTS5_AVERAGES_ROWID   1   /* The averages record lives at this rowid */
  #define FTS5_STRUCTURE_ROWID  10  /* The structure record lives at this rowid */
  261. /*
  262. ** Macros determining the rowids used by segment leaves and dlidx leaves
  263. ** and nodes. All nodes and leaves are stored in the %_data table with large
  264. ** positive rowids.
  265. **
  266. ** Each segment has a unique non-zero 16-bit id.
  267. **
  268. ** The rowid for each segment leaf is found by passing the segment id and
  269. ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
  270. ** sequentially starting from 1.
  271. */
  /*
  ** Widths, in bits, of the fields packed into a %_data rowid.
  */
  #define FTS5_DATA_ID_B     16   /* Segment id: max value 65535 */
  #define FTS5_DATA_DLI_B    1    /* Doclist-index flag (1 bit) */
  #define FTS5_DATA_HEIGHT_B 5    /* Doclist-index tree height (5 bits) */
  #define FTS5_DATA_PAGE_B   31   /* Page number: max value 2147483647 */

  /*
  ** Compose a %_data rowid. From least to most significant, the fields are:
  **
  **     pgno    FTS5_DATA_PAGE_B   bits
  **     height  FTS5_DATA_HEIGHT_B bits
  **     dlidx   FTS5_DATA_DLI_B    bits
  **     segid   everything above
  **
  ** Each argument is cast to i64 before being shifted into place.
  */
  #define fts5_dri(segid, dlidx, height, pgno) (                               \
    ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \
    ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +               \
    ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                    \
    ((i64)(pgno))                                                              \
  )

  /* Rowid of leaf page pgno of segment segid. */
  #define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)

  /* Rowid of doclist-index page pgno at the given height in segment segid. */
  #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)

  /*
  ** Rowid of tombstone hash page ipg of segment segid. The value (1<<16) is
  ** added to the segment id field. The segid argument is parenthesized so
  ** that an argument containing a low-precedence operator (for example a
  ** conditional expression) is evaluated as a unit before the addition.
  */
  #define FTS5_TOMBSTONE_ROWID(segid,ipg) fts5_dri((segid)+(1<<16), 0, 0, ipg)
  #ifdef SQLITE_DEBUG
  /*
  ** Only compiled when SQLITE_DEBUG is defined. Takes no arguments and
  ** returns SQLITE_CORRUPT_VTAB.
  **
  ** The parameter list is written as (void) so that this definition is a
  ** proper prototype rather than an old-style declarator that accepts any
  ** arguments.
  */
  int sqlite3Fts5Corrupt(void){
    return SQLITE_CORRUPT_VTAB;
  }
  #endif
  /*
  ** FTS5_DATA_ZERO_PADDING: every blob read from the %_data table is padded
  ** with this many zero bytes, so that decoding a corrupt record does not
  ** read past the end of the buffer.
  **
  ** FTS5_DATA_PADDING: number of extra bytes fts5DataRead() adds to the size
  ** of the buffer it allocates for a record.
  */
  #define FTS5_DATA_ZERO_PADDING  8
  #define FTS5_DATA_PADDING       20
  /* Forward declarations of the structure types used in this file. */
  typedef struct Fts5Data             Fts5Data;
  typedef struct Fts5DlidxIter        Fts5DlidxIter;
  typedef struct Fts5DlidxLvl         Fts5DlidxLvl;
  typedef struct Fts5DlidxWriter      Fts5DlidxWriter;
  typedef struct Fts5Iter             Fts5Iter;
  typedef struct Fts5PageWriter       Fts5PageWriter;
  typedef struct Fts5SegIter          Fts5SegIter;
  typedef struct Fts5DoclistIter      Fts5DoclistIter;
  typedef struct Fts5SegWriter        Fts5SegWriter;
  typedef struct Fts5Structure        Fts5Structure;
  typedef struct Fts5StructureLevel   Fts5StructureLevel;
  typedef struct Fts5StructureSegment Fts5StructureSegment;
  typedef struct Fts5TokenDataIter    Fts5TokenDataIter;
  typedef struct Fts5TokenDataMap     Fts5TokenDataMap;
  typedef struct Fts5TombstoneArray   Fts5TombstoneArray;
  310. struct Fts5Data {
  311. u8 *p; /* Pointer to buffer containing record */
  312. int nn; /* Size of record in bytes */
  313. int szLeaf; /* Size of leaf without page-index */
  314. };
  315. /*
  316. ** One object per %_data table.
  317. **
  318. ** nContentlessDelete:
  319. ** The number of contentless delete operations since the most recent
  320. ** call to fts5IndexFlush() or fts5IndexDiscardData(). This is tracked
  321. ** so that extra auto-merge work can be done by fts5IndexFlush() to
  322. ** account for the delete operations.
  323. */
  324. struct Fts5Index {
  325. Fts5Config *pConfig; /* Virtual table configuration */
  326. char *zDataTbl; /* Name of %_data table */
  327. int nWorkUnit; /* Leaf pages in a "unit" of work */
  328. /*
  329. ** Variables related to the accumulation of tokens and doclists within the
  330. ** in-memory hash tables before they are flushed to disk.
  331. */
  332. Fts5Hash *pHash; /* Hash table for in-memory data */
  333. int nPendingData; /* Current bytes of pending data */
  334. i64 iWriteRowid; /* Rowid for current doc being written */
  335. int bDelete; /* Current write is a delete */
  336. int nContentlessDelete; /* Number of contentless delete ops */
  337. int nPendingRow; /* Number of INSERT in hash table */
  338. /* Error state. */
  339. int rc; /* Current error code */
  340. int flushRc;
  341. /* State used by the fts5DataXXX() functions. */
  342. sqlite3_blob *pReader; /* RO incr-blob open on %_data table */
  343. sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */
  344. sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */
  345. sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
  346. sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */
  347. sqlite3_stmt *pIdxSelect;
  348. sqlite3_stmt *pIdxNextSelect;
  349. int nRead; /* Total number of blocks read */
  350. sqlite3_stmt *pDeleteFromIdx;
  351. sqlite3_stmt *pDataVersion;
  352. i64 iStructVersion; /* data_version when pStruct read */
  353. Fts5Structure *pStruct; /* Current db structure (or NULL) */
  354. };
  355. struct Fts5DoclistIter {
  356. u8 *aEof; /* Pointer to 1 byte past end of doclist */
  357. /* Output variables. aPoslist==0 at EOF */
  358. i64 iRowid;
  359. u8 *aPoslist;
  360. int nPoslist;
  361. int nSize;
  362. };
  363. /*
  364. ** The contents of the "structure" record for each index are represented
  365. ** using an Fts5Structure record in memory. Which uses instances of the
  366. ** other Fts5StructureXXX types as components.
  367. **
  368. ** nOriginCntr:
  369. ** This value is set to non-zero for structure records created for
  370. ** contentlessdelete=1 tables only. In that case it represents the
  371. ** origin value to apply to the next top-level segment created.
  372. */
  373. struct Fts5StructureSegment {
  374. int iSegid; /* Segment id */
  375. int pgnoFirst; /* First leaf page number in segment */
  376. int pgnoLast; /* Last leaf page number in segment */
  377. /* contentlessdelete=1 tables only: */
  378. u64 iOrigin1;
  379. u64 iOrigin2;
  380. int nPgTombstone; /* Number of tombstone hash table pages */
  381. u64 nEntryTombstone; /* Number of tombstone entries that "count" */
  382. u64 nEntry; /* Number of rows in this segment */
  383. };
  384. struct Fts5StructureLevel {
  385. int nMerge; /* Number of segments in incr-merge */
  386. int nSeg; /* Total number of segments on level */
  387. Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */
  388. };
  389. struct Fts5Structure {
  390. int nRef; /* Object reference count */
  391. u64 nWriteCounter; /* Total leaves written to level 0 */
  392. u64 nOriginCntr; /* Origin value for next top-level segment */
  393. int nSegment; /* Total segments in this structure */
  394. int nLevel; /* Number of levels in this index */
  395. Fts5StructureLevel aLevel[FLEXARRAY]; /* Array of nLevel level objects */
  396. };
  397. /* Size (in bytes) of an Fts5Structure object holding up to N levels */
  398. #define SZ_FTS5STRUCTURE(N) \
  399. (offsetof(Fts5Structure,aLevel) + (N)*sizeof(Fts5StructureLevel))
  400. /*
  401. ** An object of type Fts5SegWriter is used to write to segments.
  402. */
  403. struct Fts5PageWriter {
  404. int pgno; /* Page number for this page */
  405. int iPrevPgidx; /* Previous value written into pgidx */
  406. Fts5Buffer buf; /* Buffer containing leaf data */
  407. Fts5Buffer pgidx; /* Buffer containing page-index */
  408. Fts5Buffer term; /* Buffer containing previous term on page */
  409. };
  410. struct Fts5DlidxWriter {
  411. int pgno; /* Page number for this page */
  412. int bPrevValid; /* True if iPrev is valid */
  413. i64 iPrev; /* Previous rowid value written to page */
  414. Fts5Buffer buf; /* Buffer containing page data */
  415. };
  416. struct Fts5SegWriter {
  417. int iSegid; /* Segid to write to */
  418. Fts5PageWriter writer; /* PageWriter object */
  419. i64 iPrevRowid; /* Previous rowid written to current leaf */
  420. u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
  421. u8 bFirstRowidInPage; /* True if next rowid is first in page */
  422. /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
  423. u8 bFirstTermInPage; /* True if next term will be first in leaf */
  424. int nLeafWritten; /* Number of leaf pages written */
  425. int nEmpty; /* Number of contiguous term-less nodes */
  426. int nDlidx; /* Allocated size of aDlidx[] array */
  427. Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */
  428. /* Values to insert into the %_idx table */
  429. Fts5Buffer btterm; /* Next term to insert into %_idx table */
  430. int iBtPage; /* Page number corresponding to btterm */
  431. };
  432. typedef struct Fts5CResult Fts5CResult;
  433. struct Fts5CResult {
  434. u16 iFirst; /* aSeg[] index of firstest iterator */
  435. u8 bTermEq; /* True if the terms are equal */
  436. };
  437. /*
  438. ** Object for iterating through a single segment, visiting each term/rowid
  439. ** pair in the segment.
  440. **
  441. ** pSeg:
  442. ** The segment to iterate through.
  443. **
  444. ** iLeafPgno:
  445. ** Current leaf page number within segment.
  446. **
  447. ** iLeafOffset:
  448. ** Byte offset within the current leaf that is the first byte of the
  449. ** position list data (one byte past the position-list size field).
  450. **
  451. ** pLeaf:
  452. ** Buffer containing current leaf page data. Set to NULL at EOF.
  453. **
  454. ** iTermLeafPgno, iTermLeafOffset:
  455. ** Leaf page number containing the last term read from the segment. And
  456. ** the offset immediately following the term data.
  457. **
  458. ** flags:
  459. ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
  460. **
  461. ** FTS5_SEGITER_ONETERM:
  462. ** If set, set the iterator to point to EOF after the current doclist
  463. ** has been exhausted. Do not proceed to the next term in the segment.
  464. **
  465. ** FTS5_SEGITER_REVERSE:
  466. ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
  467. ** it is set, iterate through rowid in descending order instead of the
  468. ** default ascending order.
  469. **
  470. ** iRowidOffset/nRowidOffset/aRowidOffset:
  471. ** These are used if the FTS5_SEGITER_REVERSE flag is set.
  472. **
  473. ** For each rowid on the page corresponding to the current term, the
  474. ** corresponding aRowidOffset[] entry is set to the byte offset of the
  475. ** start of the "position-list-size" field within the page.
  476. **
  477. ** iTermIdx:
  478. ** Index of current term on iTermLeafPgno.
  479. **
  480. ** apTombstone/nTombstone:
  481. ** These are used for contentless_delete=1 tables only. When the cursor
  482. ** is first allocated, the apTombstone[] array is allocated so that it
  483. ** is large enough for all tombstones hash pages associated with the
  484. ** segment. The pages themselves are loaded lazily from the database as
  485. ** they are required.
  486. */
  487. struct Fts5SegIter {
  488. Fts5StructureSegment *pSeg; /* Segment to iterate through */
  489. int flags; /* Mask of configuration flags */
  490. int iLeafPgno; /* Current leaf page number */
  491. Fts5Data *pLeaf; /* Current leaf data */
  492. Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */
  493. i64 iLeafOffset; /* Byte offset within current leaf */
  494. Fts5TombstoneArray *pTombArray; /* Array of tombstone pages */
  495. /* Next method */
  496. void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
  497. /* The page and offset from which the current term was read. The offset
  498. ** is the offset of the first rowid in the current doclist. */
  499. int iTermLeafPgno;
  500. int iTermLeafOffset;
  501. int iPgidxOff; /* Next offset in pgidx */
  502. int iEndofDoclist;
  503. /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  504. int iRowidOffset; /* Current entry in aRowidOffset[] */
  505. int nRowidOffset; /* Allocated size of aRowidOffset[] array */
  506. int *aRowidOffset; /* Array of offset to rowid fields */
  507. Fts5DlidxIter *pDlidx; /* If there is a doclist-index */
  508. /* Variables populated based on current entry. */
  509. Fts5Buffer term; /* Current term */
  510. i64 iRowid; /* Current rowid */
  511. int nPos; /* Number of bytes in current position list */
  512. u8 bDel; /* True if the delete flag is set */
  513. };
  514. /*
  515. ** Array of tombstone pages. Reference counted.
  516. */
  517. struct Fts5TombstoneArray {
  518. int nRef; /* Number of pointers to this object */
  519. int nTombstone;
  520. Fts5Data *apTombstone[FLEXARRAY]; /* Array of tombstone pages */
  521. };
  522. /* Size (in bytes) of an Fts5TombstoneArray holding up to N tombstones */
  523. #define SZ_FTS5TOMBSTONEARRAY(N) \
  524. (offsetof(Fts5TombstoneArray,apTombstone)+(N)*sizeof(Fts5Data*))
  /*
  ** The argument is a pointer to an Fts5Data object holding a leaf page.
  ** Assert that its szLeaf field is equal either to the record size (nn) or
  ** to the 16-bit value stored at byte offset 2 of the page.
  */
  #define ASSERT_SZLEAF_OK(x) \
    assert( (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) )

  /* Bits that may be set in Fts5SegIter.flags */
  #define FTS5_SEGITER_ONETERM 0x01
  #define FTS5_SEGITER_REVERSE 0x02
  /*
  ** In each of these macros, x is a pointer to an Fts5Data object that
  ** holds a leaf page.
  **
  ** fts5LeafIsTermless(x):
  **   True if the leaf contains no terms, false if it contains at least one.
  **
  ** fts5LeafTermOff(x, i):
  **   The 16-bit value read from byte offset (szLeaf + i*2) of the page.
  **
  ** fts5LeafFirstRowidOff(x):
  **   The 16-bit value read from the first two bytes of the page.
  */
  #define fts5LeafIsTermless(x)    ((x)->nn <= (x)->szLeaf)
  #define fts5LeafTermOff(x, i)    (fts5GetU16((x)->p + (x)->szLeaf + (i)*2))
  #define fts5LeafFirstRowidOff(x) (fts5GetU16(&(x)->p[0]))
  542. /*
  543. ** Object for iterating through the merged results of one or more segments,
  544. ** visiting each term/rowid pair in the merged data.
  545. **
  546. ** nSeg is always a power of two greater than or equal to the number of
  547. ** segments that this object is merging data from. Both the aSeg[] and
  548. ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
  549. ** with zeroed objects - these are handled as if they were iterators opened
  550. ** on empty segments.
  551. **
  552. ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
  553. ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
  554. ** comparison in this context is the index of the iterator that currently
  555. ** points to the smaller term/rowid combination. Iterators at EOF are
  556. ** considered to be greater than all other iterators.
  557. **
  558. ** aFirst[1] contains the index in aSeg[] of the iterator that points to
  559. ** the smallest key overall. aFirst[0] is unused.
  560. **
  561. ** poslist:
  562. ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
  563. ** There is no way to tell if this is populated or not.
  564. **
  565. ** pColset:
  566. ** If not NULL, points to an object containing a set of column indices.
  567. ** Only matches that occur in one of these columns will be returned.
  568. ** The Fts5Iter does not own the Fts5Colset object, and so it is not
  569. ** freed when the iterator is closed - it is owned by the upper layer.
  570. */
  571. struct Fts5Iter {
  572. Fts5IndexIter base; /* Base class containing output vars */
  573. Fts5TokenDataIter *pTokenDataIter;
  574. Fts5Index *pIndex; /* Index that owns this iterator */
  575. Fts5Buffer poslist; /* Buffer containing current poslist */
  576. Fts5Colset *pColset; /* Restrict matches to these columns */
  577. /* Invoked to set output variables. */
  578. void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);
  579. int nSeg; /* Size of aSeg[] array */
  580. int bRev; /* True to iterate in reverse order */
  581. u8 bSkipEmpty; /* True to skip deleted entries */
  582. i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */
  583. Fts5CResult *aFirst; /* Current merge state (see above) */
  584. Fts5SegIter aSeg[FLEXARRAY]; /* Array of segment iterators */
  585. };
  586. /* Size (in bytes) of an Fts5Iter object holding up to N segment iterators */
  587. #define SZ_FTS5ITER(N) (offsetof(Fts5Iter,aSeg)+(N)*sizeof(Fts5SegIter))
  588. /*
  589. ** An instance of the following type is used to iterate through the contents
  590. ** of a doclist-index record.
  591. **
  592. ** pData:
  593. ** Record containing the doclist-index data.
  594. **
  595. ** bEof:
  596. ** Set to true once iterator has reached EOF.
  597. **
  598. ** iOff:
  599. ** Set to the current offset within record pData.
  600. */
  601. struct Fts5DlidxLvl {
  602. Fts5Data *pData; /* Data for current page of this level */
  603. int iOff; /* Current offset into pData */
  604. int bEof; /* At EOF already */
  605. int iFirstOff; /* Used by reverse iterators */
  606. /* Output variables */
  607. int iLeafPgno; /* Page number of current leaf page */
  608. i64 iRowid; /* First rowid on leaf iLeafPgno */
  609. };
  610. struct Fts5DlidxIter {
  611. int nLvl;
  612. int iSegid;
  613. Fts5DlidxLvl aLvl[FLEXARRAY];
  614. };
  615. /* Size (in bytes) of an Fts5DlidxIter object with up to N levels */
  616. #define SZ_FTS5DLIDXITER(N) \
  617. (offsetof(Fts5DlidxIter,aLvl)+(N)*sizeof(Fts5DlidxLvl))
  618. static void fts5PutU16(u8 *aOut, u16 iVal){
  619. aOut[0] = (iVal>>8);
  620. aOut[1] = (iVal&0xFF);
  621. }
  622. static u16 fts5GetU16(const u8 *aIn){
  623. return ((u16)aIn[0] << 8) + aIn[1];
  624. }
  625. /*
  626. ** The only argument points to a buffer at least 8 bytes in size. This
  627. ** function interprets the first 8 bytes of the buffer as a 64-bit big-endian
  628. ** unsigned integer and returns the result.
  629. */
  630. static u64 fts5GetU64(u8 *a){
  631. return ((u64)a[0] << 56)
  632. + ((u64)a[1] << 48)
  633. + ((u64)a[2] << 40)
  634. + ((u64)a[3] << 32)
  635. + ((u64)a[4] << 24)
  636. + ((u64)a[5] << 16)
  637. + ((u64)a[6] << 8)
  638. + ((u64)a[7] << 0);
  639. }
  640. /*
  641. ** The only argument points to a buffer at least 4 bytes in size. This
  642. ** function interprets the first 4 bytes of the buffer as a 32-bit big-endian
  643. ** unsigned integer and returns the result.
  644. */
  645. static u32 fts5GetU32(const u8 *a){
  646. return ((u32)a[0] << 24)
  647. + ((u32)a[1] << 16)
  648. + ((u32)a[2] << 8)
  649. + ((u32)a[3] << 0);
  650. }
  651. /*
  652. ** Write iVal, formated as a 64-bit big-endian unsigned integer, to the
  653. ** buffer indicated by the first argument.
  654. */
  655. static void fts5PutU64(u8 *a, u64 iVal){
  656. a[0] = ((iVal >> 56) & 0xFF);
  657. a[1] = ((iVal >> 48) & 0xFF);
  658. a[2] = ((iVal >> 40) & 0xFF);
  659. a[3] = ((iVal >> 32) & 0xFF);
  660. a[4] = ((iVal >> 24) & 0xFF);
  661. a[5] = ((iVal >> 16) & 0xFF);
  662. a[6] = ((iVal >> 8) & 0xFF);
  663. a[7] = ((iVal >> 0) & 0xFF);
  664. }
  665. /*
  666. ** Write iVal, formated as a 32-bit big-endian unsigned integer, to the
  667. ** buffer indicated by the first argument.
  668. */
  669. static void fts5PutU32(u8 *a, u32 iVal){
  670. a[0] = ((iVal >> 24) & 0xFF);
  671. a[1] = ((iVal >> 16) & 0xFF);
  672. a[2] = ((iVal >> 8) & 0xFF);
  673. a[3] = ((iVal >> 0) & 0xFF);
  674. }
  675. /*
  676. ** Allocate and return a buffer at least nByte bytes in size.
  677. **
  678. ** If an OOM error is encountered, return NULL and set the error code in
  679. ** the Fts5Index handle passed as the first argument.
  680. */
  681. static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){
  682. return sqlite3Fts5MallocZero(&p->rc, nByte);
  683. }
  /*
  ** Compare the contents of buffer pLeft with the blob pRight/nRight.
  **
  ** Returns a negative value if pLeft is smaller than pRight, 0 if the two
  ** are equal, or a positive value if pRight is smaller than pLeft. In
  ** other words:
  **
  **     res = *pLeft - *pRight
  **
  ** When the shared prefix compares equal, the shorter of the two is
  ** considered the lesser.
  **
  ** The byte comparison goes through fts5Memcmp(), the same helper that
  ** fts5BufferCompare() uses, instead of calling memcmp() directly. An
  ** empty Fts5Buffer may have a NULL p pointer, and passing NULL to memcmp()
  ** is undefined even when the length is zero.
  ** NOTE(review): fts5Memcmp() is defined outside this part of the file; it
  ** is presumed to skip the memcmp() call when the length is <= 0, as the
  ** asserts in fts5BufferCompare() imply - confirm against its definition.
  **
  ** Only compiled when SQLITE_DEBUG is defined.
  */
  #ifdef SQLITE_DEBUG
  static int fts5BufferCompareBlob(
    Fts5Buffer *pLeft,              /* Left hand side of comparison */
    const u8 *pRight, int nRight    /* Right hand side of comparison */
  ){
    int nCmp = MIN(pLeft->n, nRight);
    int res = fts5Memcmp(pLeft->p, pRight, nCmp);
    return (res==0 ? (pLeft->n - nRight) : res);
  }
  #endif
  702. /*
  703. ** Compare the contents of the two buffers using memcmp(). If one buffer
  704. ** is a prefix of the other, it is considered the lesser.
  705. **
  706. ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
  707. ** +ve if pRight is smaller than pLeft. In other words:
  708. **
  709. ** res = *pLeft - *pRight
  710. */
  711. static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
  712. int nCmp, res;
  713. nCmp = MIN(pLeft->n, pRight->n);
  714. assert( nCmp<=0 || pLeft->p!=0 );
  715. assert( nCmp<=0 || pRight->p!=0 );
  716. res = fts5Memcmp(pLeft->p, pRight->p, nCmp);
  717. return (res==0 ? (pLeft->n - pRight->n) : res);
  718. }
  719. static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
  720. int ret;
  721. fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
  722. return ret;
  723. }
  724. /*
  725. ** Close the read-only blob handle, if it is open.
  726. */
  727. static void fts5IndexCloseReader(Fts5Index *p){
  728. if( p->pReader ){
  729. int rc;
  730. sqlite3_blob *pReader = p->pReader;
  731. p->pReader = 0;
  732. rc = sqlite3_blob_close(pReader);
  733. if( p->rc==SQLITE_OK ) p->rc = rc;
  734. }
  735. }
  736. /*
  737. ** Retrieve a record from the %_data table.
  738. **
  739. ** If an error occurs, NULL is returned and an error left in the
  740. ** Fts5Index object.
  741. */
  742. static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
  743. Fts5Data *pRet = 0;
  744. if( p->rc==SQLITE_OK ){
  745. int rc = SQLITE_OK;
  746. if( p->pReader ){
  747. /* This call may return SQLITE_ABORT if there has been a savepoint
  748. ** rollback since it was last used. In this case a new blob handle
  749. ** is required. */
  750. sqlite3_blob *pBlob = p->pReader;
  751. p->pReader = 0;
  752. rc = sqlite3_blob_reopen(pBlob, iRowid);
  753. assert( p->pReader==0 );
  754. p->pReader = pBlob;
  755. if( rc!=SQLITE_OK ){
  756. fts5IndexCloseReader(p);
  757. }
  758. if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
  759. }
  760. /* If the blob handle is not open at this point, open it and seek
  761. ** to the requested entry. */
  762. if( p->pReader==0 && rc==SQLITE_OK ){
  763. Fts5Config *pConfig = p->pConfig;
  764. rc = sqlite3_blob_open(pConfig->db,
  765. pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
  766. );
  767. }
  768. /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
  769. ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
  770. ** All the reasons those functions might return SQLITE_ERROR - missing
  771. ** table, missing row, non-blob/text in block column - indicate
  772. ** backing store corruption. */
  773. if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
  774. if( rc==SQLITE_OK ){
  775. u8 *aOut = 0; /* Read blob data into this buffer */
  776. int nByte = sqlite3_blob_bytes(p->pReader);
  777. int szData = (sizeof(Fts5Data) + 7) & ~7;
  778. sqlite3_int64 nAlloc = szData + nByte + FTS5_DATA_PADDING;
  779. pRet = (Fts5Data*)sqlite3_malloc64(nAlloc);
  780. if( pRet ){
  781. pRet->nn = nByte;
  782. aOut = pRet->p = (u8*)pRet + szData;
  783. }else{
  784. rc = SQLITE_NOMEM;
  785. }
  786. if( rc==SQLITE_OK ){
  787. rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
  788. }
  789. if( rc!=SQLITE_OK ){
  790. sqlite3_free(pRet);
  791. pRet = 0;
  792. }else{
  793. /* TODO1: Fix this */
  794. pRet->p[nByte] = 0x00;
  795. pRet->p[nByte+1] = 0x00;
  796. pRet->szLeaf = fts5GetU16(&pRet->p[2]);
  797. }
  798. }
  799. p->rc = rc;
  800. p->nRead++;
  801. }
  802. assert( (pRet==0)==(p->rc!=SQLITE_OK) );
  803. assert( pRet==0 || EIGHT_BYTE_ALIGNMENT( pRet->p ) );
  804. return pRet;
  805. }
  806. /*
  807. ** Release a reference to data record returned by an earlier call to
  808. ** fts5DataRead().
  809. */
  810. static void fts5DataRelease(Fts5Data *pData){
  811. sqlite3_free(pData);
  812. }
  813. static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
  814. Fts5Data *pRet = fts5DataRead(p, iRowid);
  815. if( pRet ){
  816. if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){
  817. p->rc = FTS5_CORRUPT;
  818. fts5DataRelease(pRet);
  819. pRet = 0;
  820. }
  821. }
  822. return pRet;
  823. }
  824. static int fts5IndexPrepareStmt(
  825. Fts5Index *p,
  826. sqlite3_stmt **ppStmt,
  827. char *zSql
  828. ){
  829. if( p->rc==SQLITE_OK ){
  830. if( zSql ){
  831. int rc = sqlite3_prepare_v3(p->pConfig->db, zSql, -1,
  832. SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_NO_VTAB,
  833. ppStmt, 0);
  834. /* If this prepare() call fails with SQLITE_ERROR, then one of the
  835. ** %_idx or %_data tables has been removed or modified. Call this
  836. ** corruption. */
  837. p->rc = (rc==SQLITE_ERROR ? SQLITE_CORRUPT : rc);
  838. }else{
  839. p->rc = SQLITE_NOMEM;
  840. }
  841. }
  842. sqlite3_free(zSql);
  843. return p->rc;
  844. }
  845. /*
  846. ** INSERT OR REPLACE a record into the %_data table.
  847. */
  848. static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
  849. if( p->rc!=SQLITE_OK ) return;
  850. if( p->pWriter==0 ){
  851. Fts5Config *pConfig = p->pConfig;
  852. fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
  853. "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
  854. pConfig->zDb, pConfig->zName
  855. ));
  856. if( p->rc ) return;
  857. }
  858. sqlite3_bind_int64(p->pWriter, 1, iRowid);
  859. sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
  860. sqlite3_step(p->pWriter);
  861. p->rc = sqlite3_reset(p->pWriter);
  862. sqlite3_bind_null(p->pWriter, 2);
  863. }
  864. /*
  865. ** Execute the following SQL:
  866. **
  867. ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
  868. */
  869. static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
  870. if( p->rc!=SQLITE_OK ) return;
  871. if( p->pDeleter==0 ){
  872. Fts5Config *pConfig = p->pConfig;
  873. char *zSql = sqlite3_mprintf(
  874. "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
  875. pConfig->zDb, pConfig->zName
  876. );
  877. if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return;
  878. }
  879. sqlite3_bind_int64(p->pDeleter, 1, iFirst);
  880. sqlite3_bind_int64(p->pDeleter, 2, iLast);
  881. sqlite3_step(p->pDeleter);
  882. p->rc = sqlite3_reset(p->pDeleter);
  883. }
  884. /*
  885. ** Remove all records associated with segment iSegid.
  886. */
  887. static void fts5DataRemoveSegment(Fts5Index *p, Fts5StructureSegment *pSeg){
  888. int iSegid = pSeg->iSegid;
  889. i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
  890. i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
  891. fts5DataDelete(p, iFirst, iLast);
  892. if( pSeg->nPgTombstone ){
  893. i64 iTomb1 = FTS5_TOMBSTONE_ROWID(iSegid, 0);
  894. i64 iTomb2 = FTS5_TOMBSTONE_ROWID(iSegid, pSeg->nPgTombstone-1);
  895. fts5DataDelete(p, iTomb1, iTomb2);
  896. }
  897. if( p->pIdxDeleter==0 ){
  898. Fts5Config *pConfig = p->pConfig;
  899. fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
  900. "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
  901. pConfig->zDb, pConfig->zName
  902. ));
  903. }
  904. if( p->rc==SQLITE_OK ){
  905. sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
  906. sqlite3_step(p->pIdxDeleter);
  907. p->rc = sqlite3_reset(p->pIdxDeleter);
  908. }
  909. }
  910. /*
  911. ** Release a reference to an Fts5Structure object returned by an earlier
  912. ** call to fts5StructureRead() or fts5StructureDecode().
  913. */
  914. static void fts5StructureRelease(Fts5Structure *pStruct){
  915. if( pStruct && 0>=(--pStruct->nRef) ){
  916. int i;
  917. assert( pStruct->nRef==0 );
  918. for(i=0; i<pStruct->nLevel; i++){
  919. sqlite3_free(pStruct->aLevel[i].aSeg);
  920. }
  921. sqlite3_free(pStruct);
  922. }
  923. }
  924. static void fts5StructureRef(Fts5Structure *pStruct){
  925. pStruct->nRef++;
  926. }
  927. void *sqlite3Fts5StructureRef(Fts5Index *p){
  928. fts5StructureRef(p->pStruct);
  929. return (void*)p->pStruct;
  930. }
  931. void sqlite3Fts5StructureRelease(void *p){
  932. if( p ){
  933. fts5StructureRelease((Fts5Structure*)p);
  934. }
  935. }
  936. int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){
  937. if( p->pStruct!=(Fts5Structure*)pStruct ){
  938. return SQLITE_ABORT;
  939. }
  940. return SQLITE_OK;
  941. }
  942. /*
  943. ** Ensure that structure object (*pp) is writable.
  944. **
  945. ** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If
  946. ** an error occurs, (*pRc) is set to an SQLite error code before returning.
  947. */
  948. static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){
  949. Fts5Structure *p = *pp;
  950. if( *pRc==SQLITE_OK && p->nRef>1 ){
  951. i64 nByte = SZ_FTS5STRUCTURE(p->nLevel);
  952. Fts5Structure *pNew;
  953. pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte);
  954. if( pNew ){
  955. int i;
  956. memcpy(pNew, p, nByte);
  957. for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0;
  958. for(i=0; i<p->nLevel; i++){
  959. Fts5StructureLevel *pLvl = &pNew->aLevel[i];
  960. nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg;
  961. pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte);
  962. if( pLvl->aSeg==0 ){
  963. for(i=0; i<p->nLevel; i++){
  964. sqlite3_free(pNew->aLevel[i].aSeg);
  965. }
  966. sqlite3_free(pNew);
  967. return;
  968. }
  969. memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte);
  970. }
  971. p->nRef--;
  972. pNew->nRef = 1;
  973. }
  974. *pp = pNew;
  975. }
  976. }
  977. /*
  978. ** Deserialize and return the structure record currently stored in serialized
  979. ** form within buffer pData/nData.
  980. **
  981. ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
  982. ** are over-allocated by one slot. This allows the structure contents
  983. ** to be more easily edited.
  984. **
  985. ** If an error occurs, *ppOut is set to NULL and an SQLite error code
  986. ** returned. Otherwise, *ppOut is set to point to the new object and
  987. ** SQLITE_OK returned.
  988. */
  989. static int fts5StructureDecode(
  990. const u8 *pData, /* Buffer containing serialized structure */
  991. int nData, /* Size of buffer pData in bytes */
  992. int *piCookie, /* Configuration cookie value */
  993. Fts5Structure **ppOut /* OUT: Deserialized object */
  994. ){
  995. int rc = SQLITE_OK;
  996. int i = 0;
  997. int iLvl;
  998. int nLevel = 0;
  999. int nSegment = 0;
  1000. sqlite3_int64 nByte; /* Bytes of space to allocate at pRet */
  1001. Fts5Structure *pRet = 0; /* Structure object to return */
  1002. int bStructureV2 = 0; /* True for FTS5_STRUCTURE_V2 */
  1003. u64 nOriginCntr = 0; /* Largest origin value seen so far */
  1004. /* Grab the cookie value */
  1005. if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
  1006. i = 4;
  1007. /* Check if this is a V2 structure record. Set bStructureV2 if it is. */
  1008. if( 0==memcmp(&pData[i], FTS5_STRUCTURE_V2, 4) ){
  1009. i += 4;
  1010. bStructureV2 = 1;
  1011. }
  1012. /* Read the total number of levels and segments from the start of the
  1013. ** structure record. */
  1014. i += fts5GetVarint32(&pData[i], nLevel);
  1015. i += fts5GetVarint32(&pData[i], nSegment);
  1016. if( nLevel>FTS5_MAX_SEGMENT || nLevel<0
  1017. || nSegment>FTS5_MAX_SEGMENT || nSegment<0
  1018. ){
  1019. return FTS5_CORRUPT;
  1020. }
  1021. nByte = SZ_FTS5STRUCTURE(nLevel);
  1022. pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
  1023. if( pRet ){
  1024. pRet->nRef = 1;
  1025. pRet->nLevel = nLevel;
  1026. pRet->nSegment = nSegment;
  1027. i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
  1028. for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
  1029. Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
  1030. int nTotal = 0;
  1031. int iSeg;
  1032. if( i>=nData ){
  1033. rc = FTS5_CORRUPT;
  1034. }else{
  1035. i += fts5GetVarint32(&pData[i], pLvl->nMerge);
  1036. i += fts5GetVarint32(&pData[i], nTotal);
  1037. if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT;
  1038. pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
  1039. nTotal * sizeof(Fts5StructureSegment)
  1040. );
  1041. nSegment -= nTotal;
  1042. }
  1043. if( rc==SQLITE_OK ){
  1044. pLvl->nSeg = nTotal;
  1045. for(iSeg=0; iSeg<nTotal; iSeg++){
  1046. Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
  1047. if( i>=nData ){
  1048. rc = FTS5_CORRUPT;
  1049. break;
  1050. }
  1051. assert( pSeg!=0 );
  1052. i += fts5GetVarint32(&pData[i], pSeg->iSegid);
  1053. i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst);
  1054. i += fts5GetVarint32(&pData[i], pSeg->pgnoLast);
  1055. if( bStructureV2 ){
  1056. i += fts5GetVarint(&pData[i], &pSeg->iOrigin1);
  1057. i += fts5GetVarint(&pData[i], &pSeg->iOrigin2);
  1058. i += fts5GetVarint32(&pData[i], pSeg->nPgTombstone);
  1059. i += fts5GetVarint(&pData[i], &pSeg->nEntryTombstone);
  1060. i += fts5GetVarint(&pData[i], &pSeg->nEntry);
  1061. nOriginCntr = MAX(nOriginCntr, pSeg->iOrigin2);
  1062. }
  1063. if( pSeg->pgnoLast<pSeg->pgnoFirst ){
  1064. rc = FTS5_CORRUPT;
  1065. break;
  1066. }
  1067. }
  1068. if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT;
  1069. if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT;
  1070. }
  1071. }
  1072. if( nSegment!=0 && rc==SQLITE_OK ) rc = FTS5_CORRUPT;
  1073. if( bStructureV2 ){
  1074. pRet->nOriginCntr = nOriginCntr+1;
  1075. }
  1076. if( rc!=SQLITE_OK ){
  1077. fts5StructureRelease(pRet);
  1078. pRet = 0;
  1079. }
  1080. }
  1081. *ppOut = pRet;
  1082. return rc;
  1083. }
  1084. /*
  1085. ** Add a level to the Fts5Structure.aLevel[] array of structure object
  1086. ** (*ppStruct).
  1087. */
  1088. static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
  1089. fts5StructureMakeWritable(pRc, ppStruct);
  1090. assert( (ppStruct!=0 && (*ppStruct)!=0) || (*pRc)!=SQLITE_OK );
  1091. if( *pRc==SQLITE_OK ){
  1092. Fts5Structure *pStruct = *ppStruct;
  1093. int nLevel = pStruct->nLevel;
  1094. sqlite3_int64 nByte = SZ_FTS5STRUCTURE(nLevel+2);
  1095. pStruct = sqlite3_realloc64(pStruct, nByte);
  1096. if( pStruct ){
  1097. memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
  1098. pStruct->nLevel++;
  1099. *ppStruct = pStruct;
  1100. }else{
  1101. *pRc = SQLITE_NOMEM;
  1102. }
  1103. }
  1104. }
  1105. /*
  1106. ** Extend level iLvl so that there is room for at least nExtra more
  1107. ** segments.
  1108. */
  1109. static void fts5StructureExtendLevel(
  1110. int *pRc,
  1111. Fts5Structure *pStruct,
  1112. int iLvl,
  1113. int nExtra,
  1114. int bInsert
  1115. ){
  1116. if( *pRc==SQLITE_OK ){
  1117. Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  1118. Fts5StructureSegment *aNew;
  1119. sqlite3_int64 nByte;
  1120. nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
  1121. aNew = sqlite3_realloc64(pLvl->aSeg, nByte);
  1122. if( aNew ){
  1123. if( bInsert==0 ){
  1124. memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
  1125. }else{
  1126. int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
  1127. memmove(&aNew[nExtra], aNew, nMove);
  1128. memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
  1129. }
  1130. pLvl->aSeg = aNew;
  1131. }else{
  1132. *pRc = SQLITE_NOMEM;
  1133. }
  1134. }
  1135. }
  1136. static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
  1137. Fts5Structure *pRet = 0;
  1138. Fts5Config *pConfig = p->pConfig;
  1139. int iCookie; /* Configuration cookie */
  1140. Fts5Data *pData;
  1141. pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
  1142. if( p->rc==SQLITE_OK ){
  1143. /* TODO: Do we need this if the leaf-index is appended? Probably... */
  1144. memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
  1145. p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
  1146. if( p->rc==SQLITE_OK && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){
  1147. p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
  1148. }
  1149. fts5DataRelease(pData);
  1150. if( p->rc!=SQLITE_OK ){
  1151. fts5StructureRelease(pRet);
  1152. pRet = 0;
  1153. }
  1154. }
  1155. return pRet;
  1156. }
  1157. static i64 fts5IndexDataVersion(Fts5Index *p){
  1158. i64 iVersion = 0;
  1159. if( p->rc==SQLITE_OK ){
  1160. if( p->pDataVersion==0 ){
  1161. p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
  1162. sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
  1163. );
  1164. if( p->rc ) return 0;
  1165. }
  1166. if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
  1167. iVersion = sqlite3_column_int64(p->pDataVersion, 0);
  1168. }
  1169. p->rc = sqlite3_reset(p->pDataVersion);
  1170. }
  1171. return iVersion;
  1172. }
  1173. /*
  1174. ** Read, deserialize and return the structure record.
  1175. **
  1176. ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
  1177. ** are over-allocated as described for function fts5StructureDecode()
  1178. ** above.
  1179. **
  1180. ** If an error occurs, NULL is returned and an error code left in the
  1181. ** Fts5Index handle. If an error has already occurred when this function
  1182. ** is called, it is a no-op.
  1183. */
  1184. static Fts5Structure *fts5StructureRead(Fts5Index *p){
  1185. if( p->pStruct==0 ){
  1186. p->iStructVersion = fts5IndexDataVersion(p);
  1187. if( p->rc==SQLITE_OK ){
  1188. p->pStruct = fts5StructureReadUncached(p);
  1189. }
  1190. }
  1191. #if 0
  1192. else{
  1193. Fts5Structure *pTest = fts5StructureReadUncached(p);
  1194. if( pTest ){
  1195. int i, j;
  1196. assert_nc( p->pStruct->nSegment==pTest->nSegment );
  1197. assert_nc( p->pStruct->nLevel==pTest->nLevel );
  1198. for(i=0; i<pTest->nLevel; i++){
  1199. assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
  1200. assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
  1201. for(j=0; j<pTest->aLevel[i].nSeg; j++){
  1202. Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
  1203. Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
  1204. assert_nc( p1->iSegid==p2->iSegid );
  1205. assert_nc( p1->pgnoFirst==p2->pgnoFirst );
  1206. assert_nc( p1->pgnoLast==p2->pgnoLast );
  1207. }
  1208. }
  1209. fts5StructureRelease(pTest);
  1210. }
  1211. }
  1212. #endif
  1213. if( p->rc!=SQLITE_OK ) return 0;
  1214. assert( p->iStructVersion!=0 );
  1215. assert( p->pStruct!=0 );
  1216. fts5StructureRef(p->pStruct);
  1217. return p->pStruct;
  1218. }
  1219. static void fts5StructureInvalidate(Fts5Index *p){
  1220. if( p->pStruct ){
  1221. fts5StructureRelease(p->pStruct);
  1222. p->pStruct = 0;
  1223. }
  1224. }
  /*
  ** Return the sum of the nSeg values of all levels of index structure
  ** pStruct, or zero if pStruct is NULL. Compiled only when SQLITE_DEBUG is
  ** defined; this function is only ever used as part of assert()
  ** conditions.
  */
  #ifdef SQLITE_DEBUG
  static int fts5StructureCountSegments(Fts5Structure *pStruct){
    int nTotal = 0;                 /* Running total of segments */
    int ii;                         /* Level index */

    if( pStruct==0 ) return 0;
    for(ii=0; ii<pStruct->nLevel; ii++){
      nTotal += pStruct->aLevel[ii].nSeg;
    }
    return nTotal;
  }
  #endif
  /*
  ** Append the nBlob byte blob pBlob to buffer pBuf without growing the
  ** buffer. The caller must have reserved sufficient space beforehand; this
  ** is verified only by an assert().
  **
  ** Every use of a macro argument is parenthesized so that an expression
  ** passed as pBlob or nBlob is evaluated as a unit. Note that the
  ** arguments are evaluated more than once.
  */
  #define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {    \
    assert( (pBuf)->nSpace>=((pBuf)->n+(nBlob)) );           \
    memcpy(&(pBuf)->p[(pBuf)->n], (pBlob), (nBlob));         \
    (pBuf)->n += (nBlob);                                    \
  }
  /*
  ** Append integer iVal to buffer pBuf as a varint without growing the
  ** buffer. The caller must have reserved sufficient space beforehand. The
  ** assert() runs after the write, so it can only report an overrun, not
  ** prevent one.
  */
  #define fts5BufferSafeAppendVarint(pBuf, iVal) {                  \
    (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal)); \
    assert( (pBuf)->nSpace>=(pBuf)->n );                             \
  }
  1250. /*
  1251. ** Serialize and store the "structure" record.
  1252. **
  1253. ** If an error occurs, leave an error code in the Fts5Index object. If an
  1254. ** error has already occurred, this function is a no-op.
  1255. */
  1256. static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
  1257. if( p->rc==SQLITE_OK ){
  1258. Fts5Buffer buf; /* Buffer to serialize record into */
  1259. int iLvl; /* Used to iterate through levels */
  1260. int iCookie; /* Cookie value to store */
  1261. int nHdr = (pStruct->nOriginCntr>0 ? (4+4+9+9+9) : (4+9+9));
  1262. assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
  1263. memset(&buf, 0, sizeof(Fts5Buffer));
  1264. /* Append the current configuration cookie */
  1265. iCookie = p->pConfig->iCookie;
  1266. if( iCookie<0 ) iCookie = 0;
  1267. if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, nHdr) ){
  1268. sqlite3Fts5Put32(buf.p, iCookie);
  1269. buf.n = 4;
  1270. if( pStruct->nOriginCntr>0 ){
  1271. fts5BufferSafeAppendBlob(&buf, FTS5_STRUCTURE_V2, 4);
  1272. }
  1273. fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
  1274. fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
  1275. fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
  1276. }
  1277. for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
  1278. int iSeg; /* Used to iterate through segments */
  1279. Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  1280. fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
  1281. fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
  1282. assert( pLvl->nMerge<=pLvl->nSeg );
  1283. for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
  1284. Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
  1285. fts5BufferAppendVarint(&p->rc, &buf, pSeg->iSegid);
  1286. fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoFirst);
  1287. fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoLast);
  1288. if( pStruct->nOriginCntr>0 ){
  1289. fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin1);
  1290. fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin2);
  1291. fts5BufferAppendVarint(&p->rc, &buf, pSeg->nPgTombstone);
  1292. fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntryTombstone);
  1293. fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntry);
  1294. }
  1295. }
  1296. }
  1297. fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
  1298. fts5BufferFree(&buf);
  1299. }
  1300. }
  /*
  ** Debugging aid, normally compiled out. When enabled (by changing the
  ** "#if 0" below), fts5PrintStructure() prints zCaption followed by a
  ** rendering of pStruct, as produced by fts5DebugStructure(), to stdout.
  ** When disabled, calls to fts5PrintStructure() expand to nothing.
  */
  #if 0
  static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
  static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
    Fts5Buffer dump;
    int rc = SQLITE_OK;
    memset(&dump, 0, sizeof(dump));
    fts5DebugStructure(&rc, &dump, pStruct);
    fprintf(stdout, "%s: %s\n", zCaption, dump.p);
    fflush(stdout);
    fts5BufferFree(&dump);
  }
  #else
  # define fts5PrintStructure(x,y)
  #endif
  1315. static int fts5SegmentSize(Fts5StructureSegment *pSeg){
  1316. return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
  1317. }
  1318. /*
  1319. ** Return a copy of index structure pStruct. Except, promote as many
  1320. ** segments as possible to level iPromote. If an OOM occurs, NULL is
  1321. ** returned.
  1322. */
  1323. static void fts5StructurePromoteTo(
  1324. Fts5Index *p,
  1325. int iPromote,
  1326. int szPromote,
  1327. Fts5Structure *pStruct
  1328. ){
  1329. int il, is;
  1330. Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
  1331. if( pOut->nMerge==0 ){
  1332. for(il=iPromote+1; il<pStruct->nLevel; il++){
  1333. Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
  1334. if( pLvl->nMerge ) return;
  1335. for(is=pLvl->nSeg-1; is>=0; is--){
  1336. int sz = fts5SegmentSize(&pLvl->aSeg[is]);
  1337. if( sz>szPromote ) return;
  1338. fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
  1339. if( p->rc ) return;
  1340. memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
  1341. pOut->nSeg++;
  1342. pLvl->nSeg--;
  1343. }
  1344. }
  1345. }
  1346. }
  1347. /*
  1348. ** A new segment has just been written to level iLvl of index structure
  1349. ** pStruct. This function determines if any segments should be promoted
  1350. ** as a result. Segments are promoted in two scenarios:
  1351. **
  1352. ** a) If the segment just written is smaller than one or more segments
  1353. ** within the previous populated level, it is promoted to the previous
  1354. ** populated level.
  1355. **
  1356. ** b) If the segment just written is larger than the newest segment on
  1357. ** the next populated level, then that segment, and any other adjacent
  1358. ** segments that are also smaller than the one just written, are
  1359. ** promoted.
  1360. **
  1361. ** If one or more segments are promoted, the structure object is updated
  1362. ** to reflect this.
  1363. */
  1364. static void fts5StructurePromote(
  1365. Fts5Index *p, /* FTS5 backend object */
  1366. int iLvl, /* Index level just updated */
  1367. Fts5Structure *pStruct /* Index structure */
  1368. ){
  1369. if( p->rc==SQLITE_OK ){
  1370. int iTst;
  1371. int iPromote = -1;
  1372. int szPromote = 0; /* Promote anything this size or smaller */
  1373. Fts5StructureSegment *pSeg; /* Segment just written */
  1374. int szSeg; /* Size of segment just written */
  1375. int nSeg = pStruct->aLevel[iLvl].nSeg;
  1376. if( nSeg==0 ) return;
  1377. pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
  1378. szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
  1379. /* Check for condition (a) */
  1380. for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
  1381. if( iTst>=0 ){
  1382. int i;
  1383. int szMax = 0;
  1384. Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
  1385. assert( pTst->nMerge==0 );
  1386. for(i=0; i<pTst->nSeg; i++){
  1387. int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
  1388. if( sz>szMax ) szMax = sz;
  1389. }
  1390. if( szMax>=szSeg ){
  1391. /* Condition (a) is true. Promote the newest segment on level
  1392. ** iLvl to level iTst. */
  1393. iPromote = iTst;
  1394. szPromote = szMax;
  1395. }
  1396. }
  1397. /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
  1398. ** is a no-op if it is not. */
  1399. if( iPromote<0 ){
  1400. iPromote = iLvl;
  1401. szPromote = szSeg;
  1402. }
  1403. fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
  1404. }
  1405. }
  1406. /*
  1407. ** Advance the iterator passed as the only argument. If the end of the
  1408. ** doclist-index page is reached, return non-zero.
  1409. */
  1410. static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
  1411. Fts5Data *pData = pLvl->pData;
  1412. if( pLvl->iOff==0 ){
  1413. assert( pLvl->bEof==0 );
  1414. pLvl->iOff = 1;
  1415. pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
  1416. pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
  1417. pLvl->iFirstOff = pLvl->iOff;
  1418. }else{
  1419. int iOff;
  1420. for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
  1421. if( pData->p[iOff] ) break;
  1422. }
  1423. if( iOff<pData->nn ){
  1424. u64 iVal;
  1425. pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
  1426. iOff += fts5GetVarint(&pData->p[iOff], &iVal);
  1427. pLvl->iRowid += iVal;
  1428. pLvl->iOff = iOff;
  1429. }else{
  1430. pLvl->bEof = 1;
  1431. }
  1432. }
  1433. return pLvl->bEof;
  1434. }
  1435. /*
  1436. ** Advance the iterator passed as the only argument.
  1437. */
  1438. static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  1439. Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
  1440. assert( iLvl<pIter->nLvl );
  1441. if( fts5DlidxLvlNext(pLvl) ){
  1442. if( (iLvl+1) < pIter->nLvl ){
  1443. fts5DlidxIterNextR(p, pIter, iLvl+1);
  1444. if( pLvl[1].bEof==0 ){
  1445. fts5DataRelease(pLvl->pData);
  1446. memset(pLvl, 0, sizeof(Fts5DlidxLvl));
  1447. pLvl->pData = fts5DataRead(p,
  1448. FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
  1449. );
  1450. if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
  1451. }
  1452. }
  1453. }
  1454. return pIter->aLvl[0].bEof;
  1455. }
  1456. static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
  1457. return fts5DlidxIterNextR(p, pIter, 0);
  1458. }
  1459. /*
  1460. ** The iterator passed as the first argument has the following fields set
  1461. ** as follows. This function sets up the rest of the iterator so that it
  1462. ** points to the first rowid in the doclist-index.
  1463. **
  1464. ** pData:
  1465. ** pointer to doclist-index record,
  1466. **
  1467. ** When this function is called pIter->iLeafPgno is the page number the
  1468. ** doclist is associated with (the one featuring the term).
  1469. */
  1470. static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
  1471. int i;
  1472. for(i=0; i<pIter->nLvl; i++){
  1473. fts5DlidxLvlNext(&pIter->aLvl[i]);
  1474. }
  1475. return pIter->aLvl[0].bEof;
  1476. }
  1477. static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
  1478. return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
  1479. }
  1480. static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
  1481. int i;
  1482. /* Advance each level to the last entry on the last page */
  1483. for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
  1484. Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
  1485. while( fts5DlidxLvlNext(pLvl)==0 );
  1486. pLvl->bEof = 0;
  1487. if( i>0 ){
  1488. Fts5DlidxLvl *pChild = &pLvl[-1];
  1489. fts5DataRelease(pChild->pData);
  1490. memset(pChild, 0, sizeof(Fts5DlidxLvl));
  1491. pChild->pData = fts5DataRead(p,
  1492. FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
  1493. );
  1494. }
  1495. }
  1496. }
  1497. /*
  1498. ** Move the iterator passed as the only argument to the previous entry.
  1499. */
  1500. static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
  1501. int iOff = pLvl->iOff;
  1502. assert( pLvl->bEof==0 );
  1503. if( iOff<=pLvl->iFirstOff ){
  1504. pLvl->bEof = 1;
  1505. }else{
  1506. u8 *a = pLvl->pData->p;
  1507. pLvl->iOff = 0;
  1508. fts5DlidxLvlNext(pLvl);
  1509. while( 1 ){
  1510. int nZero = 0;
  1511. int ii = pLvl->iOff;
  1512. u64 delta = 0;
  1513. while( a[ii]==0 ){
  1514. nZero++;
  1515. ii++;
  1516. }
  1517. ii += sqlite3Fts5GetVarint(&a[ii], &delta);
  1518. if( ii>=iOff ) break;
  1519. pLvl->iLeafPgno += nZero+1;
  1520. pLvl->iRowid += delta;
  1521. pLvl->iOff = ii;
  1522. }
  1523. }
  1524. return pLvl->bEof;
  1525. }
  1526. static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  1527. Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
  1528. assert( iLvl<pIter->nLvl );
  1529. if( fts5DlidxLvlPrev(pLvl) ){
  1530. if( (iLvl+1) < pIter->nLvl ){
  1531. fts5DlidxIterPrevR(p, pIter, iLvl+1);
  1532. if( pLvl[1].bEof==0 ){
  1533. fts5DataRelease(pLvl->pData);
  1534. memset(pLvl, 0, sizeof(Fts5DlidxLvl));
  1535. pLvl->pData = fts5DataRead(p,
  1536. FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
  1537. );
  1538. if( pLvl->pData ){
  1539. while( fts5DlidxLvlNext(pLvl)==0 );
  1540. pLvl->bEof = 0;
  1541. }
  1542. }
  1543. }
  1544. }
  1545. return pIter->aLvl[0].bEof;
  1546. }
  1547. static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
  1548. return fts5DlidxIterPrevR(p, pIter, 0);
  1549. }
  1550. /*
  1551. ** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
  1552. */
  1553. static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
  1554. if( pIter ){
  1555. int i;
  1556. for(i=0; i<pIter->nLvl; i++){
  1557. fts5DataRelease(pIter->aLvl[i].pData);
  1558. }
  1559. sqlite3_free(pIter);
  1560. }
  1561. }
  1562. static Fts5DlidxIter *fts5DlidxIterInit(
  1563. Fts5Index *p, /* Fts5 Backend to iterate within */
  1564. int bRev, /* True for ORDER BY ASC */
  1565. int iSegid, /* Segment id */
  1566. int iLeafPg /* Leaf page number to load dlidx for */
  1567. ){
  1568. Fts5DlidxIter *pIter = 0;
  1569. int i;
  1570. int bDone = 0;
  1571. for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
  1572. sqlite3_int64 nByte = SZ_FTS5DLIDXITER(i+1);
  1573. Fts5DlidxIter *pNew;
  1574. pNew = (Fts5DlidxIter*)sqlite3_realloc64(pIter, nByte);
  1575. if( pNew==0 ){
  1576. p->rc = SQLITE_NOMEM;
  1577. }else{
  1578. i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
  1579. Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
  1580. pIter = pNew;
  1581. memset(pLvl, 0, sizeof(Fts5DlidxLvl));
  1582. pLvl->pData = fts5DataRead(p, iRowid);
  1583. if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
  1584. bDone = 1;
  1585. }
  1586. pIter->nLvl = i+1;
  1587. }
  1588. }
  1589. if( p->rc==SQLITE_OK ){
  1590. pIter->iSegid = iSegid;
  1591. if( bRev==0 ){
  1592. fts5DlidxIterFirst(pIter);
  1593. }else{
  1594. fts5DlidxIterLast(p, pIter);
  1595. }
  1596. }
  1597. if( p->rc!=SQLITE_OK ){
  1598. fts5DlidxIterFree(pIter);
  1599. pIter = 0;
  1600. }
  1601. return pIter;
  1602. }
  1603. static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
  1604. return pIter->aLvl[0].iRowid;
  1605. }
  1606. static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
  1607. return pIter->aLvl[0].iLeafPgno;
  1608. }
  1609. /*
  1610. ** Load the next leaf page into the segment iterator.
  1611. */
  1612. static void fts5SegIterNextPage(
  1613. Fts5Index *p, /* FTS5 backend object */
  1614. Fts5SegIter *pIter /* Iterator to advance to next page */
  1615. ){
  1616. Fts5Data *pLeaf;
  1617. Fts5StructureSegment *pSeg = pIter->pSeg;
  1618. fts5DataRelease(pIter->pLeaf);
  1619. pIter->iLeafPgno++;
  1620. if( pIter->pNextLeaf ){
  1621. pIter->pLeaf = pIter->pNextLeaf;
  1622. pIter->pNextLeaf = 0;
  1623. }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
  1624. pIter->pLeaf = fts5LeafRead(p,
  1625. FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
  1626. );
  1627. }else{
  1628. pIter->pLeaf = 0;
  1629. }
  1630. pLeaf = pIter->pLeaf;
  1631. if( pLeaf ){
  1632. pIter->iPgidxOff = pLeaf->szLeaf;
  1633. if( fts5LeafIsTermless(pLeaf) ){
  1634. pIter->iEndofDoclist = pLeaf->nn+1;
  1635. }else{
  1636. pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
  1637. pIter->iEndofDoclist
  1638. );
  1639. }
  1640. }
  1641. }
  1642. /*
  1643. ** Argument p points to a buffer containing a varint to be interpreted as a
  1644. ** position list size field. Read the varint and return the number of bytes
  1645. ** read. Before returning, set *pnSz to the number of bytes in the position
  1646. ** list, and *pbDel to true if the delete flag is set, or false otherwise.
  1647. */
  1648. static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
  1649. int nSz;
  1650. int n = 0;
  1651. fts5FastGetVarint32(p, n, nSz);
  1652. assert_nc( nSz>=0 );
  1653. *pnSz = nSz/2;
  1654. *pbDel = nSz & 0x0001;
  1655. return n;
  1656. }
  1657. /*
  1658. ** Fts5SegIter.iLeafOffset currently points to the first byte of a
  1659. ** position-list size field. Read the value of the field and store it
  1660. ** in the following variables:
  1661. **
  1662. ** Fts5SegIter.nPos
  1663. ** Fts5SegIter.bDel
  1664. **
  1665. ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
  1666. ** position list content (if any).
  1667. */
  1668. static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  1669. if( p->rc==SQLITE_OK ){
  1670. int iOff = pIter->iLeafOffset; /* Offset to read at */
  1671. ASSERT_SZLEAF_OK(pIter->pLeaf);
  1672. if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
  1673. int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
  1674. pIter->bDel = 0;
  1675. pIter->nPos = 1;
  1676. if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
  1677. pIter->bDel = 1;
  1678. iOff++;
  1679. if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
  1680. pIter->nPos = 1;
  1681. iOff++;
  1682. }else{
  1683. pIter->nPos = 0;
  1684. }
  1685. }
  1686. }else{
  1687. int nSz;
  1688. fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
  1689. pIter->bDel = (nSz & 0x0001);
  1690. pIter->nPos = nSz>>1;
  1691. assert_nc( pIter->nPos>=0 );
  1692. }
  1693. pIter->iLeafOffset = iOff;
  1694. }
  1695. }
  1696. static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  1697. u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
  1698. i64 iOff = pIter->iLeafOffset;
  1699. ASSERT_SZLEAF_OK(pIter->pLeaf);
  1700. while( iOff>=pIter->pLeaf->szLeaf ){
  1701. fts5SegIterNextPage(p, pIter);
  1702. if( pIter->pLeaf==0 ){
  1703. if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
  1704. return;
  1705. }
  1706. iOff = 4;
  1707. a = pIter->pLeaf->p;
  1708. }
  1709. iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
  1710. pIter->iLeafOffset = iOff;
  1711. }
  1712. /*
  1713. ** Fts5SegIter.iLeafOffset currently points to the first byte of the
  1714. ** "nSuffix" field of a term. Function parameter nKeep contains the value
  1715. ** of the "nPrefix" field (if there was one - it is passed 0 if this is
  1716. ** the first term in the segment).
  1717. **
  1718. ** This function populates:
  1719. **
  1720. ** Fts5SegIter.term
  1721. ** Fts5SegIter.rowid
  1722. **
  1723. ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
  1724. ** the first position list. The position list belonging to document
  1725. ** (Fts5SegIter.iRowid).
  1726. */
  1727. static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
  1728. u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
  1729. i64 iOff = pIter->iLeafOffset; /* Offset to read at */
  1730. int nNew; /* Bytes of new data */
  1731. iOff += fts5GetVarint32(&a[iOff], nNew);
  1732. if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){
  1733. p->rc = FTS5_CORRUPT;
  1734. return;
  1735. }
  1736. pIter->term.n = nKeep;
  1737. fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  1738. assert( pIter->term.n<=pIter->term.nSpace );
  1739. iOff += nNew;
  1740. pIter->iTermLeafOffset = iOff;
  1741. pIter->iTermLeafPgno = pIter->iLeafPgno;
  1742. pIter->iLeafOffset = iOff;
  1743. if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
  1744. pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  1745. }else{
  1746. int nExtra;
  1747. pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
  1748. pIter->iEndofDoclist += nExtra;
  1749. }
  1750. fts5SegIterLoadRowid(p, pIter);
  1751. }
  1752. static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
  1753. static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
  1754. static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
  1755. static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
  1756. if( pIter->flags & FTS5_SEGITER_REVERSE ){
  1757. pIter->xNext = fts5SegIterNext_Reverse;
  1758. }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
  1759. pIter->xNext = fts5SegIterNext_None;
  1760. }else{
  1761. pIter->xNext = fts5SegIterNext;
  1762. }
  1763. }
  1764. /*
  1765. ** Allocate a tombstone hash page array object (pIter->pTombArray) for
  1766. ** the iterator passed as the second argument. If an OOM error occurs,
  1767. ** leave an error in the Fts5Index object.
  1768. */
  1769. static void fts5SegIterAllocTombstone(Fts5Index *p, Fts5SegIter *pIter){
  1770. const int nTomb = pIter->pSeg->nPgTombstone;
  1771. if( nTomb>0 ){
  1772. int nByte = SZ_FTS5TOMBSTONEARRAY(nTomb+1);
  1773. Fts5TombstoneArray *pNew;
  1774. pNew = (Fts5TombstoneArray*)sqlite3Fts5MallocZero(&p->rc, nByte);
  1775. if( pNew ){
  1776. pNew->nTombstone = nTomb;
  1777. pNew->nRef = 1;
  1778. pIter->pTombArray = pNew;
  1779. }
  1780. }
  1781. }
  1782. /*
  1783. ** Initialize the iterator object pIter to iterate through the entries in
  1784. ** segment pSeg. The iterator is left pointing to the first entry when
  1785. ** this function returns.
  1786. **
  1787. ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
  1788. ** an error has already occurred when this function is called, it is a no-op.
  1789. */
  1790. static void fts5SegIterInit(
  1791. Fts5Index *p, /* FTS index object */
  1792. Fts5StructureSegment *pSeg, /* Description of segment */
  1793. Fts5SegIter *pIter /* Object to populate */
  1794. ){
  1795. if( pSeg->pgnoFirst==0 ){
  1796. /* This happens if the segment is being used as an input to an incremental
  1797. ** merge and all data has already been "trimmed". See function
  1798. ** fts5TrimSegments() for details. In this case leave the iterator empty.
  1799. ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
  1800. ** at EOF already. */
  1801. assert( pIter->pLeaf==0 );
  1802. return;
  1803. }
  1804. if( p->rc==SQLITE_OK ){
  1805. memset(pIter, 0, sizeof(*pIter));
  1806. fts5SegIterSetNext(p, pIter);
  1807. pIter->pSeg = pSeg;
  1808. pIter->iLeafPgno = pSeg->pgnoFirst-1;
  1809. do {
  1810. fts5SegIterNextPage(p, pIter);
  1811. }while( p->rc==SQLITE_OK && pIter->pLeaf && pIter->pLeaf->nn==4 );
  1812. }
  1813. if( p->rc==SQLITE_OK && pIter->pLeaf ){
  1814. pIter->iLeafOffset = 4;
  1815. assert( pIter->pLeaf!=0 );
  1816. assert_nc( pIter->pLeaf->nn>4 );
  1817. assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
  1818. pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
  1819. fts5SegIterLoadTerm(p, pIter, 0);
  1820. fts5SegIterLoadNPos(p, pIter);
  1821. fts5SegIterAllocTombstone(p, pIter);
  1822. }
  1823. }
  1824. /*
  1825. ** This function is only ever called on iterators created by calls to
  1826. ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
  1827. **
  1828. ** The iterator is in an unusual state when this function is called: the
  1829. ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
  1830. ** the position-list size field for the first relevant rowid on the page.
  1831. ** Fts5SegIter.rowid is set, but nPos and bDel are not.
  1832. **
  1833. ** This function advances the iterator so that it points to the last
  1834. ** relevant rowid on the page and, if necessary, initializes the
  1835. ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
  1836. ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
  1837. ** byte of the position list content associated with said rowid.
  1838. */
  1839. static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
  1840. int eDetail = p->pConfig->eDetail;
  1841. int n = pIter->pLeaf->szLeaf;
  1842. int i = pIter->iLeafOffset;
  1843. u8 *a = pIter->pLeaf->p;
  1844. int iRowidOffset = 0;
  1845. if( n>pIter->iEndofDoclist ){
  1846. n = pIter->iEndofDoclist;
  1847. }
  1848. ASSERT_SZLEAF_OK(pIter->pLeaf);
  1849. while( 1 ){
  1850. u64 iDelta = 0;
  1851. if( eDetail==FTS5_DETAIL_NONE ){
  1852. /* todo */
  1853. if( i<n && a[i]==0 ){
  1854. i++;
  1855. if( i<n && a[i]==0 ) i++;
  1856. }
  1857. }else{
  1858. int nPos;
  1859. int bDummy;
  1860. i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
  1861. i += nPos;
  1862. }
  1863. if( i>=n ) break;
  1864. i += fts5GetVarint(&a[i], &iDelta);
  1865. pIter->iRowid += iDelta;
  1866. /* If necessary, grow the pIter->aRowidOffset[] array. */
  1867. if( iRowidOffset>=pIter->nRowidOffset ){
  1868. int nNew = pIter->nRowidOffset + 8;
  1869. int *aNew = (int*)sqlite3_realloc64(pIter->aRowidOffset,nNew*sizeof(int));
  1870. if( aNew==0 ){
  1871. p->rc = SQLITE_NOMEM;
  1872. break;
  1873. }
  1874. pIter->aRowidOffset = aNew;
  1875. pIter->nRowidOffset = nNew;
  1876. }
  1877. pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
  1878. pIter->iLeafOffset = i;
  1879. }
  1880. pIter->iRowidOffset = iRowidOffset;
  1881. fts5SegIterLoadNPos(p, pIter);
  1882. }
  1883. /*
  1884. **
  1885. */
  1886. static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
  1887. assert( pIter->flags & FTS5_SEGITER_REVERSE );
  1888. assert( pIter->flags & FTS5_SEGITER_ONETERM );
  1889. fts5DataRelease(pIter->pLeaf);
  1890. pIter->pLeaf = 0;
  1891. while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
  1892. Fts5Data *pNew;
  1893. pIter->iLeafPgno--;
  1894. pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
  1895. pIter->pSeg->iSegid, pIter->iLeafPgno
  1896. ));
  1897. if( pNew ){
  1898. /* iTermLeafOffset may be equal to szLeaf if the term is the last
  1899. ** thing on the page - i.e. the first rowid is on the following page.
  1900. ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
  1901. if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
  1902. assert( pIter->pLeaf==0 );
  1903. if( pIter->iTermLeafOffset<pNew->szLeaf ){
  1904. pIter->pLeaf = pNew;
  1905. pIter->iLeafOffset = pIter->iTermLeafOffset;
  1906. }
  1907. }else{
  1908. int iRowidOff;
  1909. iRowidOff = fts5LeafFirstRowidOff(pNew);
  1910. if( iRowidOff ){
  1911. if( iRowidOff>=pNew->szLeaf ){
  1912. p->rc = FTS5_CORRUPT;
  1913. }else{
  1914. pIter->pLeaf = pNew;
  1915. pIter->iLeafOffset = iRowidOff;
  1916. }
  1917. }
  1918. }
  1919. if( pIter->pLeaf ){
  1920. u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
  1921. pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
  1922. break;
  1923. }else{
  1924. fts5DataRelease(pNew);
  1925. }
  1926. }
  1927. }
  1928. if( pIter->pLeaf ){
  1929. pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  1930. fts5SegIterReverseInitPage(p, pIter);
  1931. }
  1932. }
  1933. /*
  1934. ** Return true if the iterator passed as the second argument currently
  1935. ** points to a delete marker. A delete marker is an entry with a 0 byte
  1936. ** position-list.
  1937. */
  1938. static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
  1939. Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
  1940. return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
  1941. }
  1942. /*
  1943. ** Advance iterator pIter to the next entry.
  1944. **
  1945. ** This version of fts5SegIterNext() is only used by reverse iterators.
  1946. */
  1947. static void fts5SegIterNext_Reverse(
  1948. Fts5Index *p, /* FTS5 backend object */
  1949. Fts5SegIter *pIter, /* Iterator to advance */
  1950. int *pbUnused /* Unused */
  1951. ){
  1952. assert( pIter->flags & FTS5_SEGITER_REVERSE );
  1953. assert( pIter->pNextLeaf==0 );
  1954. UNUSED_PARAM(pbUnused);
  1955. if( pIter->iRowidOffset>0 ){
  1956. u8 *a = pIter->pLeaf->p;
  1957. int iOff;
  1958. u64 iDelta;
  1959. pIter->iRowidOffset--;
  1960. pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
  1961. fts5SegIterLoadNPos(p, pIter);
  1962. iOff = pIter->iLeafOffset;
  1963. if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
  1964. iOff += pIter->nPos;
  1965. }
  1966. fts5GetVarint(&a[iOff], &iDelta);
  1967. pIter->iRowid -= iDelta;
  1968. }else{
  1969. fts5SegIterReverseNewPage(p, pIter);
  1970. }
  1971. }
  1972. /*
  1973. ** Advance iterator pIter to the next entry.
  1974. **
  1975. ** This version of fts5SegIterNext() is only used if detail=none and the
  1976. ** iterator is not a reverse direction iterator.
  1977. */
  1978. static void fts5SegIterNext_None(
  1979. Fts5Index *p, /* FTS5 backend object */
  1980. Fts5SegIter *pIter, /* Iterator to advance */
  1981. int *pbNewTerm /* OUT: Set for new term */
  1982. ){
  1983. int iOff;
  1984. assert( p->rc==SQLITE_OK );
  1985. assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
  1986. assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );
  1987. ASSERT_SZLEAF_OK(pIter->pLeaf);
  1988. iOff = pIter->iLeafOffset;
  1989. /* Next entry is on the next page */
  1990. while( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
  1991. fts5SegIterNextPage(p, pIter);
  1992. if( p->rc || pIter->pLeaf==0 ) return;
  1993. pIter->iRowid = 0;
  1994. iOff = 4;
  1995. }
  1996. if( iOff<pIter->iEndofDoclist ){
  1997. /* Next entry is on the current page */
  1998. u64 iDelta;
  1999. iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
  2000. pIter->iLeafOffset = iOff;
  2001. pIter->iRowid += iDelta;
  2002. }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
  2003. if( pIter->pSeg ){
  2004. int nKeep = 0;
  2005. if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
  2006. iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
  2007. }
  2008. pIter->iLeafOffset = iOff;
  2009. fts5SegIterLoadTerm(p, pIter, nKeep);
  2010. }else{
  2011. const u8 *pList = 0;
  2012. const char *zTerm = 0;
  2013. int nTerm = 0;
  2014. int nList;
  2015. sqlite3Fts5HashScanNext(p->pHash);
  2016. sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList);
  2017. if( pList==0 ) goto next_none_eof;
  2018. pIter->pLeaf->p = (u8*)pList;
  2019. pIter->pLeaf->nn = nList;
  2020. pIter->pLeaf->szLeaf = nList;
  2021. pIter->iEndofDoclist = nList;
  2022. sqlite3Fts5BufferSet(&p->rc,&pIter->term, nTerm, (u8*)zTerm);
  2023. pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
  2024. }
  2025. if( pbNewTerm ) *pbNewTerm = 1;
  2026. }else{
  2027. goto next_none_eof;
  2028. }
  2029. fts5SegIterLoadNPos(p, pIter);
  2030. return;
  2031. next_none_eof:
  2032. fts5DataRelease(pIter->pLeaf);
  2033. pIter->pLeaf = 0;
  2034. }
  /*
  ** Advance iterator pIter to the next entry.
  **
  ** This is the version used when detail!=none (see the assert() below).
  ** There are three cases, depending on where the next entry is found:
  **
  **   1. On the current page: either the next rowid of the current doclist,
  **      or, if the end of the doclist has been reached, the next term.
  **
  **   2. The iterator has no segment (pIter->pSeg==0): unless the
  **      FTS5_SEGITER_ONETERM flag is set, the next entry is obtained from
  **      the hash scan on p->pHash.
  **
  **   3. On a following page of the segment.
  **
  ** If the iterator moves to a new term, *pbNewTerm is set to 1 (if
  ** pbNewTerm is not NULL). If it would move to a new term but the
  ** FTS5_SEGITER_ONETERM flag is set, or if there are no more entries,
  ** the leaf is released and pIter->pLeaf set to NULL (EOF).
  **
  ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
  ** is not considered an error if the iterator reaches EOF. If an error has
  ** already occurred when this function is called, it is a no-op.
  */
  static void fts5SegIterNext(
    Fts5Index *p,                   /* FTS5 backend object */
    Fts5SegIter *pIter,             /* Iterator to advance */
    int *pbNewTerm                  /* OUT: Set for new term */
  ){
    Fts5Data *pLeaf = pIter->pLeaf;
    int iOff;
    int bNewTerm = 0;               /* True if iterator now at a new term */
    int nKeep = 0;                  /* Prefix bytes to keep for the new term */
    u8 *a;
    int n;

    assert( pbNewTerm==0 || *pbNewTerm==0 );
    assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

    /* Search for the end of the position list within the current page. */
    a = pLeaf->p;
    n = pLeaf->szLeaf;

    ASSERT_SZLEAF_OK(pLeaf);
    iOff = pIter->iLeafOffset + pIter->nPos;

    if( iOff<n ){
      /* The next entry is on the current page. */
      assert_nc( iOff<=pIter->iEndofDoclist );
      if( iOff>=pIter->iEndofDoclist ){
        /* End of the current doclist - a term follows. Unless this is the
        ** first term on the page, it is preceded by a varint that is read
        ** into nKeep. */
        bNewTerm = 1;
        if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
          iOff += fts5GetVarint32(&a[iOff], nKeep);
        }
      }else{
        /* Another rowid in the same doclist, stored as a delta. */
        u64 iDelta;
        iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
        pIter->iRowid += iDelta;
        assert_nc( iDelta>0 );
      }
      pIter->iLeafOffset = iOff;

    }else if( pIter->pSeg==0 ){
      /* The iterator is not attached to a segment. Entries come from the
      ** hash scan on p->pHash instead. */
      const u8 *pList = 0;
      const char *zTerm = 0;
      int nTerm = 0;
      int nList = 0;
      assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
      if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
        sqlite3Fts5HashScanNext(p->pHash);
        sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList);
      }
      if( pList==0 ){
        /* Either a one-term iterator or the scan is finished - EOF. */
        fts5DataRelease(pIter->pLeaf);
        pIter->pLeaf = 0;
      }else{
        /* Point the existing Fts5Data object at the new list. pbNewTerm is
        ** known to be non-NULL here: pList can only be set if ONETERM is
        ** clear, in which case the assert() above requires pbNewTerm. */
        pIter->pLeaf->p = (u8*)pList;
        pIter->pLeaf->nn = nList;
        pIter->pLeaf->szLeaf = nList;
        pIter->iEndofDoclist = nList+1;
        sqlite3Fts5BufferSet(&p->rc, &pIter->term, nTerm, (u8*)zTerm);
        pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
        *pbNewTerm = 1;
      }
    }else{
      iOff = 0;
      /* Next entry is not on the current page */
      while( iOff==0 ){
        fts5SegIterNextPage(p, pIter);
        pLeaf = pIter->pLeaf;
        if( pLeaf==0 ) break;
        ASSERT_SZLEAF_OK(pLeaf);
        if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
          /* The page has a first-rowid offset: the doclist continues
          ** here. Read the rowid and, if there is anything after the leaf
          ** content (nn>szLeaf), load iEndofDoclist from it. */
          iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
          pIter->iLeafOffset = iOff;

          if( pLeaf->nn>pLeaf->szLeaf ){
            pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
                &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
            );
          }
        }
        else if( pLeaf->nn>pLeaf->szLeaf ){
          /* No usable first-rowid offset, but there is data after the leaf
          ** content. Its first varint gives the offset of a term, so the
          ** iterator is now at a new term. */
          pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
              &pLeaf->p[pLeaf->szLeaf], iOff
          );
          pIter->iLeafOffset = iOff;
          pIter->iEndofDoclist = iOff;
          bNewTerm = 1;
        }
        assert_nc( iOff<pLeaf->szLeaf );
        if( iOff>pLeaf->szLeaf ){
          p->rc = FTS5_CORRUPT;
          return;
        }
      }
    }

    /* Check if the iterator is now at EOF. If so, return early. */
    if( pIter->pLeaf ){
      if( bNewTerm ){
        if( pIter->flags & FTS5_SEGITER_ONETERM ){
          /* One-term iterators stop at the end of their term's doclist. */
          fts5DataRelease(pIter->pLeaf);
          pIter->pLeaf = 0;
        }else{
          fts5SegIterLoadTerm(p, pIter, nKeep);
          fts5SegIterLoadNPos(p, pIter);
          if( pbNewTerm ) *pbNewTerm = 1;
        }
      }else{
        /* The following could be done by calling fts5SegIterLoadNPos(). But
        ** this block is particularly performance critical, so equivalent
        ** code is inlined. */
        int nSz;
        assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn );
        fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
        pIter->bDel = (nSz & 0x0001);
        pIter->nPos = nSz>>1;
        assert_nc( pIter->nPos>=0 );
      }
    }
  }
  /*
  ** Exchange the values of lvalues a and b, both of type T. The expansion
  ** is a brace-enclosed block that declares a temporary named "tmp", so
  ** neither argument may itself be named tmp.
  */
  #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }

  /*
  ** Advance integer lvalue iOff past the varint stored in byte array a
  ** starting at offset iOff. Bytes are consumed up to and including the
  ** first one that does not have the 0x80 bit set, but never more than 9
  ** bytes in total. The value of the varint is not decoded.
  **
  ** The expansion is a brace-enclosed block that declares a local named
  ** "iEnd", so the iOff argument must not be a variable of that name.
  */
  #define fts5IndexSkipVarint(a, iOff) { \
    int iEnd = iOff+9; \
    while( (a[iOff++] & 0x80) && iOff<iEnd ); \
  }
  2158. /*
  2159. ** Iterator pIter currently points to the first rowid in a doclist. This
  2160. ** function sets the iterator up so that iterates in reverse order through
  2161. ** the doclist.
  2162. */
  2163. static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
  2164. Fts5DlidxIter *pDlidx = pIter->pDlidx;
  2165. Fts5Data *pLast = 0;
  2166. int pgnoLast = 0;
  2167. if( pDlidx && p->pConfig->iVersion==FTS5_CURRENT_VERSION ){
  2168. int iSegid = pIter->pSeg->iSegid;
  2169. pgnoLast = fts5DlidxIterPgno(pDlidx);
  2170. pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
  2171. }else{
  2172. Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
  2173. /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
  2174. ** position-list content for the current rowid. Back it up so that it
  2175. ** points to the start of the position-list size field. */
  2176. int iPoslist;
  2177. if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
  2178. iPoslist = pIter->iTermLeafOffset;
  2179. }else{
  2180. iPoslist = 4;
  2181. }
  2182. fts5IndexSkipVarint(pLeaf->p, iPoslist);
  2183. pIter->iLeafOffset = iPoslist;
  2184. /* If this condition is true then the largest rowid for the current
  2185. ** term may not be stored on the current page. So search forward to
  2186. ** see where said rowid really is. */
  2187. if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
  2188. int pgno;
  2189. Fts5StructureSegment *pSeg = pIter->pSeg;
  2190. /* The last rowid in the doclist may not be on the current page. Search
  2191. ** forward to find the page containing the last rowid. */
  2192. for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
  2193. i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
  2194. Fts5Data *pNew = fts5LeafRead(p, iAbs);
  2195. if( pNew ){
  2196. int iRowid, bTermless;
  2197. iRowid = fts5LeafFirstRowidOff(pNew);
  2198. bTermless = fts5LeafIsTermless(pNew);
  2199. if( iRowid ){
  2200. SWAPVAL(Fts5Data*, pNew, pLast);
  2201. pgnoLast = pgno;
  2202. }
  2203. fts5DataRelease(pNew);
  2204. if( bTermless==0 ) break;
  2205. }
  2206. }
  2207. }
  2208. }
  2209. /* If pLast is NULL at this point, then the last rowid for this doclist
  2210. ** lies on the page currently indicated by the iterator. In this case
  2211. ** pIter->iLeafOffset is already set to point to the position-list size
  2212. ** field associated with the first relevant rowid on the page.
  2213. **
  2214. ** Or, if pLast is non-NULL, then it is the page that contains the last
  2215. ** rowid. In this case configure the iterator so that it points to the
  2216. ** first rowid on this page.
  2217. */
  2218. if( pLast ){
  2219. int iOff;
  2220. fts5DataRelease(pIter->pLeaf);
  2221. pIter->pLeaf = pLast;
  2222. pIter->iLeafPgno = pgnoLast;
  2223. iOff = fts5LeafFirstRowidOff(pLast);
  2224. if( iOff>pLast->szLeaf ){
  2225. p->rc = FTS5_CORRUPT;
  2226. return;
  2227. }
  2228. iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
  2229. pIter->iLeafOffset = iOff;
  2230. if( fts5LeafIsTermless(pLast) ){
  2231. pIter->iEndofDoclist = pLast->nn+1;
  2232. }else{
  2233. pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
  2234. }
  2235. }
  2236. fts5SegIterReverseInitPage(p, pIter);
  2237. }
  2238. /*
  2239. ** Iterator pIter currently points to the first rowid of a doclist.
  2240. ** There is a doclist-index associated with the final term on the current
  2241. ** page. If the current term is the last term on the page, load the
  2242. ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
  2243. */
  2244. static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
  2245. int iSeg = pIter->pSeg->iSegid;
  2246. int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
  2247. Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
  2248. assert( pIter->flags & FTS5_SEGITER_ONETERM );
  2249. assert( pIter->pDlidx==0 );
  2250. /* Check if the current doclist ends on this page. If it does, return
  2251. ** early without loading the doclist-index (as it belongs to a different
  2252. ** term. */
  2253. if( pIter->iTermLeafPgno==pIter->iLeafPgno
  2254. && pIter->iEndofDoclist<pLeaf->szLeaf
  2255. ){
  2256. return;
  2257. }
  2258. pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
  2259. }
  /*
  ** The iterator object passed as the second argument currently contains
  ** no valid values except for the Fts5SegIter.pLeaf member variable. This
  ** function searches the leaf page for a term matching (pTerm/nTerm).
  **
  ** If the specified term is found on the page, then the iterator is left
  ** pointing to it. If argument bGe is zero and the term is not found,
  ** the iterator is left pointing at EOF.
  **
  ** If bGe is non-zero and the specified term is not found, then the
  ** iterator is left pointing to the smallest term in the segment that
  ** is larger than the specified term, even if this term is not on the
  ** current page.
  **
  ** If the page is found to be malformed, Fts5Index.rc is set to
  ** FTS5_CORRUPT. The caller must ensure Fts5Index.rc is SQLITE_OK on entry
  ** (see the assert() below).
  */
  static void fts5LeafSeek(
    Fts5Index *p,                   /* Leave any error code here */
    int bGe,                        /* True for a >= search */
    Fts5SegIter *pIter,             /* Iterator to seek */
    const u8 *pTerm, int nTerm      /* Term to search for */
  ){
    u32 iOff;                       /* Current read offset within the page */
    const u8 *a = pIter->pLeaf->p;
    u32 n = (u32)pIter->pLeaf->nn;

    u32 nMatch = 0;                 /* Leading bytes of pTerm matched so far */
    u32 nKeep = 0;                  /* Prefix bytes shared with previous term */
    u32 nNew = 0;                   /* Suffix bytes stored for current term */
    u32 iTermOff;                   /* Offset of the current term's entry */
    u32 iPgidx;                     /* Current offset in pgidx */
    int bEndOfPage = 0;             /* True if every term on page was visited */

    assert( p->rc==SQLITE_OK );

    /* The first varint after the leaf content is the offset of the first
    ** term on the page. */
    iPgidx = (u32)pIter->pLeaf->szLeaf;
    iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
    iOff = iTermOff;
    if( iOff>n ){
      p->rc = FTS5_CORRUPT;
      return;
    }

    while( 1 ){

      /* Figure out how many new bytes are in this term */
      fts5FastGetVarint32(a, iOff, nNew);

      /* This term shares fewer bytes with its predecessor than the
      ** predecessor matched of pTerm. The search ends here. */
      if( nKeep<nMatch ){
        goto search_failed;
      }

      assert( nKeep>=nMatch );
      if( nKeep==nMatch ){
        /* Compare the suffix against the unmatched part of pTerm. */
        u32 nCmp;
        u32 i;
        nCmp = (u32)MIN(nNew, nTerm-nMatch);
        for(i=0; i<nCmp; i++){
          if( a[iOff+i]!=pTerm[nMatch+i] ) break;
        }
        nMatch += i;

        if( (u32)nTerm==nMatch ){
          /* All of pTerm matched. It is an exact hit only if the suffix
          ** was consumed entirely as well. */
          if( i==nNew ){
            goto search_success;
          }else{
            goto search_failed;
          }
        }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
          /* The term on the page is larger than pTerm. */
          goto search_failed;
        }
      }

      if( iPgidx>=n ){
        /* No more terms on this page. Note that control then continues
        ** at the search_failed label below. */
        bEndOfPage = 1;
        break;
      }

      /* The next pgidx varint is the distance from this term's entry to
      ** the next one. nKeep is only used as scratch space here; it is
      ** overwritten again below. */
      iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
      iTermOff += nKeep;
      iOff = iTermOff;

      if( iOff>=n ){
        p->rc = FTS5_CORRUPT;
        return;
      }

      /* Read the nKeep field of the next term. */
      fts5FastGetVarint32(a, iOff, nKeep);
    }

   search_failed:
    if( bGe==0 ){
      /* Exact match required but not found - leave the iterator at EOF. */
      fts5DataRelease(pIter->pLeaf);
      pIter->pLeaf = 0;
      return;
    }else if( bEndOfPage ){
      /* All terms on the page are smaller than pTerm. Move forward to the
      ** first term on a following page, skipping any termless pages. */
      do {
        fts5SegIterNextPage(p, pIter);
        if( pIter->pLeaf==0 ) return;
        a = pIter->pLeaf->p;
        if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
          iPgidx = (u32)pIter->pLeaf->szLeaf;
          iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
          if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){
            p->rc = FTS5_CORRUPT;
            return;
          }else{
            nKeep = 0;
            iTermOff = iOff;
            n = (u32)pIter->pLeaf->nn;
            iOff += fts5GetVarint32(&a[iOff], nNew);
            break;
          }
        }
      }while( 1 );
    }
    /* If bGe is set and the search stopped part way through the page,
    ** control falls through to here with (iOff, nKeep, nNew) describing
    ** the term at which the search stopped. */

   search_success:
    if( (i64)iOff+nNew>n || nNew<1 ){
      p->rc = FTS5_CORRUPT;
      return;
    }
    pIter->iLeafOffset = iOff + nNew;
    pIter->iTermLeafOffset = pIter->iLeafOffset;
    pIter->iTermLeafPgno = pIter->iLeafPgno;

    /* Build the term from the first nKeep bytes of pTerm followed by the
    ** nNew suffix bytes read from the page. */
    fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
    fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);

    if( iPgidx>=n ){
      pIter->iEndofDoclist = pIter->pLeaf->nn+1;
    }else{
      int nExtra;
      iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
      pIter->iEndofDoclist = iTermOff + nExtra;
    }
    pIter->iPgidxOff = iPgidx;

    fts5SegIterLoadRowid(p, pIter);
    fts5SegIterLoadNPos(p, pIter);
  }
  2383. static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
  2384. if( p->pIdxSelect==0 ){
  2385. Fts5Config *pConfig = p->pConfig;
  2386. fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
  2387. "SELECT pgno FROM '%q'.'%q_idx' WHERE "
  2388. "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
  2389. pConfig->zDb, pConfig->zName
  2390. ));
  2391. }
  2392. return p->pIdxSelect;
  2393. }
  2394. /*
  2395. ** Initialize the object pIter to point to term pTerm/nTerm within segment
  2396. ** pSeg. If there is no such term in the index, the iterator is set to EOF.
  2397. **
  2398. ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
  2399. ** an error has already occurred when this function is called, it is a no-op.
  2400. */
  2401. static void fts5SegIterSeekInit(
  2402. Fts5Index *p, /* FTS5 backend */
  2403. const u8 *pTerm, int nTerm, /* Term to seek to */
  2404. int flags, /* Mask of FTS5INDEX_XXX flags */
  2405. Fts5StructureSegment *pSeg, /* Description of segment */
  2406. Fts5SegIter *pIter /* Object to populate */
  2407. ){
  2408. int iPg = 1;
  2409. int bGe = (flags & FTS5INDEX_QUERY_SCAN);
  2410. int bDlidx = 0; /* True if there is a doclist-index */
  2411. sqlite3_stmt *pIdxSelect = 0;
  2412. assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
  2413. assert( pTerm && nTerm );
  2414. memset(pIter, 0, sizeof(*pIter));
  2415. pIter->pSeg = pSeg;
  2416. /* This block sets stack variable iPg to the leaf page number that may
  2417. ** contain term (pTerm/nTerm), if it is present in the segment. */
  2418. pIdxSelect = fts5IdxSelectStmt(p);
  2419. if( p->rc ) return;
  2420. sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
  2421. sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
  2422. if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
  2423. i64 val = sqlite3_column_int(pIdxSelect, 0);
  2424. iPg = (int)(val>>1);
  2425. bDlidx = (val & 0x0001);
  2426. }
  2427. p->rc = sqlite3_reset(pIdxSelect);
  2428. sqlite3_bind_null(pIdxSelect, 2);
  2429. if( iPg<pSeg->pgnoFirst ){
  2430. iPg = pSeg->pgnoFirst;
  2431. bDlidx = 0;
  2432. }
  2433. pIter->iLeafPgno = iPg - 1;
  2434. fts5SegIterNextPage(p, pIter);
  2435. if( pIter->pLeaf ){
  2436. fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
  2437. }
  2438. if( p->rc==SQLITE_OK && (bGe==0 || (flags & FTS5INDEX_QUERY_SCANONETERM)) ){
  2439. pIter->flags |= FTS5_SEGITER_ONETERM;
  2440. if( pIter->pLeaf ){
  2441. if( flags & FTS5INDEX_QUERY_DESC ){
  2442. pIter->flags |= FTS5_SEGITER_REVERSE;
  2443. }
  2444. if( bDlidx ){
  2445. fts5SegIterLoadDlidx(p, pIter);
  2446. }
  2447. if( flags & FTS5INDEX_QUERY_DESC ){
  2448. fts5SegIterReverse(p, pIter);
  2449. }
  2450. }
  2451. }
  2452. fts5SegIterSetNext(p, pIter);
  2453. if( 0==(flags & FTS5INDEX_QUERY_SCANONETERM) ){
  2454. fts5SegIterAllocTombstone(p, pIter);
  2455. }
  2456. /* Either:
  2457. **
  2458. ** 1) an error has occurred, or
  2459. ** 2) the iterator points to EOF, or
  2460. ** 3) the iterator points to an entry with term (pTerm/nTerm), or
  2461. ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
  2462. ** to an entry with a term greater than or equal to (pTerm/nTerm).
  2463. */
  2464. assert_nc( p->rc!=SQLITE_OK /* 1 */
  2465. || pIter->pLeaf==0 /* 2 */
  2466. || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */
  2467. || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */
  2468. );
  2469. }
  2470. /*
  2471. ** SQL used by fts5SegIterNextInit() to find the page to open.
  2472. */
  2473. static sqlite3_stmt *fts5IdxNextStmt(Fts5Index *p){
  2474. if( p->pIdxNextSelect==0 ){
  2475. Fts5Config *pConfig = p->pConfig;
  2476. fts5IndexPrepareStmt(p, &p->pIdxNextSelect, sqlite3_mprintf(
  2477. "SELECT pgno FROM '%q'.'%q_idx' WHERE "
  2478. "segid=? AND term>? ORDER BY term ASC LIMIT 1",
  2479. pConfig->zDb, pConfig->zName
  2480. ));
  2481. }
  2482. return p->pIdxNextSelect;
  2483. }
  2484. /*
  2485. ** This is similar to fts5SegIterSeekInit(), except that it initializes
  2486. ** the segment iterator to point to the first term following the page
  2487. ** with pToken/nToken on it.
  2488. */
  2489. static void fts5SegIterNextInit(
  2490. Fts5Index *p,
  2491. const char *pTerm, int nTerm,
  2492. Fts5StructureSegment *pSeg, /* Description of segment */
  2493. Fts5SegIter *pIter /* Object to populate */
  2494. ){
  2495. int iPg = -1; /* Page of segment to open */
  2496. int bDlidx = 0;
  2497. sqlite3_stmt *pSel = 0; /* SELECT to find iPg */
  2498. pSel = fts5IdxNextStmt(p);
  2499. if( pSel ){
  2500. assert( p->rc==SQLITE_OK );
  2501. sqlite3_bind_int(pSel, 1, pSeg->iSegid);
  2502. sqlite3_bind_blob(pSel, 2, pTerm, nTerm, SQLITE_STATIC);
  2503. if( sqlite3_step(pSel)==SQLITE_ROW ){
  2504. i64 val = sqlite3_column_int64(pSel, 0);
  2505. iPg = (int)(val>>1);
  2506. bDlidx = (val & 0x0001);
  2507. }
  2508. p->rc = sqlite3_reset(pSel);
  2509. sqlite3_bind_null(pSel, 2);
  2510. if( p->rc ) return;
  2511. }
  2512. memset(pIter, 0, sizeof(*pIter));
  2513. pIter->pSeg = pSeg;
  2514. pIter->flags |= FTS5_SEGITER_ONETERM;
  2515. if( iPg>=0 ){
  2516. pIter->iLeafPgno = iPg - 1;
  2517. fts5SegIterNextPage(p, pIter);
  2518. fts5SegIterSetNext(p, pIter);
  2519. }
  2520. if( pIter->pLeaf ){
  2521. const u8 *a = pIter->pLeaf->p;
  2522. int iTermOff = 0;
  2523. pIter->iPgidxOff = pIter->pLeaf->szLeaf;
  2524. pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], iTermOff);
  2525. pIter->iLeafOffset = iTermOff;
  2526. fts5SegIterLoadTerm(p, pIter, 0);
  2527. fts5SegIterLoadNPos(p, pIter);
  2528. if( bDlidx ) fts5SegIterLoadDlidx(p, pIter);
  2529. assert( p->rc!=SQLITE_OK ||
  2530. fts5BufferCompareBlob(&pIter->term, (const u8*)pTerm, nTerm)>0
  2531. );
  2532. }
  2533. }
  2534. /*
  2535. ** Initialize the object pIter to point to term pTerm/nTerm within the
  2536. ** in-memory hash table. If there is no such term in the hash-table, the
  2537. ** iterator is set to EOF.
  2538. **
  2539. ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
  2540. ** an error has already occurred when this function is called, it is a no-op.
  2541. */
  2542. static void fts5SegIterHashInit(
  2543. Fts5Index *p, /* FTS5 backend */
  2544. const u8 *pTerm, int nTerm, /* Term to seek to */
  2545. int flags, /* Mask of FTS5INDEX_XXX flags */
  2546. Fts5SegIter *pIter /* Object to populate */
  2547. ){
  2548. int nList = 0;
  2549. const u8 *z = 0;
  2550. int n = 0;
  2551. Fts5Data *pLeaf = 0;
  2552. assert( p->pHash );
  2553. assert( p->rc==SQLITE_OK );
  2554. if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
  2555. const u8 *pList = 0;
  2556. p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
  2557. sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &n, &pList, &nList);
  2558. if( pList ){
  2559. pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
  2560. if( pLeaf ){
  2561. pLeaf->p = (u8*)pList;
  2562. }
  2563. }
  2564. /* The call to sqlite3Fts5HashScanInit() causes the hash table to
  2565. ** fill the size field of all existing position lists. This means they
  2566. ** can no longer be appended to. Since the only scenario in which they
  2567. ** can be appended to is if the previous operation on this table was
  2568. ** a DELETE, by clearing the Fts5Index.bDelete flag we can avoid this
  2569. ** possibility altogether. */
  2570. p->bDelete = 0;
  2571. }else{
  2572. p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data),
  2573. (const char*)pTerm, nTerm, (void**)&pLeaf, &nList
  2574. );
  2575. if( pLeaf ){
  2576. pLeaf->p = (u8*)&pLeaf[1];
  2577. }
  2578. z = pTerm;
  2579. n = nTerm;
  2580. pIter->flags |= FTS5_SEGITER_ONETERM;
  2581. }
  2582. if( pLeaf ){
  2583. sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
  2584. pLeaf->nn = pLeaf->szLeaf = nList;
  2585. pIter->pLeaf = pLeaf;
  2586. pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
  2587. pIter->iEndofDoclist = pLeaf->nn;
  2588. if( flags & FTS5INDEX_QUERY_DESC ){
  2589. pIter->flags |= FTS5_SEGITER_REVERSE;
  2590. fts5SegIterReverseInitPage(p, pIter);
  2591. }else{
  2592. fts5SegIterLoadNPos(p, pIter);
  2593. }
  2594. }
  2595. fts5SegIterSetNext(p, pIter);
  2596. }
  2597. /*
  2598. ** Array ap[] contains n elements. Release each of these elements using
  2599. ** fts5DataRelease(). Then free the array itself using sqlite3_free().
  2600. */
  2601. static void fts5IndexFreeArray(Fts5Data **ap, int n){
  2602. if( ap ){
  2603. int ii;
  2604. for(ii=0; ii<n; ii++){
  2605. fts5DataRelease(ap[ii]);
  2606. }
  2607. sqlite3_free(ap);
  2608. }
  2609. }
  2610. /*
  2611. ** Decrement the ref-count of the object passed as the only argument. If it
  2612. ** reaches 0, free it and its contents.
  2613. */
  2614. static void fts5TombstoneArrayDelete(Fts5TombstoneArray *p){
  2615. if( p ){
  2616. p->nRef--;
  2617. if( p->nRef<=0 ){
  2618. int ii;
  2619. for(ii=0; ii<p->nTombstone; ii++){
  2620. fts5DataRelease(p->apTombstone[ii]);
  2621. }
  2622. sqlite3_free(p);
  2623. }
  2624. }
  2625. }
  2626. /*
  2627. ** Zero the iterator passed as the only argument.
  2628. */
  2629. static void fts5SegIterClear(Fts5SegIter *pIter){
  2630. fts5BufferFree(&pIter->term);
  2631. fts5DataRelease(pIter->pLeaf);
  2632. fts5DataRelease(pIter->pNextLeaf);
  2633. fts5TombstoneArrayDelete(pIter->pTombArray);
  2634. fts5DlidxIterFree(pIter->pDlidx);
  2635. sqlite3_free(pIter->aRowidOffset);
  2636. memset(pIter, 0, sizeof(Fts5SegIter));
  2637. }
  2638. #ifdef SQLITE_DEBUG
  2639. /*
  2640. ** This function is used as part of the big assert() procedure implemented by
  2641. ** fts5AssertMultiIterSetup(). It ensures that the result currently stored
  2642. ** in *pRes is the correct result of comparing the current positions of the
  2643. ** two iterators.
  2644. */
  2645. static void fts5AssertComparisonResult(
  2646. Fts5Iter *pIter,
  2647. Fts5SegIter *p1,
  2648. Fts5SegIter *p2,
  2649. Fts5CResult *pRes
  2650. ){
  2651. int i1 = p1 - pIter->aSeg;
  2652. int i2 = p2 - pIter->aSeg;
  2653. if( p1->pLeaf || p2->pLeaf ){
  2654. if( p1->pLeaf==0 ){
  2655. assert( pRes->iFirst==i2 );
  2656. }else if( p2->pLeaf==0 ){
  2657. assert( pRes->iFirst==i1 );
  2658. }else{
  2659. int nMin = MIN(p1->term.n, p2->term.n);
  2660. int res = fts5Memcmp(p1->term.p, p2->term.p, nMin);
  2661. if( res==0 ) res = p1->term.n - p2->term.n;
  2662. if( res==0 ){
  2663. assert( pRes->bTermEq==1 );
  2664. assert( p1->iRowid!=p2->iRowid );
  2665. res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
  2666. }else{
  2667. assert( pRes->bTermEq==0 );
  2668. }
  2669. if( res<0 ){
  2670. assert( pRes->iFirst==i1 );
  2671. }else{
  2672. assert( pRes->iFirst==i2 );
  2673. }
  2674. }
  2675. }
  2676. }
  2677. /*
  2678. ** This function is a no-op unless SQLITE_DEBUG is defined when this module
  2679. ** is compiled. In that case, this function is essentially an assert()
  2680. ** statement used to verify that the contents of the pIter->aFirst[] array
  2681. ** are correct.
  2682. */
  2683. static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
  2684. if( p->rc==SQLITE_OK ){
  2685. Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  2686. int i;
  2687. assert( (pFirst->pLeaf==0)==pIter->base.bEof );
  2688. /* Check that pIter->iSwitchRowid is set correctly. */
  2689. for(i=0; i<pIter->nSeg; i++){
  2690. Fts5SegIter *p1 = &pIter->aSeg[i];
  2691. assert( p1==pFirst
  2692. || p1->pLeaf==0
  2693. || fts5BufferCompare(&pFirst->term, &p1->term)
  2694. || p1->iRowid==pIter->iSwitchRowid
  2695. || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
  2696. );
  2697. }
  2698. for(i=0; i<pIter->nSeg; i+=2){
  2699. Fts5SegIter *p1 = &pIter->aSeg[i];
  2700. Fts5SegIter *p2 = &pIter->aSeg[i+1];
  2701. Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
  2702. fts5AssertComparisonResult(pIter, p1, p2, pRes);
  2703. }
  2704. for(i=1; i<(pIter->nSeg / 2); i+=2){
  2705. Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
  2706. Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
  2707. Fts5CResult *pRes = &pIter->aFirst[i];
  2708. fts5AssertComparisonResult(pIter, p1, p2, pRes);
  2709. }
  2710. }
  2711. }
  2712. #else
  2713. # define fts5AssertMultiIterSetup(x,y)
  2714. #endif
  2715. /*
  2716. ** Do the comparison necessary to populate pIter->aFirst[iOut].
  2717. **
  2718. ** If the returned value is non-zero, then it is the index of an entry
  2719. ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
  2720. ** to a key that is a duplicate of another, higher priority,
  2721. ** segment-iterator in the pSeg->aSeg[] array.
  2722. */
  2723. static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
  2724. int i1; /* Index of left-hand Fts5SegIter */
  2725. int i2; /* Index of right-hand Fts5SegIter */
  2726. int iRes;
  2727. Fts5SegIter *p1; /* Left-hand Fts5SegIter */
  2728. Fts5SegIter *p2; /* Right-hand Fts5SegIter */
  2729. Fts5CResult *pRes = &pIter->aFirst[iOut];
  2730. assert( iOut<pIter->nSeg && iOut>0 );
  2731. assert( pIter->bRev==0 || pIter->bRev==1 );
  2732. if( iOut>=(pIter->nSeg/2) ){
  2733. i1 = (iOut - pIter->nSeg/2) * 2;
  2734. i2 = i1 + 1;
  2735. }else{
  2736. i1 = pIter->aFirst[iOut*2].iFirst;
  2737. i2 = pIter->aFirst[iOut*2+1].iFirst;
  2738. }
  2739. p1 = &pIter->aSeg[i1];
  2740. p2 = &pIter->aSeg[i2];
  2741. pRes->bTermEq = 0;
  2742. if( p1->pLeaf==0 ){ /* If p1 is at EOF */
  2743. iRes = i2;
  2744. }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */
  2745. iRes = i1;
  2746. }else{
  2747. int res = fts5BufferCompare(&p1->term, &p2->term);
  2748. if( res==0 ){
  2749. assert_nc( i2>i1 );
  2750. assert_nc( i2!=0 );
  2751. pRes->bTermEq = 1;
  2752. if( p1->iRowid==p2->iRowid ){
  2753. return i2;
  2754. }
  2755. res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
  2756. }
  2757. assert( res!=0 );
  2758. if( res<0 ){
  2759. iRes = i1;
  2760. }else{
  2761. iRes = i2;
  2762. }
  2763. }
  2764. pRes->iFirst = (u16)iRes;
  2765. return 0;
  2766. }
  2767. /*
  2768. ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
  2769. ** It is an error if leaf iLeafPgno does not exist. Unless the db is
  2770. ** a 'secure-delete' db, if it contains no rowids then this is also an error.
  2771. */
  2772. static void fts5SegIterGotoPage(
  2773. Fts5Index *p, /* FTS5 backend object */
  2774. Fts5SegIter *pIter, /* Iterator to advance */
  2775. int iLeafPgno
  2776. ){
  2777. assert( iLeafPgno>pIter->iLeafPgno );
  2778. if( iLeafPgno>pIter->pSeg->pgnoLast ){
  2779. p->rc = FTS5_CORRUPT;
  2780. }else{
  2781. fts5DataRelease(pIter->pNextLeaf);
  2782. pIter->pNextLeaf = 0;
  2783. pIter->iLeafPgno = iLeafPgno-1;
  2784. while( p->rc==SQLITE_OK ){
  2785. int iOff;
  2786. fts5SegIterNextPage(p, pIter);
  2787. if( pIter->pLeaf==0 ) break;
  2788. iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
  2789. if( iOff>0 ){
  2790. u8 *a = pIter->pLeaf->p;
  2791. int n = pIter->pLeaf->szLeaf;
  2792. if( iOff<4 || iOff>=n ){
  2793. p->rc = FTS5_CORRUPT;
  2794. }else{
  2795. iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
  2796. pIter->iLeafOffset = iOff;
  2797. fts5SegIterLoadNPos(p, pIter);
  2798. }
  2799. break;
  2800. }
  2801. }
  2802. }
  2803. }
  2804. /*
  2805. ** Advance the iterator passed as the second argument until it is at or
  2806. ** past rowid iFrom. Regardless of the value of iFrom, the iterator is
  2807. ** always advanced at least once.
  2808. */
  2809. static void fts5SegIterNextFrom(
  2810. Fts5Index *p, /* FTS5 backend object */
  2811. Fts5SegIter *pIter, /* Iterator to advance */
  2812. i64 iMatch /* Advance iterator at least this far */
  2813. ){
  2814. int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
  2815. Fts5DlidxIter *pDlidx = pIter->pDlidx;
  2816. int iLeafPgno = pIter->iLeafPgno;
  2817. int bMove = 1;
  2818. assert( pIter->flags & FTS5_SEGITER_ONETERM );
  2819. assert( pIter->pDlidx );
  2820. assert( pIter->pLeaf );
  2821. if( bRev==0 ){
  2822. while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
  2823. iLeafPgno = fts5DlidxIterPgno(pDlidx);
  2824. fts5DlidxIterNext(p, pDlidx);
  2825. }
  2826. assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
  2827. if( iLeafPgno>pIter->iLeafPgno ){
  2828. fts5SegIterGotoPage(p, pIter, iLeafPgno);
  2829. bMove = 0;
  2830. }
  2831. }else{
  2832. assert( pIter->pNextLeaf==0 );
  2833. assert( iMatch<pIter->iRowid );
  2834. while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
  2835. fts5DlidxIterPrev(p, pDlidx);
  2836. }
  2837. iLeafPgno = fts5DlidxIterPgno(pDlidx);
  2838. assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
  2839. if( iLeafPgno<pIter->iLeafPgno ){
  2840. pIter->iLeafPgno = iLeafPgno+1;
  2841. fts5SegIterReverseNewPage(p, pIter);
  2842. bMove = 0;
  2843. }
  2844. }
  2845. do{
  2846. if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
  2847. if( pIter->pLeaf==0 ) break;
  2848. if( bRev==0 && pIter->iRowid>=iMatch ) break;
  2849. if( bRev!=0 && pIter->iRowid<=iMatch ) break;
  2850. bMove = 1;
  2851. }while( p->rc==SQLITE_OK );
  2852. }
  2853. /*
  2854. ** Free the iterator object passed as the second argument.
  2855. */
  2856. static void fts5MultiIterFree(Fts5Iter *pIter){
  2857. if( pIter ){
  2858. int i;
  2859. for(i=0; i<pIter->nSeg; i++){
  2860. fts5SegIterClear(&pIter->aSeg[i]);
  2861. }
  2862. fts5BufferFree(&pIter->poslist);
  2863. sqlite3_free(pIter);
  2864. }
  2865. }
  2866. static void fts5MultiIterAdvanced(
  2867. Fts5Index *p, /* FTS5 backend to iterate within */
  2868. Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
  2869. int iChanged, /* Index of sub-iterator just advanced */
  2870. int iMinset /* Minimum entry in aFirst[] to set */
  2871. ){
  2872. int i;
  2873. for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
  2874. int iEq;
  2875. if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
  2876. Fts5SegIter *pSeg = &pIter->aSeg[iEq];
  2877. assert( p->rc==SQLITE_OK );
  2878. pSeg->xNext(p, pSeg, 0);
  2879. i = pIter->nSeg + iEq;
  2880. }
  2881. }
  2882. }
  2883. /*
  2884. ** Sub-iterator iChanged of iterator pIter has just been advanced. It still
  2885. ** points to the same term though - just a different rowid. This function
  2886. ** attempts to update the contents of the pIter->aFirst[] accordingly.
  2887. ** If it does so successfully, 0 is returned. Otherwise 1.
  2888. **
  2889. ** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
  2890. ** on the iterator instead. That function does the same as this one, except
  2891. ** that it deals with more complicated cases as well.
  2892. */
  2893. static int fts5MultiIterAdvanceRowid(
  2894. Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
  2895. int iChanged, /* Index of sub-iterator just advanced */
  2896. Fts5SegIter **ppFirst
  2897. ){
  2898. Fts5SegIter *pNew = &pIter->aSeg[iChanged];
  2899. if( pNew->iRowid==pIter->iSwitchRowid
  2900. || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
  2901. ){
  2902. int i;
  2903. Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
  2904. pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
  2905. for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
  2906. Fts5CResult *pRes = &pIter->aFirst[i];
  2907. assert( pNew->pLeaf );
  2908. assert( pRes->bTermEq==0 || pOther->pLeaf );
  2909. if( pRes->bTermEq ){
  2910. if( pNew->iRowid==pOther->iRowid ){
  2911. return 1;
  2912. }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
  2913. pIter->iSwitchRowid = pOther->iRowid;
  2914. pNew = pOther;
  2915. }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
  2916. pIter->iSwitchRowid = pOther->iRowid;
  2917. }
  2918. }
  2919. pRes->iFirst = (u16)(pNew - pIter->aSeg);
  2920. if( i==1 ) break;
  2921. pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
  2922. }
  2923. }
  2924. *ppFirst = pNew;
  2925. return 0;
  2926. }
  2927. /*
  2928. ** Set the pIter->bEof variable based on the state of the sub-iterators.
  2929. */
  2930. static void fts5MultiIterSetEof(Fts5Iter *pIter){
  2931. Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  2932. pIter->base.bEof = pSeg->pLeaf==0;
  2933. pIter->iSwitchRowid = pSeg->iRowid;
  2934. }
  /*
  ** The argument to these macros must be a pointer to an Fts5Data structure
  ** containing a tombstone hash page.
  **
  ** TOMBSTONE_KEYSIZE() returns the key-size of the hash-page: 4 if the
  ** first byte of the page is 4, or 8 otherwise.
  **
  ** TOMBSTONE_NSLOT() returns the number of slots on the page: 1 if the
  ** page is 16 bytes or smaller, or else the number of keys that fit in
  ** the page once the first 8 bytes are excluded.
  **
  ** The macro argument is parenthesized in the expansions so that any
  ** pointer-valued expression (e.g. "&sPage") may be passed safely. It is
  ** evaluated more than once, so it must not have side effects.
  */
  #define TOMBSTONE_KEYSIZE(pPg) ((pPg)->p[0]==4 ? 4 : 8)
  #define TOMBSTONE_NSLOT(pPg)   \
    (((pPg)->nn > 16) ? (((pPg)->nn-8) / TOMBSTONE_KEYSIZE(pPg)) : 1)
  2942. /*
  2943. ** Query a single tombstone hash table for rowid iRowid. Return true if
  2944. ** it is found or false otherwise. The tombstone hash table is one of
  2945. ** nHashTable tables.
  2946. */
  2947. static int fts5IndexTombstoneQuery(
  2948. Fts5Data *pHash, /* Hash table page to query */
  2949. int nHashTable, /* Number of pages attached to segment */
  2950. u64 iRowid /* Rowid to query hash for */
  2951. ){
  2952. const int szKey = TOMBSTONE_KEYSIZE(pHash);
  2953. const int nSlot = TOMBSTONE_NSLOT(pHash);
  2954. int iSlot = (iRowid / nHashTable) % nSlot;
  2955. int nCollide = nSlot;
  2956. if( iRowid==0 ){
  2957. return pHash->p[1];
  2958. }else if( szKey==4 ){
  2959. u32 *aSlot = (u32*)&pHash->p[8];
  2960. while( aSlot[iSlot] ){
  2961. if( fts5GetU32((u8*)&aSlot[iSlot])==iRowid ) return 1;
  2962. if( nCollide--==0 ) break;
  2963. iSlot = (iSlot+1)%nSlot;
  2964. }
  2965. }else{
  2966. u64 *aSlot = (u64*)&pHash->p[8];
  2967. while( aSlot[iSlot] ){
  2968. if( fts5GetU64((u8*)&aSlot[iSlot])==iRowid ) return 1;
  2969. if( nCollide--==0 ) break;
  2970. iSlot = (iSlot+1)%nSlot;
  2971. }
  2972. }
  2973. return 0;
  2974. }
  2975. /*
  2976. ** Return true if the iterator passed as the only argument points
  2977. ** to an segment entry for which there is a tombstone. Return false
  2978. ** if there is no tombstone or if the iterator is already at EOF.
  2979. */
  2980. static int fts5MultiIterIsDeleted(Fts5Iter *pIter){
  2981. int iFirst = pIter->aFirst[1].iFirst;
  2982. Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
  2983. Fts5TombstoneArray *pArray = pSeg->pTombArray;
  2984. if( pSeg->pLeaf && pArray ){
  2985. /* Figure out which page the rowid might be present on. */
  2986. int iPg = ((u64)pSeg->iRowid) % pArray->nTombstone;
  2987. assert( iPg>=0 );
  2988. /* If tombstone hash page iPg has not yet been loaded from the
  2989. ** database, load it now. */
  2990. if( pArray->apTombstone[iPg]==0 ){
  2991. pArray->apTombstone[iPg] = fts5DataRead(pIter->pIndex,
  2992. FTS5_TOMBSTONE_ROWID(pSeg->pSeg->iSegid, iPg)
  2993. );
  2994. if( pArray->apTombstone[iPg]==0 ) return 0;
  2995. }
  2996. return fts5IndexTombstoneQuery(
  2997. pArray->apTombstone[iPg],
  2998. pArray->nTombstone,
  2999. pSeg->iRowid
  3000. );
  3001. }
  3002. return 0;
  3003. }
  3004. /*
  3005. ** Move the iterator to the next entry.
  3006. **
  3007. ** If an error occurs, an error code is left in Fts5Index.rc. It is not
  3008. ** considered an error if the iterator reaches EOF, or if it is already at
  3009. ** EOF when this function is called.
  3010. */
  3011. static void fts5MultiIterNext(
  3012. Fts5Index *p,
  3013. Fts5Iter *pIter,
  3014. int bFrom, /* True if argument iFrom is valid */
  3015. i64 iFrom /* Advance at least as far as this */
  3016. ){
  3017. int bUseFrom = bFrom;
  3018. assert( pIter->base.bEof==0 );
  3019. while( p->rc==SQLITE_OK ){
  3020. int iFirst = pIter->aFirst[1].iFirst;
  3021. int bNewTerm = 0;
  3022. Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
  3023. assert( p->rc==SQLITE_OK );
  3024. if( bUseFrom && pSeg->pDlidx ){
  3025. fts5SegIterNextFrom(p, pSeg, iFrom);
  3026. }else{
  3027. pSeg->xNext(p, pSeg, &bNewTerm);
  3028. }
  3029. if( pSeg->pLeaf==0 || bNewTerm
  3030. || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
  3031. ){
  3032. fts5MultiIterAdvanced(p, pIter, iFirst, 1);
  3033. fts5MultiIterSetEof(pIter);
  3034. pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
  3035. if( pSeg->pLeaf==0 ) return;
  3036. }
  3037. fts5AssertMultiIterSetup(p, pIter);
  3038. assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
  3039. if( (pIter->bSkipEmpty==0 || pSeg->nPos)
  3040. && 0==fts5MultiIterIsDeleted(pIter)
  3041. ){
  3042. pIter->xSetOutputs(pIter, pSeg);
  3043. return;
  3044. }
  3045. bUseFrom = 0;
  3046. }
  3047. }
  3048. static void fts5MultiIterNext2(
  3049. Fts5Index *p,
  3050. Fts5Iter *pIter,
  3051. int *pbNewTerm /* OUT: True if *might* be new term */
  3052. ){
  3053. assert( pIter->bSkipEmpty );
  3054. if( p->rc==SQLITE_OK ){
  3055. *pbNewTerm = 0;
  3056. do{
  3057. int iFirst = pIter->aFirst[1].iFirst;
  3058. Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
  3059. int bNewTerm = 0;
  3060. assert( p->rc==SQLITE_OK );
  3061. pSeg->xNext(p, pSeg, &bNewTerm);
  3062. if( pSeg->pLeaf==0 || bNewTerm
  3063. || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
  3064. ){
  3065. fts5MultiIterAdvanced(p, pIter, iFirst, 1);
  3066. fts5MultiIterSetEof(pIter);
  3067. *pbNewTerm = 1;
  3068. }
  3069. fts5AssertMultiIterSetup(p, pIter);
  3070. }while( (fts5MultiIterIsEmpty(p, pIter) || fts5MultiIterIsDeleted(pIter))
  3071. && (p->rc==SQLITE_OK)
  3072. );
  3073. }
  3074. }
  3075. static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
  3076. UNUSED_PARAM2(pUnused1, pUnused2);
  3077. }
  3078. static Fts5Iter *fts5MultiIterAlloc(
  3079. Fts5Index *p, /* FTS5 backend to iterate within */
  3080. int nSeg
  3081. ){
  3082. Fts5Iter *pNew;
  3083. i64 nSlot; /* Power of two >= nSeg */
  3084. for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
  3085. pNew = fts5IdxMalloc(p,
  3086. SZ_FTS5ITER(nSlot) + /* pNew + pNew->aSeg[] */
  3087. sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */
  3088. );
  3089. if( pNew ){
  3090. pNew->nSeg = nSlot;
  3091. pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
  3092. pNew->pIndex = p;
  3093. pNew->xSetOutputs = fts5IterSetOutputs_Noop;
  3094. }
  3095. return pNew;
  3096. }
  3097. static void fts5PoslistCallback(
  3098. Fts5Index *pUnused,
  3099. void *pContext,
  3100. const u8 *pChunk, int nChunk
  3101. ){
  3102. UNUSED_PARAM(pUnused);
  3103. assert_nc( nChunk>=0 );
  3104. if( nChunk>0 ){
  3105. fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
  3106. }
  3107. }
  3108. typedef struct PoslistCallbackCtx PoslistCallbackCtx;
  3109. struct PoslistCallbackCtx {
  3110. Fts5Buffer *pBuf; /* Append to this buffer */
  3111. Fts5Colset *pColset; /* Restrict matches to this column */
  3112. int eState; /* See above */
  3113. };
  3114. typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
  3115. struct PoslistOffsetsCtx {
  3116. Fts5Buffer *pBuf; /* Append to this buffer */
  3117. Fts5Colset *pColset; /* Restrict matches to this column */
  3118. int iRead;
  3119. int iWrite;
  3120. };
  3121. /*
  3122. ** TODO: Make this more efficient!
  3123. */
  3124. static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
  3125. int i;
  3126. for(i=0; i<pColset->nCol; i++){
  3127. if( pColset->aiCol[i]==iCol ) return 1;
  3128. }
  3129. return 0;
  3130. }
  3131. static void fts5PoslistOffsetsCallback(
  3132. Fts5Index *pUnused,
  3133. void *pContext,
  3134. const u8 *pChunk, int nChunk
  3135. ){
  3136. PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
  3137. UNUSED_PARAM(pUnused);
  3138. assert_nc( nChunk>=0 );
  3139. if( nChunk>0 ){
  3140. int i = 0;
  3141. while( i<nChunk ){
  3142. int iVal;
  3143. i += fts5GetVarint32(&pChunk[i], iVal);
  3144. iVal += pCtx->iRead - 2;
  3145. pCtx->iRead = iVal;
  3146. if( fts5IndexColsetTest(pCtx->pColset, iVal) ){
  3147. fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite);
  3148. pCtx->iWrite = iVal;
  3149. }
  3150. }
  3151. }
  3152. }
  3153. static void fts5PoslistFilterCallback(
  3154. Fts5Index *pUnused,
  3155. void *pContext,
  3156. const u8 *pChunk, int nChunk
  3157. ){
  3158. PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
  3159. UNUSED_PARAM(pUnused);
  3160. assert_nc( nChunk>=0 );
  3161. if( nChunk>0 ){
  3162. /* Search through to find the first varint with value 1. This is the
  3163. ** start of the next columns hits. */
  3164. int i = 0;
  3165. int iStart = 0;
  3166. if( pCtx->eState==2 ){
  3167. int iCol;
  3168. fts5FastGetVarint32(pChunk, i, iCol);
  3169. if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
  3170. pCtx->eState = 1;
  3171. fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
  3172. }else{
  3173. pCtx->eState = 0;
  3174. }
  3175. }
  3176. do {
  3177. while( i<nChunk && pChunk[i]!=0x01 ){
  3178. while( pChunk[i] & 0x80 ) i++;
  3179. i++;
  3180. }
  3181. if( pCtx->eState ){
  3182. fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
  3183. }
  3184. if( i<nChunk ){
  3185. int iCol;
  3186. iStart = i;
  3187. i++;
  3188. if( i>=nChunk ){
  3189. pCtx->eState = 2;
  3190. }else{
  3191. fts5FastGetVarint32(pChunk, i, iCol);
  3192. pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
  3193. if( pCtx->eState ){
  3194. fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
  3195. iStart = i;
  3196. }
  3197. }
  3198. }
  3199. }while( i<nChunk );
  3200. }
  3201. }
  3202. static void fts5ChunkIterate(
  3203. Fts5Index *p, /* Index object */
  3204. Fts5SegIter *pSeg, /* Poslist of this iterator */
  3205. void *pCtx, /* Context pointer for xChunk callback */
  3206. void (*xChunk)(Fts5Index*, void*, const u8*, int)
  3207. ){
  3208. int nRem = pSeg->nPos; /* Number of bytes still to come */
  3209. Fts5Data *pData = 0;
  3210. u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  3211. int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
  3212. int pgno = pSeg->iLeafPgno;
  3213. int pgnoSave = 0;
  3214. /* This function does not work with detail=none databases. */
  3215. assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
  3216. if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
  3217. pgnoSave = pgno+1;
  3218. }
  3219. while( 1 ){
  3220. xChunk(p, pCtx, pChunk, nChunk);
  3221. nRem -= nChunk;
  3222. fts5DataRelease(pData);
  3223. if( nRem<=0 ){
  3224. break;
  3225. }else if( pSeg->pSeg==0 ){
  3226. p->rc = FTS5_CORRUPT;
  3227. return;
  3228. }else{
  3229. pgno++;
  3230. pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
  3231. if( pData==0 ) break;
  3232. pChunk = &pData->p[4];
  3233. nChunk = MIN(nRem, pData->szLeaf - 4);
  3234. if( pgno==pgnoSave ){
  3235. assert( pSeg->pNextLeaf==0 );
  3236. pSeg->pNextLeaf = pData;
  3237. pData = 0;
  3238. }
  3239. }
  3240. }
  3241. }
  3242. /*
  3243. ** Iterator pIter currently points to a valid entry (not EOF). This
  3244. ** function appends the position list data for the current entry to
  3245. ** buffer pBuf. It does not make a copy of the position-list size
  3246. ** field.
  3247. */
  3248. static void fts5SegiterPoslist(
  3249. Fts5Index *p,
  3250. Fts5SegIter *pSeg,
  3251. Fts5Colset *pColset,
  3252. Fts5Buffer *pBuf
  3253. ){
  3254. assert( pBuf!=0 );
  3255. assert( pSeg!=0 );
  3256. if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING) ){
  3257. assert( pBuf->p!=0 );
  3258. assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING );
  3259. memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING);
  3260. if( pColset==0 ){
  3261. fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
  3262. }else{
  3263. if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
  3264. PoslistCallbackCtx sCtx;
  3265. sCtx.pBuf = pBuf;
  3266. sCtx.pColset = pColset;
  3267. sCtx.eState = fts5IndexColsetTest(pColset, 0);
  3268. assert( sCtx.eState==0 || sCtx.eState==1 );
  3269. fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
  3270. }else{
  3271. PoslistOffsetsCtx sCtx;
  3272. memset(&sCtx, 0, sizeof(sCtx));
  3273. sCtx.pBuf = pBuf;
  3274. sCtx.pColset = pColset;
  3275. fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
  3276. }
  3277. }
  3278. }
  3279. }
  3280. /*
  3281. ** Parameter pPos points to a buffer containing a position list, size nPos.
  3282. ** This function filters it according to pColset (which must be non-NULL)
  3283. ** and sets pIter->base.pData/nData to point to the new position list.
  3284. ** If memory is required for the new position list, use buffer pIter->poslist.
  3285. ** Or, if the new position list is a contiguous subset of the input, set
  3286. ** pIter->base.pData/nData to point directly to it.
  3287. **
  3288. ** This function is a no-op if *pRc is other than SQLITE_OK when it is
  3289. ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM
  3290. ** before returning.
  3291. */
  3292. static void fts5IndexExtractColset(
  3293. int *pRc,
  3294. Fts5Colset *pColset, /* Colset to filter on */
  3295. const u8 *pPos, int nPos, /* Position list */
  3296. Fts5Iter *pIter
  3297. ){
  3298. if( *pRc==SQLITE_OK ){
  3299. const u8 *p = pPos;
  3300. const u8 *aCopy = p;
  3301. const u8 *pEnd = &p[nPos]; /* One byte past end of position list */
  3302. int i = 0;
  3303. int iCurrent = 0;
  3304. if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){
  3305. return;
  3306. }
  3307. while( 1 ){
  3308. while( pColset->aiCol[i]<iCurrent ){
  3309. i++;
  3310. if( i==pColset->nCol ){
  3311. pIter->base.pData = pIter->poslist.p;
  3312. pIter->base.nData = pIter->poslist.n;
  3313. return;
  3314. }
  3315. }
  3316. /* Advance pointer p until it points to pEnd or an 0x01 byte that is
  3317. ** not part of a varint */
  3318. while( p<pEnd && *p!=0x01 ){
  3319. while( *p++ & 0x80 );
  3320. }
  3321. if( pColset->aiCol[i]==iCurrent ){
  3322. if( pColset->nCol==1 ){
  3323. pIter->base.pData = aCopy;
  3324. pIter->base.nData = p-aCopy;
  3325. return;
  3326. }
  3327. fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy);
  3328. }
  3329. if( p>=pEnd ){
  3330. pIter->base.pData = pIter->poslist.p;
  3331. pIter->base.nData = pIter->poslist.n;
  3332. return;
  3333. }
  3334. aCopy = p++;
  3335. iCurrent = *p++;
  3336. if( iCurrent & 0x80 ){
  3337. p--;
  3338. p += fts5GetVarint32(p, iCurrent);
  3339. }
  3340. }
  3341. }
  3342. }
  3343. /*
  3344. ** xSetOutputs callback used by detail=none tables.
  3345. */
  3346. static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
  3347. assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
  3348. pIter->base.iRowid = pSeg->iRowid;
  3349. pIter->base.nData = pSeg->nPos;
  3350. }
  3351. /*
  3352. ** xSetOutputs callback used by detail=full and detail=col tables when no
  3353. ** column filters are specified.
  3354. */
  3355. static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
  3356. pIter->base.iRowid = pSeg->iRowid;
  3357. pIter->base.nData = pSeg->nPos;
  3358. assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
  3359. assert( pIter->pColset==0 );
  3360. if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
  3361. /* All data is stored on the current page. Populate the output
  3362. ** variables to point into the body of the page object. */
  3363. pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  3364. }else{
  3365. /* The data is distributed over two or more pages. Copy it into the
  3366. ** Fts5Iter.poslist buffer and then set the output pointer to point
  3367. ** to this buffer. */
  3368. fts5BufferZero(&pIter->poslist);
  3369. fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
  3370. pIter->base.pData = pIter->poslist.p;
  3371. }
  3372. }
  3373. /*
  3374. ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
  3375. ** against no columns at all).
  3376. */
  3377. static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
  3378. UNUSED_PARAM(pSeg);
  3379. pIter->base.nData = 0;
  3380. }
  3381. /*
  3382. ** xSetOutputs callback used by detail=col when there is a column filter
  3383. ** and there are 100 or more columns. Also called as a fallback from
  3384. ** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
  3385. */
  3386. static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
  3387. fts5BufferZero(&pIter->poslist);
  3388. fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
  3389. pIter->base.iRowid = pSeg->iRowid;
  3390. pIter->base.pData = pIter->poslist.p;
  3391. pIter->base.nData = pIter->poslist.n;
  3392. }
  3393. /*
  3394. ** xSetOutputs callback used when:
  3395. **
  3396. ** * detail=col,
  3397. ** * there is a column filter, and
  3398. ** * the table contains 100 or fewer columns.
  3399. **
  3400. ** The last point is to ensure all column numbers are stored as
  3401. ** single-byte varints.
  3402. */
  3403. static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
  3404. assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
  3405. assert( pIter->pColset );
  3406. if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
  3407. fts5IterSetOutputs_Col(pIter, pSeg);
  3408. }else{
  3409. u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
  3410. u8 *pEnd = (u8*)&a[pSeg->nPos];
  3411. int iPrev = 0;
  3412. int *aiCol = pIter->pColset->aiCol;
  3413. int *aiColEnd = &aiCol[pIter->pColset->nCol];
  3414. u8 *aOut = pIter->poslist.p;
  3415. int iPrevOut = 0;
  3416. pIter->base.iRowid = pSeg->iRowid;
  3417. while( a<pEnd ){
  3418. iPrev += (int)a++[0] - 2;
  3419. while( *aiCol<iPrev ){
  3420. aiCol++;
  3421. if( aiCol==aiColEnd ) goto setoutputs_col_out;
  3422. }
  3423. if( *aiCol==iPrev ){
  3424. *aOut++ = (u8)((iPrev - iPrevOut) + 2);
  3425. iPrevOut = iPrev;
  3426. }
  3427. }
  3428. setoutputs_col_out:
  3429. pIter->base.pData = pIter->poslist.p;
  3430. pIter->base.nData = aOut - pIter->poslist.p;
  3431. }
  3432. }
  3433. /*
  3434. ** xSetOutputs callback used by detail=full when there is a column filter.
  3435. */
  3436. static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
  3437. Fts5Colset *pColset = pIter->pColset;
  3438. pIter->base.iRowid = pSeg->iRowid;
  3439. assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
  3440. assert( pColset );
  3441. if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
  3442. /* All data is stored on the current page. Populate the output
  3443. ** variables to point into the body of the page object. */
  3444. const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  3445. int *pRc = &pIter->pIndex->rc;
  3446. fts5BufferZero(&pIter->poslist);
  3447. fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter);
  3448. }else{
  3449. /* The data is distributed over two or more pages. Copy it into the
  3450. ** Fts5Iter.poslist buffer and then set the output pointer to point
  3451. ** to this buffer. */
  3452. fts5BufferZero(&pIter->poslist);
  3453. fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
  3454. pIter->base.pData = pIter->poslist.p;
  3455. pIter->base.nData = pIter->poslist.n;
  3456. }
  3457. }
  3458. static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
  3459. assert( pIter!=0 || (*pRc)!=SQLITE_OK );
  3460. if( *pRc==SQLITE_OK ){
  3461. Fts5Config *pConfig = pIter->pIndex->pConfig;
  3462. if( pConfig->eDetail==FTS5_DETAIL_NONE ){
  3463. pIter->xSetOutputs = fts5IterSetOutputs_None;
  3464. }
  3465. else if( pIter->pColset==0 ){
  3466. pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
  3467. }
  3468. else if( pIter->pColset->nCol==0 ){
  3469. pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
  3470. }
  3471. else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
  3472. pIter->xSetOutputs = fts5IterSetOutputs_Full;
  3473. }
  3474. else{
  3475. assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
  3476. if( pConfig->nCol<=100 ){
  3477. pIter->xSetOutputs = fts5IterSetOutputs_Col100;
  3478. sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
  3479. }else{
  3480. pIter->xSetOutputs = fts5IterSetOutputs_Col;
  3481. }
  3482. }
  3483. }
  3484. }
  3485. /*
  3486. ** All the component segment-iterators of pIter have been set up. This
  3487. ** functions finishes setup for iterator pIter itself.
  3488. */
  3489. static void fts5MultiIterFinishSetup(Fts5Index *p, Fts5Iter *pIter){
  3490. int iIter;
  3491. for(iIter=pIter->nSeg-1; iIter>0; iIter--){
  3492. int iEq;
  3493. if( (iEq = fts5MultiIterDoCompare(pIter, iIter)) ){
  3494. Fts5SegIter *pSeg = &pIter->aSeg[iEq];
  3495. if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
  3496. fts5MultiIterAdvanced(p, pIter, iEq, iIter);
  3497. }
  3498. }
  3499. fts5MultiIterSetEof(pIter);
  3500. fts5AssertMultiIterSetup(p, pIter);
  3501. if( (pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter))
  3502. || fts5MultiIterIsDeleted(pIter)
  3503. ){
  3504. fts5MultiIterNext(p, pIter, 0, 0);
  3505. }else if( pIter->base.bEof==0 ){
  3506. Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
  3507. pIter->xSetOutputs(pIter, pSeg);
  3508. }
  3509. }
  3510. /*
  3511. ** Allocate a new Fts5Iter object.
  3512. **
  3513. ** The new object will be used to iterate through data in structure pStruct.
  3514. ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
  3515. ** is zero or greater, data from the first nSegment segments on level iLevel
  3516. ** is merged.
  3517. **
  3518. ** The iterator initially points to the first term/rowid entry in the
  3519. ** iterated data.
  3520. */
  3521. static void fts5MultiIterNew(
  3522. Fts5Index *p, /* FTS5 backend to iterate within */
  3523. Fts5Structure *pStruct, /* Structure of specific index */
  3524. int flags, /* FTS5INDEX_QUERY_XXX flags */
  3525. Fts5Colset *pColset, /* Colset to filter on (or NULL) */
  3526. const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */
  3527. int iLevel, /* Level to iterate (-1 for all) */
  3528. int nSegment, /* Number of segments to merge (iLevel>=0) */
  3529. Fts5Iter **ppOut /* New object */
  3530. ){
  3531. int nSeg = 0; /* Number of segment-iters in use */
  3532. int iIter = 0; /* */
  3533. int iSeg; /* Used to iterate through segments */
  3534. Fts5StructureLevel *pLvl;
  3535. Fts5Iter *pNew;
  3536. assert( (pTerm==0 && nTerm==0) || iLevel<0 );
  3537. /* Allocate space for the new multi-seg-iterator. */
  3538. if( p->rc==SQLITE_OK ){
  3539. if( iLevel<0 ){
  3540. assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
  3541. nSeg = pStruct->nSegment;
  3542. nSeg += (p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH));
  3543. }else{
  3544. nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
  3545. }
  3546. }
  3547. *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
  3548. if( pNew==0 ){
  3549. assert( p->rc!=SQLITE_OK );
  3550. goto fts5MultiIterNew_post_check;
  3551. }
  3552. pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
  3553. pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
  3554. pNew->pColset = pColset;
  3555. if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
  3556. fts5IterSetOutputCb(&p->rc, pNew);
  3557. }
  3558. /* Initialize each of the component segment iterators. */
  3559. if( p->rc==SQLITE_OK ){
  3560. if( iLevel<0 ){
  3561. Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
  3562. if( p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH) ){
  3563. /* Add a segment iterator for the current contents of the hash table. */
  3564. Fts5SegIter *pIter = &pNew->aSeg[iIter++];
  3565. fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
  3566. }
  3567. for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
  3568. for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
  3569. Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
  3570. Fts5SegIter *pIter = &pNew->aSeg[iIter++];
  3571. if( pTerm==0 ){
  3572. fts5SegIterInit(p, pSeg, pIter);
  3573. }else{
  3574. fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
  3575. }
  3576. }
  3577. }
  3578. }else{
  3579. pLvl = &pStruct->aLevel[iLevel];
  3580. for(iSeg=nSeg-1; iSeg>=0; iSeg--){
  3581. fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
  3582. }
  3583. }
  3584. assert( iIter==nSeg );
  3585. }
  3586. /* If the above was successful, each component iterator now points
  3587. ** to the first entry in its segment. In this case initialize the
  3588. ** aFirst[] array. Or, if an error has occurred, free the iterator
  3589. ** object and set the output variable to NULL. */
  3590. if( p->rc==SQLITE_OK ){
  3591. fts5MultiIterFinishSetup(p, pNew);
  3592. }else{
  3593. fts5MultiIterFree(pNew);
  3594. *ppOut = 0;
  3595. }
  3596. fts5MultiIterNew_post_check:
  3597. assert( (*ppOut)!=0 || p->rc!=SQLITE_OK );
  3598. return;
  3599. }
  3600. /*
  3601. ** Create an Fts5Iter that iterates through the doclist provided
  3602. ** as the second argument.
  3603. */
  3604. static void fts5MultiIterNew2(
  3605. Fts5Index *p, /* FTS5 backend to iterate within */
  3606. Fts5Data *pData, /* Doclist to iterate through */
  3607. int bDesc, /* True for descending rowid order */
  3608. Fts5Iter **ppOut /* New object */
  3609. ){
  3610. Fts5Iter *pNew;
  3611. pNew = fts5MultiIterAlloc(p, 2);
  3612. if( pNew ){
  3613. Fts5SegIter *pIter = &pNew->aSeg[1];
  3614. pIter->flags = FTS5_SEGITER_ONETERM;
  3615. if( pData->szLeaf>0 ){
  3616. pIter->pLeaf = pData;
  3617. pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
  3618. pIter->iEndofDoclist = pData->nn;
  3619. pNew->aFirst[1].iFirst = 1;
  3620. if( bDesc ){
  3621. pNew->bRev = 1;
  3622. pIter->flags |= FTS5_SEGITER_REVERSE;
  3623. fts5SegIterReverseInitPage(p, pIter);
  3624. }else{
  3625. fts5SegIterLoadNPos(p, pIter);
  3626. }
  3627. pData = 0;
  3628. }else{
  3629. pNew->base.bEof = 1;
  3630. }
  3631. fts5SegIterSetNext(p, pIter);
  3632. *ppOut = pNew;
  3633. }
  3634. fts5DataRelease(pData);
  3635. }
  3636. /*
  3637. ** Return true if the iterator is at EOF or if an error has occurred.
  3638. ** False otherwise.
  3639. */
  3640. static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
  3641. assert( pIter!=0 || p->rc!=SQLITE_OK );
  3642. assert( p->rc!=SQLITE_OK
  3643. || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
  3644. );
  3645. return (p->rc || pIter->base.bEof);
  3646. }
  3647. /*
  3648. ** Return the rowid of the entry that the iterator currently points
  3649. ** to. If the iterator points to EOF when this function is called the
  3650. ** results are undefined.
  3651. */
  3652. static i64 fts5MultiIterRowid(Fts5Iter *pIter){
  3653. assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
  3654. return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
  3655. }
  3656. /*
  3657. ** Move the iterator to the next entry at or following iMatch.
  3658. */
  3659. static void fts5MultiIterNextFrom(
  3660. Fts5Index *p,
  3661. Fts5Iter *pIter,
  3662. i64 iMatch
  3663. ){
  3664. while( 1 ){
  3665. i64 iRowid;
  3666. fts5MultiIterNext(p, pIter, 1, iMatch);
  3667. if( fts5MultiIterEof(p, pIter) ) break;
  3668. iRowid = fts5MultiIterRowid(pIter);
  3669. if( pIter->bRev==0 && iRowid>=iMatch ) break;
  3670. if( pIter->bRev!=0 && iRowid<=iMatch ) break;
  3671. }
  3672. }
  3673. /*
  3674. ** Return a pointer to a buffer containing the term associated with the
  3675. ** entry that the iterator currently points to.
  3676. */
  3677. static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
  3678. Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  3679. *pn = p->term.n;
  3680. return p->term.p;
  3681. }
  3682. /*
  3683. ** Allocate a new segment-id for the structure pStruct. The new segment
  3684. ** id must be between 1 and 65335 inclusive, and must not be used by
  3685. ** any currently existing segment. If a free segment id cannot be found,
  3686. ** SQLITE_FULL is returned.
  3687. **
  3688. ** If an error has already occurred, this function is a no-op. 0 is
  3689. ** returned in this case.
  3690. */
  3691. static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
  3692. int iSegid = 0;
  3693. if( p->rc==SQLITE_OK ){
  3694. if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
  3695. p->rc = SQLITE_FULL;
  3696. }else{
  3697. /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
  3698. ** array is 63 elements, or 252 bytes, in size. */
  3699. u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
  3700. int iLvl, iSeg;
  3701. int i;
  3702. u32 mask;
  3703. memset(aUsed, 0, sizeof(aUsed));
  3704. for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
  3705. for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
  3706. int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
  3707. if( iId<=FTS5_MAX_SEGMENT && iId>0 ){
  3708. aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
  3709. }
  3710. }
  3711. }
  3712. for(i=0; aUsed[i]==0xFFFFFFFF; i++);
  3713. mask = aUsed[i];
  3714. for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
  3715. iSegid += 1 + i*32;
  3716. #ifdef SQLITE_DEBUG
  3717. for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
  3718. for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
  3719. assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
  3720. }
  3721. }
  3722. assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );
  3723. {
  3724. sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
  3725. if( p->rc==SQLITE_OK ){
  3726. u8 aBlob[2] = {0xff, 0xff};
  3727. sqlite3_bind_int(pIdxSelect, 1, iSegid);
  3728. sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
  3729. assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
  3730. p->rc = sqlite3_reset(pIdxSelect);
  3731. sqlite3_bind_null(pIdxSelect, 2);
  3732. }
  3733. }
  3734. #endif
  3735. }
  3736. }
  3737. return iSegid;
  3738. }
  3739. /*
  3740. ** Discard all data currently cached in the hash-tables.
  3741. */
  3742. static void fts5IndexDiscardData(Fts5Index *p){
  3743. assert( p->pHash || p->nPendingData==0 );
  3744. if( p->pHash ){
  3745. sqlite3Fts5HashClear(p->pHash);
  3746. p->nPendingData = 0;
  3747. p->nPendingRow = 0;
  3748. p->flushRc = SQLITE_OK;
  3749. }
  3750. p->nContentlessDelete = 0;
  3751. }
  3752. /*
  3753. ** Return the size of the prefix, in bytes, that buffer
  3754. ** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
  3755. **
  3756. ** Buffer (pNew/<length-unknown>) is guaranteed to be greater
  3757. ** than buffer (pOld/nOld).
  3758. */
  3759. static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
  3760. int i;
  3761. for(i=0; i<nOld; i++){
  3762. if( pOld[i]!=pNew[i] ) break;
  3763. }
  3764. return i;
  3765. }
  3766. static void fts5WriteDlidxClear(
  3767. Fts5Index *p,
  3768. Fts5SegWriter *pWriter,
  3769. int bFlush /* If true, write dlidx to disk */
  3770. ){
  3771. int i;
  3772. assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
  3773. for(i=0; i<pWriter->nDlidx; i++){
  3774. Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
  3775. if( pDlidx->buf.n==0 ) break;
  3776. if( bFlush ){
  3777. assert( pDlidx->pgno!=0 );
  3778. fts5DataWrite(p,
  3779. FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
  3780. pDlidx->buf.p, pDlidx->buf.n
  3781. );
  3782. }
  3783. sqlite3Fts5BufferZero(&pDlidx->buf);
  3784. pDlidx->bPrevValid = 0;
  3785. }
  3786. }
  3787. /*
  3788. ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
  3789. ** Any new array elements are zeroed before returning.
  3790. */
  3791. static int fts5WriteDlidxGrow(
  3792. Fts5Index *p,
  3793. Fts5SegWriter *pWriter,
  3794. int nLvl
  3795. ){
  3796. if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
  3797. Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64(
  3798. pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
  3799. );
  3800. if( aDlidx==0 ){
  3801. p->rc = SQLITE_NOMEM;
  3802. }else{
  3803. size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
  3804. memset(&aDlidx[pWriter->nDlidx], 0, nByte);
  3805. pWriter->aDlidx = aDlidx;
  3806. pWriter->nDlidx = nLvl;
  3807. }
  3808. }
  3809. return p->rc;
  3810. }
  3811. /*
  3812. ** If the current doclist-index accumulating in pWriter->aDlidx[] is large
  3813. ** enough, flush it to disk and return 1. Otherwise discard it and return
  3814. ** zero.
  3815. */
  3816. static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
  3817. int bFlag = 0;
  3818. /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
  3819. ** to the database, also write the doclist-index to disk. */
  3820. if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
  3821. bFlag = 1;
  3822. }
  3823. fts5WriteDlidxClear(p, pWriter, bFlag);
  3824. pWriter->nEmpty = 0;
  3825. return bFlag;
  3826. }
  3827. /*
  3828. ** This function is called whenever processing of the doclist for the
  3829. ** last term on leaf page (pWriter->iBtPage) is completed.
  3830. **
  3831. ** The doclist-index for that term is currently stored in-memory within the
  3832. ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
  3833. ** writes it out to disk. Or, if it is too small to bother with, discards
  3834. ** it.
  3835. **
  3836. ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
  3837. */
  3838. static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
  3839. int bFlag;
  3840. assert( pWriter->iBtPage || pWriter->nEmpty==0 );
  3841. if( pWriter->iBtPage==0 ) return;
  3842. bFlag = fts5WriteFlushDlidx(p, pWriter);
  3843. if( p->rc==SQLITE_OK ){
  3844. const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
  3845. /* The following was already done in fts5WriteInit(): */
  3846. /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
  3847. sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
  3848. sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
  3849. sqlite3_step(p->pIdxWriter);
  3850. p->rc = sqlite3_reset(p->pIdxWriter);
  3851. sqlite3_bind_null(p->pIdxWriter, 2);
  3852. }
  3853. pWriter->iBtPage = 0;
  3854. }
  3855. /*
  3856. ** This is called once for each leaf page except the first that contains
  3857. ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
  3858. ** is larger than all terms written to earlier leaves, and equal to or
  3859. ** smaller than the first term on the new leaf.
  3860. **
  3861. ** If an error occurs, an error code is left in Fts5Index.rc. If an error
  3862. ** has already occurred when this function is called, it is a no-op.
  3863. */
  3864. static void fts5WriteBtreeTerm(
  3865. Fts5Index *p, /* FTS5 backend object */
  3866. Fts5SegWriter *pWriter, /* Writer object */
  3867. int nTerm, const u8 *pTerm /* First term on new page */
  3868. ){
  3869. fts5WriteFlushBtree(p, pWriter);
  3870. if( p->rc==SQLITE_OK ){
  3871. fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
  3872. pWriter->iBtPage = pWriter->writer.pgno;
  3873. }
  3874. }
  3875. /*
  3876. ** This function is called when flushing a leaf page that contains no
  3877. ** terms at all to disk.
  3878. */
  3879. static void fts5WriteBtreeNoTerm(
  3880. Fts5Index *p, /* FTS5 backend object */
  3881. Fts5SegWriter *pWriter /* Writer object */
  3882. ){
  3883. /* If there were no rowids on the leaf page either and the doclist-index
  3884. ** has already been started, append an 0x00 byte to it. */
  3885. if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
  3886. Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
  3887. assert( pDlidx->bPrevValid );
  3888. sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
  3889. }
  3890. /* Increment the "number of sequential leaves without a term" counter. */
  3891. pWriter->nEmpty++;
  3892. }
  3893. static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
  3894. i64 iRowid;
  3895. int iOff;
  3896. iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
  3897. fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
  3898. return iRowid;
  3899. }
  3900. /*
  3901. ** Rowid iRowid has just been appended to the current leaf page. It is the
  3902. ** first on the page. This function appends an appropriate entry to the current
  3903. ** doclist-index.
  3904. */
  3905. static void fts5WriteDlidxAppend(
  3906. Fts5Index *p,
  3907. Fts5SegWriter *pWriter,
  3908. i64 iRowid
  3909. ){
  3910. int i;
  3911. int bDone = 0;
  3912. for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
  3913. i64 iVal;
  3914. Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
  3915. if( pDlidx->buf.n>=p->pConfig->pgsz ){
  3916. /* The current doclist-index page is full. Write it to disk and push
  3917. ** a copy of iRowid (which will become the first rowid on the next
  3918. ** doclist-index leaf page) up into the next level of the b-tree
  3919. ** hierarchy. If the node being flushed is currently the root node,
  3920. ** also push its first rowid upwards. */
  3921. pDlidx->buf.p[0] = 0x01; /* Not the root node */
  3922. fts5DataWrite(p,
  3923. FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
  3924. pDlidx->buf.p, pDlidx->buf.n
  3925. );
  3926. fts5WriteDlidxGrow(p, pWriter, i+2);
  3927. pDlidx = &pWriter->aDlidx[i];
  3928. if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
  3929. i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
  3930. /* This was the root node. Push its first rowid up to the new root. */
  3931. pDlidx[1].pgno = pDlidx->pgno;
  3932. sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
  3933. sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
  3934. sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
  3935. pDlidx[1].bPrevValid = 1;
  3936. pDlidx[1].iPrev = iFirst;
  3937. }
  3938. sqlite3Fts5BufferZero(&pDlidx->buf);
  3939. pDlidx->bPrevValid = 0;
  3940. pDlidx->pgno++;
  3941. }else{
  3942. bDone = 1;
  3943. }
  3944. if( pDlidx->bPrevValid ){
  3945. iVal = (u64)iRowid - (u64)pDlidx->iPrev;
  3946. }else{
  3947. i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
  3948. assert( pDlidx->buf.n==0 );
  3949. sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
  3950. sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
  3951. iVal = iRowid;
  3952. }
  3953. sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
  3954. pDlidx->bPrevValid = 1;
  3955. pDlidx->iPrev = iRowid;
  3956. }
  3957. }
  3958. static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  3959. static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  3960. Fts5PageWriter *pPage = &pWriter->writer;
  3961. i64 iRowid;
  3962. assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
  3963. /* Set the szLeaf header field. */
  3964. assert( 0==fts5GetU16(&pPage->buf.p[2]) );
  3965. fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
  3966. if( pWriter->bFirstTermInPage ){
  3967. /* No term was written to this page. */
  3968. assert( pPage->pgidx.n==0 );
  3969. fts5WriteBtreeNoTerm(p, pWriter);
  3970. }else{
  3971. /* Append the pgidx to the page buffer. Set the szLeaf header field. */
  3972. fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
  3973. }
  3974. /* Write the page out to disk */
  3975. iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
  3976. fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
  3977. /* Initialize the next page. */
  3978. fts5BufferZero(&pPage->buf);
  3979. fts5BufferZero(&pPage->pgidx);
  3980. fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
  3981. pPage->iPrevPgidx = 0;
  3982. pPage->pgno++;
  3983. /* Increase the leaves written counter */
  3984. pWriter->nLeafWritten++;
  3985. /* The new leaf holds no terms or rowids */
  3986. pWriter->bFirstTermInPage = 1;
  3987. pWriter->bFirstRowidInPage = 1;
  3988. }
  3989. /*
  3990. ** Append term pTerm/nTerm to the segment being written by the writer passed
  3991. ** as the second argument.
  3992. **
  3993. ** If an error occurs, set the Fts5Index.rc error code. If an error has
  3994. ** already occurred, this function is a no-op.
  3995. */
  3996. static void fts5WriteAppendTerm(
  3997. Fts5Index *p,
  3998. Fts5SegWriter *pWriter,
  3999. int nTerm, const u8 *pTerm
  4000. ){
  4001. int nPrefix; /* Bytes of prefix compression for term */
  4002. Fts5PageWriter *pPage = &pWriter->writer;
  4003. Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
  4004. int nMin = MIN(pPage->term.n, nTerm);
  4005. assert( p->rc==SQLITE_OK );
  4006. assert( pPage->buf.n>=4 );
  4007. assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
  4008. /* If the current leaf page is full, flush it to disk. */
  4009. if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
  4010. if( pPage->buf.n>4 ){
  4011. fts5WriteFlushLeaf(p, pWriter);
  4012. if( p->rc!=SQLITE_OK ) return;
  4013. }
  4014. fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
  4015. }
  4016. /* TODO1: Updating pgidx here. */
  4017. pPgidx->n += sqlite3Fts5PutVarint(
  4018. &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
  4019. );
  4020. pPage->iPrevPgidx = pPage->buf.n;
  4021. #if 0
  4022. fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
  4023. pPgidx->n += 2;
  4024. #endif
  4025. if( pWriter->bFirstTermInPage ){
  4026. nPrefix = 0;
  4027. if( pPage->pgno!=1 ){
  4028. /* This is the first term on a leaf that is not the leftmost leaf in
  4029. ** the segment b-tree. In this case it is necessary to add a term to
  4030. ** the b-tree hierarchy that is (a) larger than the largest term
  4031. ** already written to the segment and (b) smaller than or equal to
  4032. ** this term. In other words, a prefix of (pTerm/nTerm) that is one
  4033. ** byte longer than the longest prefix (pTerm/nTerm) shares with the
  4034. ** previous term.
  4035. **
  4036. ** Usually, the previous term is available in pPage->term. The exception
  4037. ** is if this is the first term written in an incremental-merge step.
  4038. ** In this case the previous term is not available, so just write a
  4039. ** copy of (pTerm/nTerm) into the parent node. This is slightly
  4040. ** inefficient, but still correct. */
  4041. int n = nTerm;
  4042. if( pPage->term.n ){
  4043. n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm);
  4044. }
  4045. fts5WriteBtreeTerm(p, pWriter, n, pTerm);
  4046. if( p->rc!=SQLITE_OK ) return;
  4047. pPage = &pWriter->writer;
  4048. }
  4049. }else{
  4050. nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm);
  4051. fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
  4052. }
  4053. /* Append the number of bytes of new data, then the term data itself
  4054. ** to the page. */
  4055. fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
  4056. fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
  4057. /* Update the Fts5PageWriter.term field. */
  4058. fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
  4059. pWriter->bFirstTermInPage = 0;
  4060. pWriter->bFirstRowidInPage = 0;
  4061. pWriter->bFirstRowidInDoclist = 1;
  4062. assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
  4063. pWriter->aDlidx[0].pgno = pPage->pgno;
  4064. }
  4065. /*
  4066. ** Append a rowid and position-list size field to the writers output.
  4067. */
  4068. static void fts5WriteAppendRowid(
  4069. Fts5Index *p,
  4070. Fts5SegWriter *pWriter,
  4071. i64 iRowid
  4072. ){
  4073. if( p->rc==SQLITE_OK ){
  4074. Fts5PageWriter *pPage = &pWriter->writer;
  4075. if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
  4076. fts5WriteFlushLeaf(p, pWriter);
  4077. }
  4078. /* If this is to be the first rowid written to the page, set the
  4079. ** rowid-pointer in the page-header. Also append a value to the dlidx
  4080. ** buffer, in case a doclist-index is required. */
  4081. if( pWriter->bFirstRowidInPage ){
  4082. fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
  4083. fts5WriteDlidxAppend(p, pWriter, iRowid);
  4084. }
  4085. /* Write the rowid. */
  4086. if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
  4087. fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
  4088. }else{
  4089. assert_nc( p->rc || iRowid>pWriter->iPrevRowid );
  4090. fts5BufferAppendVarint(&p->rc, &pPage->buf,
  4091. (u64)iRowid - (u64)pWriter->iPrevRowid
  4092. );
  4093. }
  4094. pWriter->iPrevRowid = iRowid;
  4095. pWriter->bFirstRowidInDoclist = 0;
  4096. pWriter->bFirstRowidInPage = 0;
  4097. }
  4098. }
  4099. static void fts5WriteAppendPoslistData(
  4100. Fts5Index *p,
  4101. Fts5SegWriter *pWriter,
  4102. const u8 *aData,
  4103. int nData
  4104. ){
  4105. Fts5PageWriter *pPage = &pWriter->writer;
  4106. const u8 *a = aData;
  4107. int n = nData;
  4108. assert( p->pConfig->pgsz>0 || p->rc!=SQLITE_OK );
  4109. while( p->rc==SQLITE_OK
  4110. && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
  4111. ){
  4112. int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
  4113. int nCopy = 0;
  4114. while( nCopy<nReq ){
  4115. i64 dummy;
  4116. nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
  4117. }
  4118. fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
  4119. a += nCopy;
  4120. n -= nCopy;
  4121. fts5WriteFlushLeaf(p, pWriter);
  4122. }
  4123. if( n>0 ){
  4124. fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
  4125. }
  4126. }
  4127. /*
  4128. ** Flush any data cached by the writer object to the database. Free any
  4129. ** allocations associated with the writer.
  4130. */
  4131. static void fts5WriteFinish(
  4132. Fts5Index *p,
  4133. Fts5SegWriter *pWriter, /* Writer object */
  4134. int *pnLeaf /* OUT: Number of leaf pages in b-tree */
  4135. ){
  4136. int i;
  4137. Fts5PageWriter *pLeaf = &pWriter->writer;
  4138. if( p->rc==SQLITE_OK ){
  4139. assert( pLeaf->pgno>=1 );
  4140. if( pLeaf->buf.n>4 ){
  4141. fts5WriteFlushLeaf(p, pWriter);
  4142. }
  4143. *pnLeaf = pLeaf->pgno-1;
  4144. if( pLeaf->pgno>1 ){
  4145. fts5WriteFlushBtree(p, pWriter);
  4146. }
  4147. }
  4148. fts5BufferFree(&pLeaf->term);
  4149. fts5BufferFree(&pLeaf->buf);
  4150. fts5BufferFree(&pLeaf->pgidx);
  4151. fts5BufferFree(&pWriter->btterm);
  4152. for(i=0; i<pWriter->nDlidx; i++){
  4153. sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
  4154. }
  4155. sqlite3_free(pWriter->aDlidx);
  4156. }
  4157. static void fts5WriteInit(
  4158. Fts5Index *p,
  4159. Fts5SegWriter *pWriter,
  4160. int iSegid
  4161. ){
  4162. const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
  4163. memset(pWriter, 0, sizeof(Fts5SegWriter));
  4164. pWriter->iSegid = iSegid;
  4165. fts5WriteDlidxGrow(p, pWriter, 1);
  4166. pWriter->writer.pgno = 1;
  4167. pWriter->bFirstTermInPage = 1;
  4168. pWriter->iBtPage = 1;
  4169. assert( pWriter->writer.buf.n==0 );
  4170. assert( pWriter->writer.pgidx.n==0 );
  4171. /* Grow the two buffers to pgsz + padding bytes in size. */
  4172. sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
  4173. sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);
  4174. if( p->pIdxWriter==0 ){
  4175. Fts5Config *pConfig = p->pConfig;
  4176. fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
  4177. "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
  4178. pConfig->zDb, pConfig->zName
  4179. ));
  4180. }
  4181. if( p->rc==SQLITE_OK ){
  4182. /* Initialize the 4-byte leaf-page header to 0x00. */
  4183. memset(pWriter->writer.buf.p, 0, 4);
  4184. pWriter->writer.buf.n = 4;
  4185. /* Bind the current output segment id to the index-writer. This is an
  4186. ** optimization over binding the same value over and over as rows are
  4187. ** inserted into %_idx by the current writer. */
  4188. sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
  4189. }
  4190. }
  4191. /*
  4192. ** Iterator pIter was used to iterate through the input segments of on an
  4193. ** incremental merge operation. This function is called if the incremental
  4194. ** merge step has finished but the input has not been completely exhausted.
  4195. */
  4196. static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
  4197. int i;
  4198. Fts5Buffer buf;
  4199. memset(&buf, 0, sizeof(Fts5Buffer));
  4200. for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK; i++){
  4201. Fts5SegIter *pSeg = &pIter->aSeg[i];
  4202. if( pSeg->pSeg==0 ){
  4203. /* no-op */
  4204. }else if( pSeg->pLeaf==0 ){
  4205. /* All keys from this input segment have been transfered to the output.
  4206. ** Set both the first and last page-numbers to 0 to indicate that the
  4207. ** segment is now empty. */
  4208. pSeg->pSeg->pgnoLast = 0;
  4209. pSeg->pSeg->pgnoFirst = 0;
  4210. }else{
  4211. int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */
  4212. i64 iLeafRowid;
  4213. Fts5Data *pData;
  4214. int iId = pSeg->pSeg->iSegid;
  4215. u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
  4216. iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
  4217. pData = fts5LeafRead(p, iLeafRowid);
  4218. if( pData ){
  4219. if( iOff>pData->szLeaf ){
  4220. /* This can occur if the pages that the segments occupy overlap - if
  4221. ** a single page has been assigned to more than one segment. In
  4222. ** this case a prior iteration of this loop may have corrupted the
  4223. ** segment currently being trimmed. */
  4224. p->rc = FTS5_CORRUPT;
  4225. }else{
  4226. fts5BufferZero(&buf);
  4227. fts5BufferGrow(&p->rc, &buf, pData->nn);
  4228. fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
  4229. fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
  4230. fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
  4231. fts5BufferAppendBlob(&p->rc, &buf,pData->szLeaf-iOff,&pData->p[iOff]);
  4232. if( p->rc==SQLITE_OK ){
  4233. /* Set the szLeaf field */
  4234. fts5PutU16(&buf.p[2], (u16)buf.n);
  4235. }
  4236. /* Set up the new page-index array */
  4237. fts5BufferAppendVarint(&p->rc, &buf, 4);
  4238. if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
  4239. && pSeg->iEndofDoclist<pData->szLeaf
  4240. && pSeg->iPgidxOff<=pData->nn
  4241. ){
  4242. int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
  4243. fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
  4244. fts5BufferAppendBlob(&p->rc, &buf,
  4245. pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
  4246. );
  4247. }
  4248. pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
  4249. fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
  4250. fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
  4251. }
  4252. fts5DataRelease(pData);
  4253. }
  4254. }
  4255. }
  4256. fts5BufferFree(&buf);
  4257. }
  4258. static void fts5MergeChunkCallback(
  4259. Fts5Index *p,
  4260. void *pCtx,
  4261. const u8 *pChunk, int nChunk
  4262. ){
  4263. Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
  4264. fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
  4265. }
  4266. /*
  4267. **
  4268. */
  4269. static void fts5IndexMergeLevel(
  4270. Fts5Index *p, /* FTS5 backend object */
  4271. Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */
  4272. int iLvl, /* Level to read input from */
  4273. int *pnRem /* Write up to this many output leaves */
  4274. ){
  4275. Fts5Structure *pStruct = *ppStruct;
  4276. Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  4277. Fts5StructureLevel *pLvlOut;
  4278. Fts5Iter *pIter = 0; /* Iterator to read input data */
  4279. int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */
  4280. int nInput; /* Number of input segments */
  4281. Fts5SegWriter writer; /* Writer object */
  4282. Fts5StructureSegment *pSeg; /* Output segment */
  4283. Fts5Buffer term;
  4284. int bOldest; /* True if the output segment is the oldest */
  4285. int eDetail = p->pConfig->eDetail;
  4286. const int flags = FTS5INDEX_QUERY_NOOUTPUT;
  4287. int bTermWritten = 0; /* True if current term already output */
  4288. assert( iLvl<pStruct->nLevel );
  4289. assert( pLvl->nMerge<=pLvl->nSeg );
  4290. memset(&writer, 0, sizeof(Fts5SegWriter));
  4291. memset(&term, 0, sizeof(Fts5Buffer));
  4292. if( pLvl->nMerge ){
  4293. pLvlOut = &pStruct->aLevel[iLvl+1];
  4294. assert( pLvlOut->nSeg>0 );
  4295. nInput = pLvl->nMerge;
  4296. pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
  4297. fts5WriteInit(p, &writer, pSeg->iSegid);
  4298. writer.writer.pgno = pSeg->pgnoLast+1;
  4299. writer.iBtPage = 0;
  4300. }else{
  4301. int iSegid = fts5AllocateSegid(p, pStruct);
  4302. /* Extend the Fts5Structure object as required to ensure the output
  4303. ** segment exists. */
  4304. if( iLvl==pStruct->nLevel-1 ){
  4305. fts5StructureAddLevel(&p->rc, ppStruct);
  4306. pStruct = *ppStruct;
  4307. }
  4308. fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
  4309. if( p->rc ) return;
  4310. pLvl = &pStruct->aLevel[iLvl];
  4311. pLvlOut = &pStruct->aLevel[iLvl+1];
  4312. fts5WriteInit(p, &writer, iSegid);
  4313. /* Add the new segment to the output level */
  4314. pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
  4315. pLvlOut->nSeg++;
  4316. pSeg->pgnoFirst = 1;
  4317. pSeg->iSegid = iSegid;
  4318. pStruct->nSegment++;
  4319. /* Read input from all segments in the input level */
  4320. nInput = pLvl->nSeg;
  4321. /* Set the range of origins that will go into the output segment. */
  4322. if( pStruct->nOriginCntr>0 ){
  4323. pSeg->iOrigin1 = pLvl->aSeg[0].iOrigin1;
  4324. pSeg->iOrigin2 = pLvl->aSeg[pLvl->nSeg-1].iOrigin2;
  4325. }
  4326. }
  4327. bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
  4328. assert( iLvl>=0 );
  4329. for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
  4330. fts5MultiIterEof(p, pIter)==0;
  4331. fts5MultiIterNext(p, pIter, 0, 0)
  4332. ){
  4333. Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  4334. int nPos; /* position-list size field value */
  4335. int nTerm;
  4336. const u8 *pTerm;
  4337. pTerm = fts5MultiIterTerm(pIter, &nTerm);
  4338. if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm) ){
  4339. if( pnRem && writer.nLeafWritten>nRem ){
  4340. break;
  4341. }
  4342. fts5BufferSet(&p->rc, &term, nTerm, pTerm);
  4343. bTermWritten =0;
  4344. }
  4345. /* Check for key annihilation. */
  4346. if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;
  4347. if( p->rc==SQLITE_OK && bTermWritten==0 ){
  4348. /* This is a new term. Append a term to the output segment. */
  4349. fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
  4350. bTermWritten = 1;
  4351. }
  4352. /* Append the rowid to the output */
  4353. /* WRITEPOSLISTSIZE */
  4354. fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
  4355. if( eDetail==FTS5_DETAIL_NONE ){
  4356. if( pSegIter->bDel ){
  4357. fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
  4358. if( pSegIter->nPos>0 ){
  4359. fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
  4360. }
  4361. }
  4362. }else{
  4363. /* Append the position-list data to the output */
  4364. nPos = pSegIter->nPos*2 + pSegIter->bDel;
  4365. fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
  4366. fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
  4367. }
  4368. }
  4369. /* Flush the last leaf page to disk. Set the output segment b-tree height
  4370. ** and last leaf page number at the same time. */
  4371. fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
  4372. assert( pIter!=0 || p->rc!=SQLITE_OK );
  4373. if( fts5MultiIterEof(p, pIter) ){
  4374. int i;
  4375. /* Remove the redundant segments from the %_data table */
  4376. assert( pSeg->nEntry==0 );
  4377. for(i=0; i<nInput; i++){
  4378. Fts5StructureSegment *pOld = &pLvl->aSeg[i];
  4379. pSeg->nEntry += (pOld->nEntry - pOld->nEntryTombstone);
  4380. fts5DataRemoveSegment(p, pOld);
  4381. }
  4382. /* Remove the redundant segments from the input level */
  4383. if( pLvl->nSeg!=nInput ){
  4384. int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
  4385. memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
  4386. }
  4387. pStruct->nSegment -= nInput;
  4388. pLvl->nSeg -= nInput;
  4389. pLvl->nMerge = 0;
  4390. if( pSeg->pgnoLast==0 ){
  4391. pLvlOut->nSeg--;
  4392. pStruct->nSegment--;
  4393. }
  4394. }else{
  4395. assert( pSeg->pgnoLast>0 );
  4396. fts5TrimSegments(p, pIter);
  4397. pLvl->nMerge = nInput;
  4398. }
  4399. fts5MultiIterFree(pIter);
  4400. fts5BufferFree(&term);
  4401. if( pnRem ) *pnRem -= writer.nLeafWritten;
  4402. }
  4403. /*
  4404. ** If this is not a contentless_delete=1 table, or if the 'deletemerge'
  4405. ** configuration option is set to 0, then this function always returns -1.
  4406. ** Otherwise, it searches the structure object passed as the second argument
  4407. ** for a level suitable for merging due to having a large number of
  4408. ** tombstones in the tombstone hash. If one is found, its index is returned.
  4409. ** Otherwise, if there is no suitable level, -1.
  4410. */
  4411. static int fts5IndexFindDeleteMerge(Fts5Index *p, Fts5Structure *pStruct){
  4412. Fts5Config *pConfig = p->pConfig;
  4413. int iRet = -1;
  4414. if( pConfig->bContentlessDelete && pConfig->nDeleteMerge>0 ){
  4415. int ii;
  4416. int nBest = 0;
  4417. for(ii=0; ii<pStruct->nLevel; ii++){
  4418. Fts5StructureLevel *pLvl = &pStruct->aLevel[ii];
  4419. i64 nEntry = 0;
  4420. i64 nTomb = 0;
  4421. int iSeg;
  4422. for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
  4423. nEntry += pLvl->aSeg[iSeg].nEntry;
  4424. nTomb += pLvl->aSeg[iSeg].nEntryTombstone;
  4425. }
  4426. assert_nc( nEntry>0 || pLvl->nSeg==0 );
  4427. if( nEntry>0 ){
  4428. int nPercent = (nTomb * 100) / nEntry;
  4429. if( nPercent>=pConfig->nDeleteMerge && nPercent>nBest ){
  4430. iRet = ii;
  4431. nBest = nPercent;
  4432. }
  4433. }
  4434. /* If pLvl is already the input level to an ongoing merge, look no
  4435. ** further for a merge candidate. The caller should be allowed to
  4436. ** continue merging from pLvl first. */
  4437. if( pLvl->nMerge ) break;
  4438. }
  4439. }
  4440. return iRet;
  4441. }
  4442. /*
  4443. ** Do up to nPg pages of automerge work on the index.
  4444. **
  4445. ** Return true if any changes were actually made, or false otherwise.
  4446. */
  4447. static int fts5IndexMerge(
  4448. Fts5Index *p, /* FTS5 backend object */
  4449. Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
  4450. int nPg, /* Pages of work to do */
  4451. int nMin /* Minimum number of segments to merge */
  4452. ){
  4453. int nRem = nPg;
  4454. int bRet = 0;
  4455. Fts5Structure *pStruct = *ppStruct;
  4456. while( nRem>0 && p->rc==SQLITE_OK ){
  4457. int iLvl; /* To iterate through levels */
  4458. int iBestLvl = 0; /* Level offering the most input segments */
  4459. int nBest = 0; /* Number of input segments on best level */
  4460. /* Set iBestLvl to the level to read input segments from. Or to -1 if
  4461. ** there is no level suitable to merge segments from. */
  4462. assert( pStruct->nLevel>0 );
  4463. for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
  4464. Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  4465. if( pLvl->nMerge ){
  4466. if( pLvl->nMerge>nBest ){
  4467. iBestLvl = iLvl;
  4468. nBest = nMin;
  4469. }
  4470. break;
  4471. }
  4472. if( pLvl->nSeg>nBest ){
  4473. nBest = pLvl->nSeg;
  4474. iBestLvl = iLvl;
  4475. }
  4476. }
  4477. if( nBest<nMin ){
  4478. iBestLvl = fts5IndexFindDeleteMerge(p, pStruct);
  4479. }
  4480. if( iBestLvl<0 ) break;
  4481. bRet = 1;
  4482. fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
  4483. if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
  4484. fts5StructurePromote(p, iBestLvl+1, pStruct);
  4485. }
  4486. if( nMin==1 ) nMin = 2;
  4487. }
  4488. *ppStruct = pStruct;
  4489. return bRet;
  4490. }
  4491. /*
  4492. ** A total of nLeaf leaf pages of data has just been flushed to a level-0
  4493. ** segment. This function updates the write-counter accordingly and, if
  4494. ** necessary, performs incremental merge work.
  4495. **
  4496. ** If an error occurs, set the Fts5Index.rc error code. If an error has
  4497. ** already occurred, this function is a no-op.
  4498. */
  4499. static void fts5IndexAutomerge(
  4500. Fts5Index *p, /* FTS5 backend object */
  4501. Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
  4502. int nLeaf /* Number of output leaves just written */
  4503. ){
  4504. if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0) ){
  4505. Fts5Structure *pStruct = *ppStruct;
  4506. u64 nWrite; /* Initial value of write-counter */
  4507. int nWork; /* Number of work-quanta to perform */
  4508. int nRem; /* Number of leaf pages left to write */
  4509. /* Update the write-counter. While doing so, set nWork. */
  4510. nWrite = pStruct->nWriteCounter;
  4511. nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
  4512. pStruct->nWriteCounter += nLeaf;
  4513. nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
  4514. fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
  4515. }
  4516. }
  4517. static void fts5IndexCrisismerge(
  4518. Fts5Index *p, /* FTS5 backend object */
  4519. Fts5Structure **ppStruct /* IN/OUT: Current structure of index */
  4520. ){
  4521. const int nCrisis = p->pConfig->nCrisisMerge;
  4522. Fts5Structure *pStruct = *ppStruct;
  4523. if( pStruct && pStruct->nLevel>0 ){
  4524. int iLvl = 0;
  4525. while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
  4526. fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
  4527. assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
  4528. fts5StructurePromote(p, iLvl+1, pStruct);
  4529. iLvl++;
  4530. }
  4531. *ppStruct = pStruct;
  4532. }
  4533. }
  4534. static int fts5IndexReturn(Fts5Index *p){
  4535. int rc = p->rc;
  4536. p->rc = SQLITE_OK;
  4537. return rc;
  4538. }
  4539. /*
  4540. ** Close the read-only blob handle, if it is open.
  4541. */
  4542. void sqlite3Fts5IndexCloseReader(Fts5Index *p){
  4543. fts5IndexCloseReader(p);
  4544. fts5IndexReturn(p);
  4545. }
  4546. typedef struct Fts5FlushCtx Fts5FlushCtx;
  4547. struct Fts5FlushCtx {
  4548. Fts5Index *pIdx;
  4549. Fts5SegWriter writer;
  4550. };
  4551. /*
  4552. ** Buffer aBuf[] contains a list of varints, all small enough to fit
  4553. ** in a 32-bit integer. Return the size of the largest prefix of this
  4554. ** list nMax bytes or less in size.
  4555. */
  4556. static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
  4557. int ret;
  4558. u32 dummy;
  4559. ret = fts5GetVarint32(aBuf, dummy);
  4560. if( ret<nMax ){
  4561. while( 1 ){
  4562. int i = fts5GetVarint32(&aBuf[ret], dummy);
  4563. if( (ret + i) > nMax ) break;
  4564. ret += i;
  4565. }
  4566. }
  4567. return ret;
  4568. }
  4569. /*
  4570. ** Execute the SQL statement:
  4571. **
  4572. ** DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno);
  4573. **
  4574. ** This is used when a secure-delete operation removes the last term
  4575. ** from a segment leaf page. In that case the %_idx entry is removed
  4576. ** too. This is done to ensure that if all instances of a token are
  4577. ** removed from an fts5 database in secure-delete mode, no trace of
  4578. ** the token itself remains in the database.
  4579. */
  4580. static void fts5SecureDeleteIdxEntry(
  4581. Fts5Index *p, /* FTS5 backend object */
  4582. int iSegid, /* Id of segment to delete entry for */
  4583. int iPgno /* Page number within segment */
  4584. ){
  4585. if( iPgno!=1 ){
  4586. assert( p->pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE );
  4587. if( p->pDeleteFromIdx==0 ){
  4588. fts5IndexPrepareStmt(p, &p->pDeleteFromIdx, sqlite3_mprintf(
  4589. "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)",
  4590. p->pConfig->zDb, p->pConfig->zName
  4591. ));
  4592. }
  4593. if( p->rc==SQLITE_OK ){
  4594. sqlite3_bind_int(p->pDeleteFromIdx, 1, iSegid);
  4595. sqlite3_bind_int(p->pDeleteFromIdx, 2, iPgno);
  4596. sqlite3_step(p->pDeleteFromIdx);
  4597. p->rc = sqlite3_reset(p->pDeleteFromIdx);
  4598. }
  4599. }
  4600. }
  4601. /*
  4602. ** This is called when a secure-delete operation removes a position-list
  4603. ** that overflows onto segment page iPgno of segment pSeg. This function
  4604. ** rewrites node iPgno, and possibly one or more of its right-hand peers,
  4605. ** to remove this portion of the position list.
  4606. **
  4607. ** Output variable (*pbLastInDoclist) is set to true if the position-list
  4608. ** removed is followed by a new term or the end-of-segment, or false if
  4609. ** it is followed by another rowid/position list.
  4610. */
  4611. static void fts5SecureDeleteOverflow(
  4612. Fts5Index *p,
  4613. Fts5StructureSegment *pSeg,
  4614. int iPgno,
  4615. int *pbLastInDoclist
  4616. ){
  4617. const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
  4618. int pgno;
  4619. Fts5Data *pLeaf = 0;
  4620. assert( iPgno!=1 );
  4621. *pbLastInDoclist = 1;
  4622. for(pgno=iPgno; p->rc==SQLITE_OK && pgno<=pSeg->pgnoLast; pgno++){
  4623. i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
  4624. int iNext = 0;
  4625. u8 *aPg = 0;
  4626. pLeaf = fts5DataRead(p, iRowid);
  4627. if( pLeaf==0 ) break;
  4628. aPg = pLeaf->p;
  4629. iNext = fts5GetU16(&aPg[0]);
  4630. if( iNext!=0 ){
  4631. *pbLastInDoclist = 0;
  4632. }
  4633. if( iNext==0 && pLeaf->szLeaf!=pLeaf->nn ){
  4634. fts5GetVarint32(&aPg[pLeaf->szLeaf], iNext);
  4635. }
  4636. if( iNext==0 ){
  4637. /* The page contains no terms or rowids. Replace it with an empty
  4638. ** page and move on to the right-hand peer. */
  4639. const u8 aEmpty[] = {0x00, 0x00, 0x00, 0x04};
  4640. assert_nc( bDetailNone==0 || pLeaf->nn==4 );
  4641. if( bDetailNone==0 ) fts5DataWrite(p, iRowid, aEmpty, sizeof(aEmpty));
  4642. fts5DataRelease(pLeaf);
  4643. pLeaf = 0;
  4644. }else if( bDetailNone ){
  4645. break;
  4646. }else if( iNext>=pLeaf->szLeaf || pLeaf->nn<pLeaf->szLeaf || iNext<4 ){
  4647. p->rc = FTS5_CORRUPT;
  4648. break;
  4649. }else{
  4650. int nShift = iNext - 4;
  4651. int nPg;
  4652. int nIdx = 0;
  4653. u8 *aIdx = 0;
  4654. /* Unless the current page footer is 0 bytes in size (in which case
  4655. ** the new page footer will be as well), allocate and populate a
  4656. ** buffer containing the new page footer. Set stack variables aIdx
  4657. ** and nIdx accordingly. */
  4658. if( pLeaf->nn>pLeaf->szLeaf ){
  4659. int iFirst = 0;
  4660. int i1 = pLeaf->szLeaf;
  4661. int i2 = 0;
  4662. i1 += fts5GetVarint32(&aPg[i1], iFirst);
  4663. if( iFirst<iNext ){
  4664. p->rc = FTS5_CORRUPT;
  4665. break;
  4666. }
  4667. aIdx = sqlite3Fts5MallocZero(&p->rc, (pLeaf->nn-pLeaf->szLeaf)+2);
  4668. if( aIdx==0 ) break;
  4669. i2 = sqlite3Fts5PutVarint(aIdx, iFirst-nShift);
  4670. if( i1<pLeaf->nn ){
  4671. memcpy(&aIdx[i2], &aPg[i1], pLeaf->nn-i1);
  4672. i2 += (pLeaf->nn-i1);
  4673. }
  4674. nIdx = i2;
  4675. }
  4676. /* Modify the contents of buffer aPg[]. Set nPg to the new size
  4677. ** in bytes. The new page is always smaller than the old. */
  4678. nPg = pLeaf->szLeaf - nShift;
  4679. memmove(&aPg[4], &aPg[4+nShift], nPg-4);
  4680. fts5PutU16(&aPg[2], nPg);
  4681. if( fts5GetU16(&aPg[0]) ) fts5PutU16(&aPg[0], 4);
  4682. if( nIdx>0 ){
  4683. memcpy(&aPg[nPg], aIdx, nIdx);
  4684. nPg += nIdx;
  4685. }
  4686. sqlite3_free(aIdx);
  4687. /* Write the new page to disk and exit the loop */
  4688. assert( nPg>4 || fts5GetU16(aPg)==0 );
  4689. fts5DataWrite(p, iRowid, aPg, nPg);
  4690. break;
  4691. }
  4692. }
  4693. fts5DataRelease(pLeaf);
  4694. }
  4695. /*
  4696. ** Completely remove the entry that pSeg currently points to from
  4697. ** the database.
  4698. */
  4699. static void fts5DoSecureDelete(
  4700. Fts5Index *p,
  4701. Fts5SegIter *pSeg
  4702. ){
  4703. const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
  4704. int iSegid = pSeg->pSeg->iSegid;
  4705. u8 *aPg = pSeg->pLeaf->p;
  4706. int nPg = pSeg->pLeaf->nn;
  4707. int iPgIdx = pSeg->pLeaf->szLeaf;
  4708. u64 iDelta = 0;
  4709. int iNextOff = 0;
  4710. int iOff = 0;
  4711. int nIdx = 0;
  4712. u8 *aIdx = 0;
  4713. int bLastInDoclist = 0;
  4714. int iIdx = 0;
  4715. int iStart = 0;
  4716. int iDelKeyOff = 0; /* Offset of deleted key, if any */
  4717. nIdx = nPg-iPgIdx;
  4718. aIdx = sqlite3Fts5MallocZero(&p->rc, ((i64)nIdx)+16);
  4719. if( p->rc ) return;
  4720. memcpy(aIdx, &aPg[iPgIdx], nIdx);
  4721. /* At this point segment iterator pSeg points to the entry
  4722. ** this function should remove from the b-tree segment.
  4723. **
  4724. ** In detail=full or detail=column mode, pSeg->iLeafOffset is the
  4725. ** offset of the first byte in the position-list for the entry to
  4726. ** remove. Immediately before this comes two varints that will also
  4727. ** need to be removed:
  4728. **
  4729. ** + the rowid or delta rowid value for the entry, and
  4730. ** + the size of the position list in bytes.
  4731. **
  4732. ** Or, in detail=none mode, there is a single varint prior to
  4733. ** pSeg->iLeafOffset - the rowid or delta rowid value.
  4734. **
  4735. ** This block sets the following variables:
  4736. **
  4737. ** iStart:
  4738. ** The offset of the first byte of the rowid or delta-rowid
  4739. ** value for the doclist entry being removed.
  4740. **
  4741. ** iDelta:
  4742. ** The value of the rowid or delta-rowid value for the doclist
  4743. ** entry being removed.
  4744. **
  4745. ** iNextOff:
  4746. ** The offset of the next entry following the position list
  4747. ** for the one being removed. If the position list for this
  4748. ** entry overflows onto the next leaf page, this value will be
  4749. ** greater than pLeaf->szLeaf.
  4750. */
  4751. {
  4752. int iSOP; /* Start-Of-Position-list */
  4753. if( pSeg->iLeafPgno==pSeg->iTermLeafPgno ){
  4754. iStart = pSeg->iTermLeafOffset;
  4755. }else{
  4756. iStart = fts5GetU16(&aPg[0]);
  4757. }
  4758. iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
  4759. assert_nc( iSOP<=pSeg->iLeafOffset );
  4760. if( bDetailNone ){
  4761. while( iSOP<pSeg->iLeafOffset ){
  4762. if( aPg[iSOP]==0x00 ) iSOP++;
  4763. if( aPg[iSOP]==0x00 ) iSOP++;
  4764. iStart = iSOP;
  4765. iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
  4766. }
  4767. iNextOff = iSOP;
  4768. if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++;
  4769. if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++;
  4770. }else{
  4771. int nPos = 0;
  4772. iSOP += fts5GetVarint32(&aPg[iSOP], nPos);
  4773. while( iSOP<pSeg->iLeafOffset ){
  4774. iStart = iSOP + (nPos/2);
  4775. iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
  4776. iSOP += fts5GetVarint32(&aPg[iSOP], nPos);
  4777. }
  4778. assert_nc( iSOP==pSeg->iLeafOffset );
  4779. iNextOff = pSeg->iLeafOffset + pSeg->nPos;
  4780. }
  4781. }
  4782. iOff = iStart;
  4783. /* If the position-list for the entry being removed flows over past
  4784. ** the end of this page, delete the portion of the position-list on the
  4785. ** next page and beyond.
  4786. **
  4787. ** Set variable bLastInDoclist to true if this entry happens
  4788. ** to be the last rowid in the doclist for its term. */
  4789. if( iNextOff>=iPgIdx ){
  4790. int pgno = pSeg->iLeafPgno+1;
  4791. fts5SecureDeleteOverflow(p, pSeg->pSeg, pgno, &bLastInDoclist);
  4792. iNextOff = iPgIdx;
  4793. }
  4794. if( pSeg->bDel==0 ){
  4795. if( iNextOff!=iPgIdx ){
  4796. /* Loop through the page-footer. If iNextOff (offset of the
  4797. ** entry following the one we are removing) is equal to the
  4798. ** offset of a key on this page, then the entry is the last
  4799. ** in its doclist. */
  4800. int iKeyOff = 0;
  4801. for(iIdx=0; iIdx<nIdx; /* no-op */){
  4802. u32 iVal = 0;
  4803. iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
  4804. iKeyOff += iVal;
  4805. if( iKeyOff==iNextOff ){
  4806. bLastInDoclist = 1;
  4807. }
  4808. }
  4809. }
  4810. /* If this is (a) the first rowid on a page and (b) is not followed by
  4811. ** another position list on the same page, set the "first-rowid" field
  4812. ** of the header to 0. */
  4813. if( fts5GetU16(&aPg[0])==iStart && (bLastInDoclist || iNextOff==iPgIdx) ){
  4814. fts5PutU16(&aPg[0], 0);
  4815. }
  4816. }
  4817. if( pSeg->bDel ){
  4818. iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta);
  4819. aPg[iOff++] = 0x01;
  4820. }else if( bLastInDoclist==0 ){
  4821. if( iNextOff!=iPgIdx ){
  4822. u64 iNextDelta = 0;
  4823. iNextOff += fts5GetVarint(&aPg[iNextOff], &iNextDelta);
  4824. iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta + iNextDelta);
  4825. }
  4826. }else if(
  4827. pSeg->iLeafPgno==pSeg->iTermLeafPgno
  4828. && iStart==pSeg->iTermLeafOffset
  4829. ){
  4830. /* The entry being removed was the only position list in its
  4831. ** doclist. Therefore the term needs to be removed as well. */
  4832. int iKey = 0;
  4833. int iKeyOff = 0;
  4834. /* Set iKeyOff to the offset of the term that will be removed - the
  4835. ** last offset in the footer that is not greater than iStart. */
  4836. for(iIdx=0; iIdx<nIdx; iKey++){
  4837. u32 iVal = 0;
  4838. iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
  4839. if( (iKeyOff+iVal)>(u32)iStart ) break;
  4840. iKeyOff += iVal;
  4841. }
  4842. assert_nc( iKey>=1 );
  4843. /* Set iDelKeyOff to the value of the footer entry to remove from
  4844. ** the page. */
  4845. iDelKeyOff = iOff = iKeyOff;
  4846. if( iNextOff!=iPgIdx ){
  4847. /* This is the only position-list associated with the term, and there
  4848. ** is another term following it on this page. So the subsequent term
  4849. ** needs to be moved to replace the term associated with the entry
  4850. ** being removed. */
  4851. int nPrefix = 0;
  4852. int nSuffix = 0;
  4853. int nPrefix2 = 0;
  4854. int nSuffix2 = 0;
  4855. iDelKeyOff = iNextOff;
  4856. iNextOff += fts5GetVarint32(&aPg[iNextOff], nPrefix2);
  4857. iNextOff += fts5GetVarint32(&aPg[iNextOff], nSuffix2);
  4858. if( iKey!=1 ){
  4859. iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nPrefix);
  4860. }
  4861. iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nSuffix);
  4862. nPrefix = MIN(nPrefix, nPrefix2);
  4863. nSuffix = (nPrefix2 + nSuffix2) - nPrefix;
  4864. if( (iKeyOff+nSuffix)>iPgIdx || (iNextOff+nSuffix2)>iPgIdx ){
  4865. p->rc = FTS5_CORRUPT;
  4866. }else{
  4867. if( iKey!=1 ){
  4868. iOff += sqlite3Fts5PutVarint(&aPg[iOff], nPrefix);
  4869. }
  4870. iOff += sqlite3Fts5PutVarint(&aPg[iOff], nSuffix);
  4871. if( nPrefix2>pSeg->term.n ){
  4872. p->rc = FTS5_CORRUPT;
  4873. }else if( nPrefix2>nPrefix ){
  4874. memcpy(&aPg[iOff], &pSeg->term.p[nPrefix], nPrefix2-nPrefix);
  4875. iOff += (nPrefix2-nPrefix);
  4876. }
  4877. memmove(&aPg[iOff], &aPg[iNextOff], nSuffix2);
  4878. iOff += nSuffix2;
  4879. iNextOff += nSuffix2;
  4880. }
  4881. }
  4882. }else if( iStart==4 ){
  4883. int iPgno;
  4884. assert_nc( pSeg->iLeafPgno>pSeg->iTermLeafPgno );
  4885. /* The entry being removed may be the only position list in
  4886. ** its doclist. */
  4887. for(iPgno=pSeg->iLeafPgno-1; iPgno>pSeg->iTermLeafPgno; iPgno-- ){
  4888. Fts5Data *pPg = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, iPgno));
  4889. int bEmpty = (pPg && pPg->nn==4);
  4890. fts5DataRelease(pPg);
  4891. if( bEmpty==0 ) break;
  4892. }
  4893. if( iPgno==pSeg->iTermLeafPgno ){
  4894. i64 iId = FTS5_SEGMENT_ROWID(iSegid, pSeg->iTermLeafPgno);
  4895. Fts5Data *pTerm = fts5DataRead(p, iId);
  4896. if( pTerm && pTerm->szLeaf==pSeg->iTermLeafOffset ){
  4897. u8 *aTermIdx = &pTerm->p[pTerm->szLeaf];
  4898. int nTermIdx = pTerm->nn - pTerm->szLeaf;
  4899. int iTermIdx = 0;
  4900. int iTermOff = 0;
  4901. while( 1 ){
  4902. u32 iVal = 0;
  4903. int nByte = fts5GetVarint32(&aTermIdx[iTermIdx], iVal);
  4904. iTermOff += iVal;
  4905. if( (iTermIdx+nByte)>=nTermIdx ) break;
  4906. iTermIdx += nByte;
  4907. }
  4908. nTermIdx = iTermIdx;
  4909. memmove(&pTerm->p[iTermOff], &pTerm->p[pTerm->szLeaf], nTermIdx);
  4910. fts5PutU16(&pTerm->p[2], iTermOff);
  4911. fts5DataWrite(p, iId, pTerm->p, iTermOff+nTermIdx);
  4912. if( nTermIdx==0 ){
  4913. fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iTermLeafPgno);
  4914. }
  4915. }
  4916. fts5DataRelease(pTerm);
  4917. }
  4918. }
  4919. /* Assuming no error has occurred, this block does final edits to the
  4920. ** leaf page before writing it back to disk. Input variables are:
  4921. **
  4922. ** nPg: Total initial size of leaf page.
  4923. ** iPgIdx: Initial offset of page footer.
  4924. **
  4925. ** iOff: Offset to move data to
  4926. ** iNextOff: Offset to move data from
  4927. */
  4928. if( p->rc==SQLITE_OK ){
  4929. const int nMove = nPg - iNextOff; /* Number of bytes to move */
  4930. int nShift = iNextOff - iOff; /* Distance to move them */
  4931. int iPrevKeyOut = 0;
  4932. int iKeyIn = 0;
  4933. memmove(&aPg[iOff], &aPg[iNextOff], nMove);
  4934. iPgIdx -= nShift;
  4935. nPg = iPgIdx;
  4936. fts5PutU16(&aPg[2], iPgIdx);
  4937. for(iIdx=0; iIdx<nIdx; /* no-op */){
  4938. u32 iVal = 0;
  4939. iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
  4940. iKeyIn += iVal;
  4941. if( iKeyIn!=iDelKeyOff ){
  4942. int iKeyOut = (iKeyIn - (iKeyIn>iOff ? nShift : 0));
  4943. nPg += sqlite3Fts5PutVarint(&aPg[nPg], iKeyOut - iPrevKeyOut);
  4944. iPrevKeyOut = iKeyOut;
  4945. }
  4946. }
  4947. if( iPgIdx==nPg && nIdx>0 && pSeg->iLeafPgno!=1 ){
  4948. fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iLeafPgno);
  4949. }
  4950. assert_nc( nPg>4 || fts5GetU16(aPg)==0 );
  4951. fts5DataWrite(p, FTS5_SEGMENT_ROWID(iSegid,pSeg->iLeafPgno), aPg, nPg);
  4952. }
  4953. sqlite3_free(aIdx);
  4954. }
  4955. /*
  4956. ** This is called as part of flushing a delete to disk in 'secure-delete'
  4957. ** mode. It edits the segments within the database described by argument
  4958. ** pStruct to remove the entries for term zTerm, rowid iRowid.
  4959. **
  4960. ** Return SQLITE_OK if successful, or an SQLite error code if an error
  4961. ** has occurred. Any error code is also stored in the Fts5Index handle.
  4962. */
  4963. static int fts5FlushSecureDelete(
  4964. Fts5Index *p,
  4965. Fts5Structure *pStruct,
  4966. const char *zTerm,
  4967. int nTerm,
  4968. i64 iRowid
  4969. ){
  4970. const int f = FTS5INDEX_QUERY_SKIPHASH;
  4971. Fts5Iter *pIter = 0; /* Used to find term instance */
  4972. /* If the version number has not been set to SECUREDELETE, do so now. */
  4973. if( p->pConfig->iVersion!=FTS5_CURRENT_VERSION_SECUREDELETE ){
  4974. Fts5Config *pConfig = p->pConfig;
  4975. sqlite3_stmt *pStmt = 0;
  4976. fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
  4977. "REPLACE INTO %Q.'%q_config' VALUES ('version', %d)",
  4978. pConfig->zDb, pConfig->zName, FTS5_CURRENT_VERSION_SECUREDELETE
  4979. ));
  4980. if( p->rc==SQLITE_OK ){
  4981. int rc;
  4982. sqlite3_step(pStmt);
  4983. rc = sqlite3_finalize(pStmt);
  4984. if( p->rc==SQLITE_OK ) p->rc = rc;
  4985. pConfig->iCookie++;
  4986. pConfig->iVersion = FTS5_CURRENT_VERSION_SECUREDELETE;
  4987. }
  4988. }
  4989. fts5MultiIterNew(p, pStruct, f, 0, (const u8*)zTerm, nTerm, -1, 0, &pIter);
  4990. if( fts5MultiIterEof(p, pIter)==0 ){
  4991. i64 iThis = fts5MultiIterRowid(pIter);
  4992. if( iThis<iRowid ){
  4993. fts5MultiIterNextFrom(p, pIter, iRowid);
  4994. }
  4995. if( p->rc==SQLITE_OK
  4996. && fts5MultiIterEof(p, pIter)==0
  4997. && iRowid==fts5MultiIterRowid(pIter)
  4998. ){
  4999. Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
  5000. fts5DoSecureDelete(p, pSeg);
  5001. }
  5002. }
  5003. fts5MultiIterFree(pIter);
  5004. return p->rc;
  5005. }
/*
** Flush the contents of the in-memory hash table (p->pHash) to a new level-0
** segment on disk. Also update the corresponding structure record.
**
** When the secure-delete option is enabled (p->pConfig->bSecureDelete),
** delete entries found in the hash table are not copied into the new
** segment. Instead, they are applied directly to existing segments via
** fts5FlushSecureDelete(). In that mode it is possible for no new segment
** to be written at all (pgnoLast remains 0).
**
** After the new segment (if any) has been added to the structure, automerge
** and crisis-merge are given a chance to run and the structure record is
** written back to the database.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p){
  Fts5Hash *pHash = p->pHash;
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;                 /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment.  */
  pStruct = fts5StructureRead(p);
  fts5StructureInvalidate(p);

  if( sqlite3Fts5HashIsEmpty(pHash)==0 ){
    iSegid = fts5AllocateSegid(p, pStruct);
    if( iSegid ){
      const int pgsz = p->pConfig->pgsz;
      int eDetail = p->pConfig->eDetail;
      int bSecureDelete = p->pConfig->bSecureDelete;
      Fts5StructureSegment *pSeg;   /* New segment within pStruct */
      Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
      Fts5Buffer *pPgidx;           /* Buffer in which to assemble pgidx */

      Fts5SegWriter writer;
      fts5WriteInit(p, &writer, iSegid);

      pBuf = &writer.writer.buf;
      pPgidx = &writer.writer.pgidx;

      /* fts5WriteInit() should have initialized the buffers to (most likely)
      ** the maximum space required. */
      assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
      assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );

      /* Begin scanning through hash table entries. This loop runs once for each
      ** term/doclist currently stored within the hash table. */
      if( p->rc==SQLITE_OK ){
        p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
      }
      while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
        const char *zTerm;          /* Buffer containing term */
        int nTerm;                  /* Size of zTerm in bytes */
        const u8 *pDoclist;         /* Pointer to doclist for this term */
        int nDoclist;               /* Size of doclist in bytes */

        /* Get the term and doclist for this entry. */
        sqlite3Fts5HashScanEntry(pHash, &zTerm, &nTerm, &pDoclist, &nDoclist);

        /* Outside of secure-delete mode the term is always written. In
        ** secure-delete mode writing the term is deferred (see bTermWritten
        ** below) until an entry that must be stored in the new segment is
        ** encountered. */
        if( bSecureDelete==0 ){
          fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm);
          if( p->rc!=SQLITE_OK ) break;
          assert( writer.bFirstRowidInPage==0 );
        }

        if( !bSecureDelete && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
          /* The entire doclist will fit on the current leaf. */
          fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
        }else{
          int bTermWritten = !bSecureDelete;  /* True once term is on disk */
          i64 iRowid = 0;                     /* Rowid of current entry */
          i64 iPrev = 0;                      /* Previous rowid written */
          int iOff = 0;                       /* Read offset in pDoclist[] */

          /* Either the entire doclist will not fit on this leaf, or
          ** secure-delete mode requires each entry to be inspected. The
          ** following loop iterates through the poslists that make up the
          ** current doclist. */
          while( p->rc==SQLITE_OK && iOff<nDoclist ){
            u64 iDelta = 0;
            iOff += fts5GetVarint(&pDoclist[iOff], &iDelta);
            iRowid += iDelta;

            /* If in secure delete mode, and if this entry in the poslist is
            ** in fact a delete, then edit the existing segments directly
            ** using fts5FlushSecureDelete(). */
            if( bSecureDelete ){
              if( eDetail==FTS5_DETAIL_NONE ){
                /* detail=none: a 0x00 byte following the rowid marks a
                ** delete. If a second 0x00 byte follows, the entry is still
                ** written to the new segment below and, because nDoclist is
                ** cleared, it is the last entry processed for this term.
                ** Otherwise the entry is dropped. */
                if( iOff<nDoclist && pDoclist[iOff]==0x00
                 && !fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid)
                ){
                  iOff++;
                  if( iOff<nDoclist && pDoclist[iOff]==0x00 ){
                    iOff++;
                    nDoclist = 0;
                  }else{
                    continue;
                  }
                }
              }else if( (pDoclist[iOff] & 0x01)
                     && !fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid)
              ){
                /* The low bit of the poslist-size byte flags a delete. If
                ** the byte is exactly 0x01 there is nothing else to copy for
                ** this rowid, so skip the entry altogether. */
                if( p->rc!=SQLITE_OK || pDoclist[iOff]==0x01 ){
                  iOff++;
                  continue;
                }
              }
            }

            /* Deferred write of the term (secure-delete mode only). */
            if( p->rc==SQLITE_OK && bTermWritten==0 ){
              fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm);
              bTermWritten = 1;
              assert( p->rc!=SQLITE_OK || writer.bFirstRowidInPage==0 );
            }

            /* Write the rowid. The first rowid on a page is stored as an
            ** absolute value and its offset recorded in the first two bytes
            ** of the page. Subsequent rowids are stored as deltas, computed
            ** using unsigned arithmetic to avoid signed overflow. */
            if( writer.bFirstRowidInPage ){
              fts5PutU16(&pBuf->p[0], (u16)pBuf->n);   /* first rowid on page */
              pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
              writer.bFirstRowidInPage = 0;
              fts5WriteDlidxAppend(p, &writer, iRowid);
            }else{
              u64 iRowidDelta = (u64)iRowid - (u64)iPrev;
              pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowidDelta);
            }
            if( p->rc!=SQLITE_OK ) break;
            assert( pBuf->n<=pBuf->nSpace );
            iPrev = iRowid;

            if( eDetail==FTS5_DETAIL_NONE ){
              /* detail=none: copy up to two 0x00 marker bytes that may
              ** follow the rowid, then flush the leaf if it is full. */
              if( iOff<nDoclist && pDoclist[iOff]==0 ){
                pBuf->p[pBuf->n++] = 0;
                iOff++;
                if( iOff<nDoclist && pDoclist[iOff]==0 ){
                  pBuf->p[pBuf->n++] = 0;
                  iOff++;
                }
              }
              if( (pBuf->n + pPgidx->n)>=pgsz ){
                fts5WriteFlushLeaf(p, &writer);
              }
            }else{
              int bDel = 0;
              int nPos = 0;
              int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDel);
              if( bDel && bSecureDelete ){
                /* The delete has already been applied to the existing
                ** segments. Re-emit the size field as nPos*2 - i.e. with the
                ** low bit (tested above as the delete flag) clear - and copy
                ** only the nPos bytes of position data that follow it. */
                fts5BufferAppendVarint(&p->rc, pBuf, nPos*2);
                iOff += nCopy;
                nCopy = nPos;
              }else{
                /* Copy the size field and the position data verbatim. */
                nCopy += nPos;
              }
              if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
                /* The entire poslist will fit on the current leaf. So copy
                ** it in one go. */
                fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
              }else{
                /* The entire poslist will not fit on this leaf. So it needs
                ** to be broken into sections. The only qualification being
                ** that each varint must be stored contiguously.  */
                const u8 *pPoslist = &pDoclist[iOff];
                int iPos = 0;
                while( p->rc==SQLITE_OK ){
                  int nSpace = pgsz - pBuf->n - pPgidx->n;
                  int n = 0;
                  if( (nCopy - iPos)<=nSpace ){
                    n = nCopy - iPos;
                  }else{
                    n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
                  }
                  assert( n>0 );
                  fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
                  iPos += n;
                  if( (pBuf->n + pPgidx->n)>=pgsz ){
                    fts5WriteFlushLeaf(p, &writer);
                  }
                  if( iPos>=nCopy ) break;
                }
              }
              iOff += nCopy;
            }
          }
        }

        /* TODO2: Doclist terminator written here. */
        /* pBuf->p[pBuf->n++] = '\0'; */
        assert( pBuf->n<=pBuf->nSpace );
        if( p->rc==SQLITE_OK ) sqlite3Fts5HashScanNext(pHash);
      }
      fts5WriteFinish(p, &writer, &pgnoLast);

      /* In secure-delete mode it is possible that every hash entry was a
      ** delete, in which case no leaf pages were written. */
      assert( p->rc!=SQLITE_OK || bSecureDelete || pgnoLast>0 );
      if( pgnoLast>0 ){
        /* Update the Fts5Structure. It is written back to the database by the
        ** fts5StructureWrite() call below. */
        if( pStruct->nLevel==0 ){
          fts5StructureAddLevel(&p->rc, &pStruct);
        }
        fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
        if( p->rc==SQLITE_OK ){
          pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
          pSeg->iSegid = iSegid;
          pSeg->pgnoFirst = 1;
          pSeg->pgnoLast = pgnoLast;
          /* Origin counters are only maintained if nOriginCntr is already
          ** non-zero in the structure record. */
          if( pStruct->nOriginCntr>0 ){
            pSeg->iOrigin1 = pStruct->nOriginCntr;
            pSeg->iOrigin2 = pStruct->nOriginCntr;
            pSeg->nEntry = p->nPendingRow;
            pStruct->nOriginCntr++;
          }
          pStruct->nSegment++;
        }
        fts5StructurePromote(p, 0, pStruct);
      }
    }
  }

  /* The third argument to fts5IndexAutomerge() is the number of leaf pages
  ** just written plus the number of pending contentless deletes. */
  fts5IndexAutomerge(p, &pStruct, pgnoLast + p->nContentlessDelete);
  fts5IndexCrisismerge(p, &pStruct);
  fts5StructureWrite(p, pStruct);
  fts5StructureRelease(pStruct);
}
  5203. /*
  5204. ** Flush any data stored in the in-memory hash tables to the database.
  5205. */
  5206. static void fts5IndexFlush(Fts5Index *p){
  5207. /* Unless it is empty, flush the hash table to disk */
  5208. if( p->flushRc ){
  5209. p->rc = p->flushRc;
  5210. return;
  5211. }
  5212. if( p->nPendingData || p->nContentlessDelete ){
  5213. assert( p->pHash );
  5214. fts5FlushOneHash(p);
  5215. if( p->rc==SQLITE_OK ){
  5216. sqlite3Fts5HashClear(p->pHash);
  5217. p->nPendingData = 0;
  5218. p->nPendingRow = 0;
  5219. p->nContentlessDelete = 0;
  5220. }else if( p->nPendingData || p->nContentlessDelete ){
  5221. p->flushRc = p->rc;
  5222. }
  5223. }
  5224. }
  5225. static Fts5Structure *fts5IndexOptimizeStruct(
  5226. Fts5Index *p,
  5227. Fts5Structure *pStruct
  5228. ){
  5229. Fts5Structure *pNew = 0;
  5230. sqlite3_int64 nByte = SZ_FTS5STRUCTURE(1);
  5231. int nSeg = pStruct->nSegment;
  5232. int i;
  5233. /* Figure out if this structure requires optimization. A structure does
  5234. ** not require optimization if either:
  5235. **
  5236. ** 1. it consists of fewer than two segments, or
  5237. ** 2. all segments are on the same level, or
  5238. ** 3. all segments except one are currently inputs to a merge operation.
  5239. **
  5240. ** In the first case, if there are no tombstone hash pages, return NULL. In
  5241. ** the second, increment the ref-count on *pStruct and return a copy of the
  5242. ** pointer to it.
  5243. */
  5244. if( nSeg==0 ) return 0;
  5245. for(i=0; i<pStruct->nLevel; i++){
  5246. int nThis = pStruct->aLevel[i].nSeg;
  5247. int nMerge = pStruct->aLevel[i].nMerge;
  5248. if( nThis>0 && (nThis==nSeg || (nThis==nSeg-1 && nMerge==nThis)) ){
  5249. if( nSeg==1 && nThis==1 && pStruct->aLevel[i].aSeg[0].nPgTombstone==0 ){
  5250. return 0;
  5251. }
  5252. fts5StructureRef(pStruct);
  5253. return pStruct;
  5254. }
  5255. assert( pStruct->aLevel[i].nMerge<=nThis );
  5256. }
  5257. nByte += (((i64)pStruct->nLevel)+1) * sizeof(Fts5StructureLevel);
  5258. assert( nByte==SZ_FTS5STRUCTURE(pStruct->nLevel+2) );
  5259. pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
  5260. if( pNew ){
  5261. Fts5StructureLevel *pLvl;
  5262. nByte = nSeg * sizeof(Fts5StructureSegment);
  5263. pNew->nLevel = MIN(pStruct->nLevel+1, FTS5_MAX_LEVEL);
  5264. pNew->nRef = 1;
  5265. pNew->nWriteCounter = pStruct->nWriteCounter;
  5266. pNew->nOriginCntr = pStruct->nOriginCntr;
  5267. pLvl = &pNew->aLevel[pNew->nLevel-1];
  5268. pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
  5269. if( pLvl->aSeg ){
  5270. int iLvl, iSeg;
  5271. int iSegOut = 0;
  5272. /* Iterate through all segments, from oldest to newest. Add them to
  5273. ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest
  5274. ** segment in the data structure. */
  5275. for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
  5276. for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
  5277. pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
  5278. iSegOut++;
  5279. }
  5280. }
  5281. pNew->nSegment = pLvl->nSeg = nSeg;
  5282. }else{
  5283. sqlite3_free(pNew);
  5284. pNew = 0;
  5285. }
  5286. }
  5287. return pNew;
  5288. }
  5289. int sqlite3Fts5IndexOptimize(Fts5Index *p){
  5290. Fts5Structure *pStruct;
  5291. Fts5Structure *pNew = 0;
  5292. assert( p->rc==SQLITE_OK );
  5293. fts5IndexFlush(p);
  5294. assert( p->rc!=SQLITE_OK || p->nContentlessDelete==0 );
  5295. pStruct = fts5StructureRead(p);
  5296. assert( p->rc!=SQLITE_OK || pStruct!=0 );
  5297. fts5StructureInvalidate(p);
  5298. if( pStruct ){
  5299. pNew = fts5IndexOptimizeStruct(p, pStruct);
  5300. }
  5301. fts5StructureRelease(pStruct);
  5302. assert( pNew==0 || pNew->nSegment>0 );
  5303. if( pNew ){
  5304. int iLvl;
  5305. for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){}
  5306. while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
  5307. int nRem = FTS5_OPT_WORK_UNIT;
  5308. fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
  5309. }
  5310. fts5StructureWrite(p, pNew);
  5311. fts5StructureRelease(pNew);
  5312. }
  5313. return fts5IndexReturn(p);
  5314. }
  5315. /*
  5316. ** This is called to implement the special "VALUES('merge', $nMerge)"
  5317. ** INSERT command.
  5318. */
  5319. int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
  5320. Fts5Structure *pStruct = 0;
  5321. fts5IndexFlush(p);
  5322. pStruct = fts5StructureRead(p);
  5323. if( pStruct ){
  5324. int nMin = p->pConfig->nUsermerge;
  5325. fts5StructureInvalidate(p);
  5326. if( nMerge<0 ){
  5327. Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
  5328. fts5StructureRelease(pStruct);
  5329. pStruct = pNew;
  5330. nMin = 1;
  5331. nMerge = nMerge*-1;
  5332. }
  5333. if( pStruct && pStruct->nLevel ){
  5334. if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
  5335. fts5StructureWrite(p, pStruct);
  5336. }
  5337. }
  5338. fts5StructureRelease(pStruct);
  5339. }
  5340. return fts5IndexReturn(p);
  5341. }
  5342. static void fts5AppendRowid(
  5343. Fts5Index *p,
  5344. u64 iDelta,
  5345. Fts5Iter *pUnused,
  5346. Fts5Buffer *pBuf
  5347. ){
  5348. UNUSED_PARAM(pUnused);
  5349. fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
  5350. }
  5351. static void fts5AppendPoslist(
  5352. Fts5Index *p,
  5353. u64 iDelta,
  5354. Fts5Iter *pMulti,
  5355. Fts5Buffer *pBuf
  5356. ){
  5357. int nData = pMulti->base.nData;
  5358. int nByte = nData + 9 + 9 + FTS5_DATA_ZERO_PADDING;
  5359. assert( nData>0 );
  5360. if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nByte) ){
  5361. fts5BufferSafeAppendVarint(pBuf, iDelta);
  5362. fts5BufferSafeAppendVarint(pBuf, nData*2);
  5363. fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData);
  5364. memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING);
  5365. }
  5366. }
  5367. static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
  5368. u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
  5369. assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) );
  5370. if( p>=pIter->aEof ){
  5371. pIter->aPoslist = 0;
  5372. }else{
  5373. i64 iDelta;
  5374. p += fts5GetVarint(p, (u64*)&iDelta);
  5375. pIter->iRowid += iDelta;
  5376. /* Read position list size */
  5377. if( p[0] & 0x80 ){
  5378. int nPos;
  5379. pIter->nSize = fts5GetVarint32(p, nPos);
  5380. pIter->nPoslist = (nPos>>1);
  5381. }else{
  5382. pIter->nPoslist = ((int)(p[0])) >> 1;
  5383. pIter->nSize = 1;
  5384. }
  5385. pIter->aPoslist = p;
  5386. if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){
  5387. pIter->aPoslist = 0;
  5388. }
  5389. }
  5390. }
  5391. static void fts5DoclistIterInit(
  5392. Fts5Buffer *pBuf,
  5393. Fts5DoclistIter *pIter
  5394. ){
  5395. memset(pIter, 0, sizeof(*pIter));
  5396. if( pBuf->n>0 ){
  5397. pIter->aPoslist = pBuf->p;
  5398. pIter->aEof = &pBuf->p[pBuf->n];
  5399. fts5DoclistIterNext(pIter);
  5400. }
  5401. }
  5402. #if 0
  5403. /*
  5404. ** Append a doclist to buffer pBuf.
  5405. **
  5406. ** This function assumes that space within the buffer has already been
  5407. ** allocated.
  5408. */
  5409. static void fts5MergeAppendDocid(
  5410. Fts5Buffer *pBuf, /* Buffer to write to */
  5411. i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */
  5412. i64 iRowid /* Rowid to append */
  5413. ){
  5414. assert( pBuf->n!=0 || (*piLastRowid)==0 );
  5415. fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
  5416. *piLastRowid = iRowid;
  5417. }
  5418. #endif
  5419. #define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \
  5420. assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \
  5421. fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \
  5422. (iLastRowid) = (iRowid); \
  5423. }
  5424. /*
  5425. ** Swap the contents of buffer *p1 with that of *p2.
  5426. */
  5427. static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
  5428. Fts5Buffer tmp = *p1;
  5429. *p1 = *p2;
  5430. *p2 = tmp;
  5431. }
  5432. static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
  5433. int i = *piOff;
  5434. if( i>=pBuf->n ){
  5435. *piOff = -1;
  5436. }else{
  5437. u64 iVal;
  5438. *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal);
  5439. *piRowid += iVal;
  5440. }
  5441. }
  5442. /*
  5443. ** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
  5444. ** In this case the buffers consist of a delta-encoded list of rowids only.
  5445. */
  5446. static void fts5MergeRowidLists(
  5447. Fts5Index *p, /* FTS5 backend object */
  5448. Fts5Buffer *p1, /* First list to merge */
  5449. int nBuf, /* Number of entries in apBuf[] */
  5450. Fts5Buffer *aBuf /* Array of other lists to merge into p1 */
  5451. ){
  5452. int i1 = 0;
  5453. int i2 = 0;
  5454. i64 iRowid1 = 0;
  5455. i64 iRowid2 = 0;
  5456. i64 iOut = 0;
  5457. Fts5Buffer *p2 = &aBuf[0];
  5458. Fts5Buffer out;
  5459. (void)nBuf;
  5460. memset(&out, 0, sizeof(out));
  5461. assert( nBuf==1 );
  5462. sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
  5463. if( p->rc ) return;
  5464. fts5NextRowid(p1, &i1, &iRowid1);
  5465. fts5NextRowid(p2, &i2, &iRowid2);
  5466. while( i1>=0 || i2>=0 ){
  5467. if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
  5468. assert( iOut==0 || iRowid1>iOut );
  5469. fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
  5470. iOut = iRowid1;
  5471. fts5NextRowid(p1, &i1, &iRowid1);
  5472. }else{
  5473. assert( iOut==0 || iRowid2>iOut );
  5474. fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
  5475. iOut = iRowid2;
  5476. if( i1>=0 && iRowid1==iRowid2 ){
  5477. fts5NextRowid(p1, &i1, &iRowid1);
  5478. }
  5479. fts5NextRowid(p2, &i2, &iRowid2);
  5480. }
  5481. }
  5482. fts5BufferSwap(&out, p1);
  5483. fts5BufferFree(&out);
  5484. }
  5485. typedef struct PrefixMerger PrefixMerger;
  5486. struct PrefixMerger {
  5487. Fts5DoclistIter iter; /* Doclist iterator */
  5488. i64 iPos; /* For iterating through a position list */
  5489. int iOff;
  5490. u8 *aPos;
  5491. PrefixMerger *pNext; /* Next in docid/poslist order */
  5492. };
  5493. static void fts5PrefixMergerInsertByRowid(
  5494. PrefixMerger **ppHead,
  5495. PrefixMerger *p
  5496. ){
  5497. if( p->iter.aPoslist ){
  5498. PrefixMerger **pp = ppHead;
  5499. while( *pp && p->iter.iRowid>(*pp)->iter.iRowid ){
  5500. pp = &(*pp)->pNext;
  5501. }
  5502. p->pNext = *pp;
  5503. *pp = p;
  5504. }
  5505. }
  5506. static void fts5PrefixMergerInsertByPosition(
  5507. PrefixMerger **ppHead,
  5508. PrefixMerger *p
  5509. ){
  5510. if( p->iPos>=0 ){
  5511. PrefixMerger **pp = ppHead;
  5512. while( *pp && p->iPos>(*pp)->iPos ){
  5513. pp = &(*pp)->pNext;
  5514. }
  5515. p->pNext = *pp;
  5516. *pp = p;
  5517. }
  5518. }
/*
** Array aBuf[] contains nBuf doclists. These are all merged in with the
** doclist in buffer p1.
**
** If all of the aBuf[] buffers are empty, this function returns without
** modifying p1. Otherwise, unless the output buffer cannot be allocated,
** the previous contents of p1 are freed and replaced with the merged
** doclist. Where the same rowid appears in more than one input, the
** position lists are merged into a single position list with duplicate
** positions removed.
**
** nBuf must be less than FTS5_MERGE_NLIST. If an error occurs (including
** detection of corrupt input), an error code is left in p->rc.
*/
static void fts5MergePrefixLists(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Buffer *p1,                 /* First list to merge */
  int nBuf,                       /* Number of buffers in array aBuf[] */
  Fts5Buffer *aBuf                /* Other lists to merge in */
){
/* Advance PrefixMerger (p) to the next position in its position list. */
#define fts5PrefixMergerNextPosition(p) \
  sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos)
/* Maximum number of lists (including p1) that may be merged at once. */
#define FTS5_MERGE_NLIST 16
  PrefixMerger aMerger[FTS5_MERGE_NLIST];
  PrefixMerger *pHead = 0;        /* Head of sorted list of mergers */
  int i;
  int nOut = 0;                   /* Size of output buffer to allocate */
  Fts5Buffer out = {0, 0, 0};     /* Merged doclist is assembled here */
  Fts5Buffer tmp = {0, 0, 0};     /* Used to assemble merged position lists */
  i64 iLastRowid = 0;             /* Last rowid written to out */

  /* Initialize a doclist-iterator for each input buffer. Arrange them in
  ** a linked-list starting at pHead in ascending order of rowid. Avoid
  ** linking any iterators already at EOF into the linked list at all.
  ** The iterator for p1 is stored in the final slot used, aMerger[nBuf]. */
  assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) );
  memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1));
  pHead = &aMerger[nBuf];
  fts5DoclistIterInit(p1, &pHead->iter);
  for(i=0; i<nBuf; i++){
    fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter);
    fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]);
    nOut += aBuf[i].n;
  }
  if( nOut==0 ) return;
  nOut += p1->n + 9 + 10*nBuf;

  /* The maximum size of the output is equal to the sum of the
  ** input sizes + 1 varint (9 bytes). The extra varint is because if the
  ** first rowid in one input is a large negative number, and the first in
  ** the other a non-negative number, the delta for the non-negative
  ** number will be larger on disk than the literal integer value
  ** was.
  **
  ** Or, if the input position-lists are corrupt, then the output might
  ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1
  ** (the value PoslistNext64() uses for EOF) as a position and appending
  ** it to the output. This can happen at most once for each input
  ** position-list, hence (nBuf+1) 10 byte paddings.  */
  if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return;

  /* Each iteration of this loop writes one rowid and its position list
  ** to the output. */
  while( pHead ){
    fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid);

    if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){
      /* Merge data from two or more poslists */
      i64 iPrev = 0;                  /* Previous position written to tmp */
      int nTmp = FTS5_DATA_ZERO_PADDING;
      int nMerge = 0;                 /* Number of poslists being merged */
      PrefixMerger *pSave = pHead;
      PrefixMerger *pThis = 0;
      int nTail = 0;

      /* Unlink every merger positioned on rowid iLastRowid from the rowid
      ** list and relink it into a list sorted by first position. When the
      ** loop ends, pSave is the remainder of the rowid-ordered list. */
      pHead = 0;
      while( pSave && pSave->iter.iRowid==iLastRowid ){
        PrefixMerger *pNext = pSave->pNext;
        pSave->iOff = 0;
        pSave->iPos = 0;
        pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize];
        fts5PrefixMergerNextPosition(pSave);
        nTmp += pSave->iter.nPoslist + 10;
        nMerge++;
        fts5PrefixMergerInsertByPosition(&pHead, pSave);
        pSave = pNext;
      }

      /* At least two non-empty position lists are expected here. Anything
      ** else is treated as database corruption. */
      if( pHead==0 || pHead->pNext==0 ){
        p->rc = FTS5_CORRUPT;
        break;
      }

      /* See the earlier comment in this function for an explanation of why
      ** corrupt input position lists might cause the output to consume
      ** at most nMerge*10 bytes of unexpected space. */
      if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){
        break;
      }
      fts5BufferZero(&tmp);

      /* Write the smallest position, then advance that merger and put it
      ** back in the position-ordered list. */
      pThis = pHead;
      pHead = pThis->pNext;
      sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
      fts5PrefixMergerNextPosition(pThis);
      fts5PrefixMergerInsertByPosition(&pHead, pThis);

      /* Continue for as long as two or more mergers remain in the list,
      ** skipping any position equal to the one just written. */
      while( pHead->pNext ){
        pThis = pHead;
        if( pThis->iPos!=iPrev ){
          sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
        }
        fts5PrefixMergerNextPosition(pThis);
        pHead = pThis->pNext;
        fts5PrefixMergerInsertByPosition(&pHead, pThis);
      }

      /* A single merger remains. Write its current position (if not a
      ** duplicate). The nTail bytes of its position list that have not
      ** yet been read are copied to the output verbatim below. */
      if( pHead->iPos!=iPrev ){
        sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos);
      }
      nTail = pHead->iter.nPoslist - pHead->iOff;

      /* WRITEPOSLISTSIZE */
      assert_nc( tmp.n+nTail<=nTmp );
      assert( tmp.n+nTail<=nTmp+nMerge*10 );
      if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING ){
        if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
        break;
      }
      fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2);
      fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
      if( nTail>0 ){
        fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail);
      }

      /* Restore the rowid-ordered list and advance each merger that was
      ** positioned on the rowid just written. */
      pHead = pSave;
      for(i=0; i<nBuf+1; i++){
        PrefixMerger *pX = &aMerger[i];
        if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){
          fts5DoclistIterNext(&pX->iter);
          fts5PrefixMergerInsertByRowid(&pHead, pX);
        }
      }

    }else{
      /* Copy poslist from pHead to output */
      PrefixMerger *pThis = pHead;
      Fts5DoclistIter *pI = &pThis->iter;
      fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize);
      fts5DoclistIterNext(pI);
      pHead = pThis->pNext;
      fts5PrefixMergerInsertByRowid(&pHead, pThis);
    }
  }

  /* Replace the contents of p1 with the output, following it with
  ** FTS5_DATA_ZERO_PADDING zero bytes that are not counted in out.n. */
  fts5BufferFree(p1);
  fts5BufferFree(&tmp);
  memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING);
  *p1 = out;
}
  5652. /*
  5653. ** Iterate through a range of entries in the FTS index, invoking the xVisit
  5654. ** callback for each of them.
  5655. **
  5656. ** Parameter pToken points to an nToken buffer containing an FTS index term
  5657. ** (i.e. a document term with the preceding 1 byte index identifier -
  5658. ** FTS5_MAIN_PREFIX or similar). If bPrefix is true, then the call visits
  5659. ** all entries for terms that have pToken/nToken as a prefix. If bPrefix
  5660. ** is false, then only entries with pToken/nToken as the entire key are
  5661. ** visited.
  5662. **
  5663. ** If the current table is a tokendata=1 table, then if bPrefix is true then
  5664. ** each index term is treated separately. However, if bPrefix is false, then
  5665. ** all index terms corresponding to pToken/nToken are collapsed into a single
  5666. ** term before the callback is invoked.
  5667. **
  5668. ** The callback invoked for each entry visited is specified by paramter xVisit.
  5669. ** Each time it is invoked, it is passed a pointer to the Fts5Index object,
  5670. ** a copy of the 7th paramter to this function (pCtx) and a pointer to the
  5671. ** iterator that indicates the current entry. If the current entry is the
  5672. ** first with a new term (i.e. different from that of the previous entry,
  5673. ** including the very first term), then the final two parameters are passed
  5674. ** a pointer to the term and its size in bytes, respectively. If the current
  5675. ** entry is not the first associated with its term, these two parameters
  5676. ** are passed 0.
  5677. **
  5678. ** If parameter pColset is not NULL, then it is used to filter entries before
  5679. ** the callback is invoked.
  5680. */
  5681. static int fts5VisitEntries(
  5682. Fts5Index *p, /* Fts5 index object */
  5683. Fts5Colset *pColset, /* Columns filter to apply, or NULL */
  5684. u8 *pToken, /* Buffer containing token */
  5685. int nToken, /* Size of buffer pToken in bytes */
  5686. int bPrefix, /* True for a prefix scan */
  5687. void (*xVisit)(Fts5Index*, void *pCtx, Fts5Iter *pIter, const u8*, int),
  5688. void *pCtx /* Passed as second argument to xVisit() */
  5689. ){
  5690. const int flags = (bPrefix ? FTS5INDEX_QUERY_SCAN : 0)
  5691. | FTS5INDEX_QUERY_SKIPEMPTY
  5692. | FTS5INDEX_QUERY_NOOUTPUT;
  5693. Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
  5694. int bNewTerm = 1;
  5695. Fts5Structure *pStruct = fts5StructureRead(p);
  5696. fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
  5697. fts5IterSetOutputCb(&p->rc, p1);
  5698. for( /* no-op */ ;
  5699. fts5MultiIterEof(p, p1)==0;
  5700. fts5MultiIterNext2(p, p1, &bNewTerm)
  5701. ){
  5702. Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
  5703. int nNew = 0;
  5704. const u8 *pNew = 0;
  5705. p1->xSetOutputs(p1, pSeg);
  5706. if( p->rc ) break;
  5707. if( bNewTerm ){
  5708. nNew = pSeg->term.n;
  5709. pNew = pSeg->term.p;
  5710. if( nNew<nToken || memcmp(pToken, pNew, nToken) ) break;
  5711. }
  5712. xVisit(p, pCtx, p1, pNew, nNew);
  5713. }
  5714. fts5MultiIterFree(p1);
  5715. fts5StructureRelease(pStruct);
  5716. return p->rc;
  5717. }
  5718. /*
  5719. ** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an
  5720. ** array of these for each row it visits (so all iRowid fields are the same).
  5721. ** Or, for an iterator used by an "ORDER BY rank" query, it accumulates an
  5722. ** array of these for the entire query (in which case iRowid fields may take
  5723. ** a variety of values).
  5724. **
  5725. ** Each instance in the array indicates the iterator (and therefore term)
  5726. ** associated with position iPos of rowid iRowid. This is used by the
  5727. ** xInstToken() API.
  5728. **
  5729. ** iRowid:
  5730. ** Rowid for the current entry.
  5731. **
  5732. ** iPos:
  5733. ** Position of current entry within row. In the usual ((iCol<<32)+iOff)
  5734. ** format (e.g. see macros FTS5_POS2COLUMN() and FTS5_POS2OFFSET()).
  5735. **
  5736. ** iIter:
  5737. ** If the Fts5TokenDataIter iterator that the entry is part of is
  5738. ** actually an iterator (i.e. with nIter>0, not just a container for
  5739. ** Fts5TokenDataMap structures), then this variable is an index into
  5740. ** the apIter[] array. The corresponding term is that which the iterator
  5741. ** at apIter[iIter] currently points to.
  5742. **
  5743. ** Or, if the Fts5TokenDataIter iterator is just a container object
  5744. ** (nIter==0), then iIter is an index into the term.p[] buffer where
  5745. ** the term is stored.
  5746. **
  5747. ** nByte:
  5748. ** In the case where iIter is an index into term.p[], this variable
  5749. ** is the size of the term in bytes. If iIter is an index into apIter[],
  5750. ** this variable is unused.
  5751. */
  5752. struct Fts5TokenDataMap {
  5753. i64 iRowid; /* Row this token is located in */
  5754. i64 iPos; /* Position of token */
  5755. int iIter; /* Iterator token was read from */
  5756. int nByte; /* Length of token in bytes (or 0) */
  5757. };
  5758. /*
  5759. ** An object used to supplement Fts5Iter for tokendata=1 iterators.
  5760. **
  5761. ** This object serves two purposes. The first is as a container for an array
  5762. ** of Fts5TokenDataMap structures, which are used to find the token required
  5763. ** when the xInstToken() API is used. This is done by the nMapAlloc, nMap and
  5764. ** aMap[] variables.
  5765. */
  5766. struct Fts5TokenDataIter {
  5767. int nMapAlloc; /* Allocated size of aMap[] in entries */
  5768. int nMap; /* Number of valid entries in aMap[] */
  5769. Fts5TokenDataMap *aMap; /* Array of (rowid+pos -> token) mappings */
  5770. /* The following are used for prefix-queries only. */
  5771. Fts5Buffer terms;
  5772. /* The following are used for other full-token tokendata queries only. */
  5773. int nIter;
  5774. int nIterAlloc;
  5775. Fts5PoslistReader *aPoslistReader;
  5776. int *aPoslistToIter;
  5777. Fts5Iter *apIter[FLEXARRAY];
  5778. };
  5779. /* Size in bytes of an Fts5TokenDataIter object holding up to N iterators */
  5780. #define SZ_FTS5TOKENDATAITER(N) \
  5781. (offsetof(Fts5TokenDataIter,apIter) + (N)*sizeof(Fts5Iter))
  5782. /*
  5783. ** The two input arrays - a1[] and a2[] - are in sorted order. This function
  5784. ** merges the two arrays together and writes the result to output array
  5785. ** aOut[]. aOut[] is guaranteed to be large enough to hold the result.
  5786. **
  5787. ** Duplicate entries are copied into the output. So the size of the output
  5788. ** array is always (n1+n2) entries.
  5789. */
  5790. static void fts5TokendataMerge(
  5791. Fts5TokenDataMap *a1, int n1, /* Input array 1 */
  5792. Fts5TokenDataMap *a2, int n2, /* Input array 2 */
  5793. Fts5TokenDataMap *aOut /* Output array */
  5794. ){
  5795. int i1 = 0;
  5796. int i2 = 0;
  5797. assert( n1>=0 && n2>=0 );
  5798. while( i1<n1 || i2<n2 ){
  5799. Fts5TokenDataMap *pOut = &aOut[i1+i2];
  5800. if( i2>=n2 || (i1<n1 && (
  5801. a1[i1].iRowid<a2[i2].iRowid
  5802. || (a1[i1].iRowid==a2[i2].iRowid && a1[i1].iPos<=a2[i2].iPos)
  5803. ))){
  5804. memcpy(pOut, &a1[i1], sizeof(Fts5TokenDataMap));
  5805. i1++;
  5806. }else{
  5807. memcpy(pOut, &a2[i2], sizeof(Fts5TokenDataMap));
  5808. i2++;
  5809. }
  5810. }
  5811. }
  5812. /*
  5813. ** Append a mapping to the token-map belonging to object pT.
  5814. */
  5815. static void fts5TokendataIterAppendMap(
  5816. Fts5Index *p,
  5817. Fts5TokenDataIter *pT,
  5818. int iIter,
  5819. int nByte,
  5820. i64 iRowid,
  5821. i64 iPos
  5822. ){
  5823. if( p->rc==SQLITE_OK ){
  5824. if( pT->nMap==pT->nMapAlloc ){
  5825. int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64;
  5826. int nAlloc = nNew * sizeof(Fts5TokenDataMap);
  5827. Fts5TokenDataMap *aNew;
  5828. aNew = (Fts5TokenDataMap*)sqlite3_realloc(pT->aMap, nAlloc);
  5829. if( aNew==0 ){
  5830. p->rc = SQLITE_NOMEM;
  5831. return;
  5832. }
  5833. pT->aMap = aNew;
  5834. pT->nMapAlloc = nNew;
  5835. }
  5836. pT->aMap[pT->nMap].iRowid = iRowid;
  5837. pT->aMap[pT->nMap].iPos = iPos;
  5838. pT->aMap[pT->nMap].iIter = iIter;
  5839. pT->aMap[pT->nMap].nByte = nByte;
  5840. pT->nMap++;
  5841. }
  5842. }
  5843. /*
  5844. ** Sort the contents of the pT->aMap[] array.
  5845. **
  5846. ** The sorting algorithm requires a malloc(). If this fails, an error code
  5847. ** is left in Fts5Index.rc before returning.
  5848. */
  5849. static void fts5TokendataIterSortMap(Fts5Index *p, Fts5TokenDataIter *pT){
  5850. Fts5TokenDataMap *aTmp = 0;
  5851. int nByte = pT->nMap * sizeof(Fts5TokenDataMap);
  5852. aTmp = (Fts5TokenDataMap*)sqlite3Fts5MallocZero(&p->rc, nByte);
  5853. if( aTmp ){
  5854. Fts5TokenDataMap *a1 = pT->aMap;
  5855. Fts5TokenDataMap *a2 = aTmp;
  5856. i64 nHalf;
  5857. for(nHalf=1; nHalf<pT->nMap; nHalf=nHalf*2){
  5858. int i1;
  5859. for(i1=0; i1<pT->nMap; i1+=(nHalf*2)){
  5860. int n1 = MIN(nHalf, pT->nMap-i1);
  5861. int n2 = MIN(nHalf, pT->nMap-i1-n1);
  5862. fts5TokendataMerge(&a1[i1], n1, &a1[i1+n1], n2, &a2[i1]);
  5863. }
  5864. SWAPVAL(Fts5TokenDataMap*, a1, a2);
  5865. }
  5866. if( a1!=pT->aMap ){
  5867. memcpy(pT->aMap, a1, pT->nMap*sizeof(Fts5TokenDataMap));
  5868. }
  5869. sqlite3_free(aTmp);
  5870. #ifdef SQLITE_DEBUG
  5871. {
  5872. int ii;
  5873. for(ii=1; ii<pT->nMap; ii++){
  5874. Fts5TokenDataMap *p1 = &pT->aMap[ii-1];
  5875. Fts5TokenDataMap *p2 = &pT->aMap[ii];
  5876. assert( p1->iRowid<p2->iRowid
  5877. || (p1->iRowid==p2->iRowid && p1->iPos<=p2->iPos)
  5878. );
  5879. }
  5880. }
  5881. #endif
  5882. }
  5883. }
  5884. /*
  5885. ** Delete an Fts5TokenDataIter structure and its contents.
  5886. */
  5887. static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){
  5888. if( pSet ){
  5889. int ii;
  5890. for(ii=0; ii<pSet->nIter; ii++){
  5891. fts5MultiIterFree(pSet->apIter[ii]);
  5892. }
  5893. fts5BufferFree(&pSet->terms);
  5894. sqlite3_free(pSet->aPoslistReader);
  5895. sqlite3_free(pSet->aMap);
  5896. sqlite3_free(pSet);
  5897. }
  5898. }
  5899. /*
  5900. ** fts5VisitEntries() context object used by fts5SetupPrefixIterTokendata()
  5901. ** to pass data to prefixIterSetupTokendataCb().
  5902. */
  5903. typedef struct TokendataSetupCtx TokendataSetupCtx;
  5904. struct TokendataSetupCtx {
  5905. Fts5TokenDataIter *pT; /* Object being populated with mappings */
  5906. int iTermOff; /* Offset of current term in terms.p[] */
  5907. int nTermByte; /* Size of current term in bytes */
  5908. };
  5909. /*
  5910. ** fts5VisitEntries() callback used by fts5SetupPrefixIterTokendata(). This
  5911. ** callback adds an entry to the Fts5TokenDataIter.aMap[] array for each
  5912. ** position in the current position-list. It doesn't matter that some of
  5913. ** these may be out of order - they will be sorted later.
  5914. */
  5915. static void prefixIterSetupTokendataCb(
  5916. Fts5Index *p,
  5917. void *pCtx,
  5918. Fts5Iter *p1,
  5919. const u8 *pNew,
  5920. int nNew
  5921. ){
  5922. TokendataSetupCtx *pSetup = (TokendataSetupCtx*)pCtx;
  5923. int iPosOff = 0;
  5924. i64 iPos = 0;
  5925. if( pNew ){
  5926. pSetup->nTermByte = nNew-1;
  5927. pSetup->iTermOff = pSetup->pT->terms.n;
  5928. fts5BufferAppendBlob(&p->rc, &pSetup->pT->terms, nNew-1, pNew+1);
  5929. }
  5930. while( 0==sqlite3Fts5PoslistNext64(
  5931. p1->base.pData, p1->base.nData, &iPosOff, &iPos
  5932. ) ){
  5933. fts5TokendataIterAppendMap(p,
  5934. pSetup->pT, pSetup->iTermOff, pSetup->nTermByte, p1->base.iRowid, iPos
  5935. );
  5936. }
  5937. }
  5938. /*
  5939. ** Context object passed by fts5SetupPrefixIter() to fts5VisitEntries().
  5940. */
  5941. typedef struct PrefixSetupCtx PrefixSetupCtx;
  5942. struct PrefixSetupCtx {
  5943. void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
  5944. void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*);
  5945. i64 iLastRowid;
  5946. int nMerge;
  5947. Fts5Buffer *aBuf;
  5948. int nBuf;
  5949. Fts5Buffer doclist;
  5950. TokendataSetupCtx *pTokendata;
  5951. };
  5952. /*
  5953. ** fts5VisitEntries() callback used by fts5SetupPrefixIter()
  5954. */
  5955. static void prefixIterSetupCb(
  5956. Fts5Index *p,
  5957. void *pCtx,
  5958. Fts5Iter *p1,
  5959. const u8 *pNew,
  5960. int nNew
  5961. ){
  5962. PrefixSetupCtx *pSetup = (PrefixSetupCtx*)pCtx;
  5963. const int nMerge = pSetup->nMerge;
  5964. if( p1->base.nData>0 ){
  5965. if( p1->base.iRowid<=pSetup->iLastRowid && pSetup->doclist.n>0 ){
  5966. int i;
  5967. for(i=0; p->rc==SQLITE_OK && pSetup->doclist.n; i++){
  5968. int i1 = i*nMerge;
  5969. int iStore;
  5970. assert( i1+nMerge<=pSetup->nBuf );
  5971. for(iStore=i1; iStore<i1+nMerge; iStore++){
  5972. if( pSetup->aBuf[iStore].n==0 ){
  5973. fts5BufferSwap(&pSetup->doclist, &pSetup->aBuf[iStore]);
  5974. fts5BufferZero(&pSetup->doclist);
  5975. break;
  5976. }
  5977. }
  5978. if( iStore==i1+nMerge ){
  5979. pSetup->xMerge(p, &pSetup->doclist, nMerge, &pSetup->aBuf[i1]);
  5980. for(iStore=i1; iStore<i1+nMerge; iStore++){
  5981. fts5BufferZero(&pSetup->aBuf[iStore]);
  5982. }
  5983. }
  5984. }
  5985. pSetup->iLastRowid = 0;
  5986. }
  5987. pSetup->xAppend(
  5988. p, (u64)p1->base.iRowid-(u64)pSetup->iLastRowid, p1, &pSetup->doclist
  5989. );
  5990. pSetup->iLastRowid = p1->base.iRowid;
  5991. }
  5992. if( pSetup->pTokendata ){
  5993. prefixIterSetupTokendataCb(p, (void*)pSetup->pTokendata, p1, pNew, nNew);
  5994. }
  5995. }
  5996. static void fts5SetupPrefixIter(
  5997. Fts5Index *p, /* Index to read from */
  5998. int bDesc, /* True for "ORDER BY rowid DESC" */
  5999. int iIdx, /* Index to scan for data */
  6000. u8 *pToken, /* Buffer containing prefix to match */
  6001. int nToken, /* Size of buffer pToken in bytes */
  6002. Fts5Colset *pColset, /* Restrict matches to these columns */
  6003. Fts5Iter **ppIter /* OUT: New iterator */
  6004. ){
  6005. Fts5Structure *pStruct;
  6006. PrefixSetupCtx s;
  6007. TokendataSetupCtx s2;
  6008. memset(&s, 0, sizeof(s));
  6009. memset(&s2, 0, sizeof(s2));
  6010. s.nMerge = 1;
  6011. s.iLastRowid = 0;
  6012. s.nBuf = 32;
  6013. if( iIdx==0
  6014. && p->pConfig->eDetail==FTS5_DETAIL_FULL
  6015. && p->pConfig->bPrefixInsttoken
  6016. ){
  6017. s.pTokendata = &s2;
  6018. s2.pT = (Fts5TokenDataIter*)fts5IdxMalloc(p, SZ_FTS5TOKENDATAITER(1));
  6019. }
  6020. if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
  6021. s.xMerge = fts5MergeRowidLists;
  6022. s.xAppend = fts5AppendRowid;
  6023. }else{
  6024. s.nMerge = FTS5_MERGE_NLIST-1;
  6025. s.nBuf = s.nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */
  6026. s.xMerge = fts5MergePrefixLists;
  6027. s.xAppend = fts5AppendPoslist;
  6028. }
  6029. s.aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*s.nBuf);
  6030. pStruct = fts5StructureRead(p);
  6031. assert( p->rc!=SQLITE_OK || (s.aBuf && pStruct) );
  6032. if( p->rc==SQLITE_OK ){
  6033. void *pCtx = (void*)&s;
  6034. int i;
  6035. Fts5Data *pData;
  6036. /* If iIdx is non-zero, then it is the number of a prefix-index for
  6037. ** prefixes 1 character longer than the prefix being queried for. That
  6038. ** index contains all the doclists required, except for the one
  6039. ** corresponding to the prefix itself. That one is extracted from the
  6040. ** main term index here. */
  6041. if( iIdx!=0 ){
  6042. pToken[0] = FTS5_MAIN_PREFIX;
  6043. fts5VisitEntries(p, pColset, pToken, nToken, 0, prefixIterSetupCb, pCtx);
  6044. }
  6045. pToken[0] = FTS5_MAIN_PREFIX + iIdx;
  6046. fts5VisitEntries(p, pColset, pToken, nToken, 1, prefixIterSetupCb, pCtx);
  6047. assert( (s.nBuf%s.nMerge)==0 );
  6048. for(i=0; i<s.nBuf; i+=s.nMerge){
  6049. int iFree;
  6050. if( p->rc==SQLITE_OK ){
  6051. s.xMerge(p, &s.doclist, s.nMerge, &s.aBuf[i]);
  6052. }
  6053. for(iFree=i; iFree<i+s.nMerge; iFree++){
  6054. fts5BufferFree(&s.aBuf[iFree]);
  6055. }
  6056. }
  6057. pData = fts5IdxMalloc(p, sizeof(*pData)
  6058. + ((i64)s.doclist.n)+FTS5_DATA_ZERO_PADDING);
  6059. assert( pData!=0 || p->rc!=SQLITE_OK );
  6060. if( pData ){
  6061. pData->p = (u8*)&pData[1];
  6062. pData->nn = pData->szLeaf = s.doclist.n;
  6063. if( s.doclist.n ) memcpy(pData->p, s.doclist.p, s.doclist.n);
  6064. fts5MultiIterNew2(p, pData, bDesc, ppIter);
  6065. }
  6066. assert( (*ppIter)!=0 || p->rc!=SQLITE_OK );
  6067. if( p->rc==SQLITE_OK && s.pTokendata ){
  6068. fts5TokendataIterSortMap(p, s2.pT);
  6069. (*ppIter)->pTokenDataIter = s2.pT;
  6070. s2.pT = 0;
  6071. }
  6072. }
  6073. fts5TokendataIterDelete(s2.pT);
  6074. fts5BufferFree(&s.doclist);
  6075. fts5StructureRelease(pStruct);
  6076. sqlite3_free(s.aBuf);
  6077. }
  6078. /*
  6079. ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
  6080. ** to the document with rowid iRowid.
  6081. */
  6082. int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
  6083. assert( p->rc==SQLITE_OK );
  6084. /* Allocate the hash table if it has not already been allocated */
  6085. if( p->pHash==0 ){
  6086. p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
  6087. }
  6088. /* Flush the hash table to disk if required */
  6089. if( iRowid<p->iWriteRowid
  6090. || (iRowid==p->iWriteRowid && p->bDelete==0)
  6091. || (p->nPendingData > p->pConfig->nHashSize)
  6092. ){
  6093. fts5IndexFlush(p);
  6094. }
  6095. p->iWriteRowid = iRowid;
  6096. p->bDelete = bDelete;
  6097. if( bDelete==0 ){
  6098. p->nPendingRow++;
  6099. }
  6100. return fts5IndexReturn(p);
  6101. }
  6102. /*
  6103. ** Commit data to disk.
  6104. */
  6105. int sqlite3Fts5IndexSync(Fts5Index *p){
  6106. assert( p->rc==SQLITE_OK );
  6107. fts5IndexFlush(p);
  6108. fts5IndexCloseReader(p);
  6109. return fts5IndexReturn(p);
  6110. }
  6111. /*
  6112. ** Discard any data stored in the in-memory hash tables. Do not write it
  6113. ** to the database. Additionally, assume that the contents of the %_data
  6114. ** table may have changed on disk. So any in-memory caches of %_data
  6115. ** records must be invalidated.
  6116. */
  6117. int sqlite3Fts5IndexRollback(Fts5Index *p){
  6118. fts5IndexCloseReader(p);
  6119. fts5IndexDiscardData(p);
  6120. fts5StructureInvalidate(p);
  6121. return fts5IndexReturn(p);
  6122. }
  6123. /*
  6124. ** The %_data table is completely empty when this function is called. This
  6125. ** function populates it with the initial structure objects for each index,
  6126. ** and the initial version of the "averages" record (a zero-byte blob).
  6127. */
  6128. int sqlite3Fts5IndexReinit(Fts5Index *p){
  6129. Fts5Structure *pTmp;
  6130. u8 tmpSpace[SZ_FTS5STRUCTURE(1)];
  6131. fts5StructureInvalidate(p);
  6132. fts5IndexDiscardData(p);
  6133. pTmp = (Fts5Structure*)tmpSpace;
  6134. memset(pTmp, 0, SZ_FTS5STRUCTURE(1));
  6135. if( p->pConfig->bContentlessDelete ){
  6136. pTmp->nOriginCntr = 1;
  6137. }
  6138. fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
  6139. fts5StructureWrite(p, pTmp);
  6140. return fts5IndexReturn(p);
  6141. }
  6142. /*
  6143. ** Open a new Fts5Index handle. If the bCreate argument is true, create
  6144. ** and initialize the underlying %_data table.
  6145. **
  6146. ** If successful, set *pp to point to the new object and return SQLITE_OK.
  6147. ** Otherwise, set *pp to NULL and return an SQLite error code.
  6148. */
  6149. int sqlite3Fts5IndexOpen(
  6150. Fts5Config *pConfig,
  6151. int bCreate,
  6152. Fts5Index **pp,
  6153. char **pzErr
  6154. ){
  6155. int rc = SQLITE_OK;
  6156. Fts5Index *p; /* New object */
  6157. *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
  6158. if( rc==SQLITE_OK ){
  6159. p->pConfig = pConfig;
  6160. p->nWorkUnit = FTS5_WORK_UNIT;
  6161. p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
  6162. if( p->zDataTbl && bCreate ){
  6163. rc = sqlite3Fts5CreateTable(
  6164. pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
  6165. );
  6166. if( rc==SQLITE_OK ){
  6167. rc = sqlite3Fts5CreateTable(pConfig, "idx",
  6168. "segid, term, pgno, PRIMARY KEY(segid, term)",
  6169. 1, pzErr
  6170. );
  6171. }
  6172. if( rc==SQLITE_OK ){
  6173. rc = sqlite3Fts5IndexReinit(p);
  6174. }
  6175. }
  6176. }
  6177. assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
  6178. if( rc ){
  6179. sqlite3Fts5IndexClose(p);
  6180. *pp = 0;
  6181. }
  6182. return rc;
  6183. }
  6184. /*
  6185. ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
  6186. */
  6187. int sqlite3Fts5IndexClose(Fts5Index *p){
  6188. int rc = SQLITE_OK;
  6189. if( p ){
  6190. assert( p->pReader==0 );
  6191. fts5StructureInvalidate(p);
  6192. sqlite3_finalize(p->pWriter);
  6193. sqlite3_finalize(p->pDeleter);
  6194. sqlite3_finalize(p->pIdxWriter);
  6195. sqlite3_finalize(p->pIdxDeleter);
  6196. sqlite3_finalize(p->pIdxSelect);
  6197. sqlite3_finalize(p->pIdxNextSelect);
  6198. sqlite3_finalize(p->pDataVersion);
  6199. sqlite3_finalize(p->pDeleteFromIdx);
  6200. sqlite3Fts5HashFree(p->pHash);
  6201. sqlite3_free(p->zDataTbl);
  6202. sqlite3_free(p);
  6203. }
  6204. return rc;
  6205. }
  /*
  ** Argument p points to a buffer containing utf-8 text that is nByte bytes
  ** in size. Return the number of bytes occupied by the first nChar
  ** characters of the buffer, or 0 if the buffer contains fewer than nChar
  ** characters.
  **
  ** A byte of 0xC0 or greater starts a multi-byte character; any bytes of
  ** the form 10xxxxxx that follow belong to it. Zero is also returned if a
  ** lead byte is the very last byte of the buffer.
  */
  int sqlite3Fts5IndexCharlenToBytelen(
    const char *p,
    int nByte,
    int nChar
  ){
    int iOff = 0;                   /* Current byte offset into p[] */
    int nSeen;                      /* Characters consumed so far */

    for(nSeen=0; nSeen<nChar; nSeen++){
      unsigned char c;

      if( iOff>=nByte ) return 0;   /* Input contains fewer than nChar chars */
      c = (unsigned char)p[iOff++];
      if( c<0xc0 ) continue;        /* Single-byte character */

      /* Multi-byte character: skip over its continuation bytes */
      if( iOff>=nByte ) return 0;
      while( (p[iOff] & 0xc0)==0x80 ){
        iOff++;
        if( iOff>=nByte ){
          /* Hitting the end of the buffer is fine only if this was the
          ** final character requested. */
          if( nSeen+1==nChar ) break;
          return 0;
        }
      }
    }
    return iOff;
  }
  /*
  ** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
  ** unicode characters in the string.
  **
  ** Each byte that is not consumed as a continuation byte (10xxxxxx)
  ** following a lead byte (0xC0 or greater) counts as one character.
  */
  static int fts5IndexCharlen(const char *pIn, int nIn){
    int nCount = 0;
    int iByte;

    for(iByte=0; iByte<nIn; nCount++){
      unsigned char lead = (unsigned char)pIn[iByte++];
      if( lead>=0xc0 ){
        while( iByte<nIn && (pIn[iByte] & 0xc0)==0x80 ){
          iByte++;
        }
      }
    }
    return nCount;
  }
  6248. /*
  6249. ** Insert or remove data to or from the index. Each time a document is
  6250. ** added to or removed from the index, this function is called one or more
  6251. ** times.
  6252. **
  6253. ** For an insert, it must be called once for each token in the new document.
  6254. ** If the operation is a delete, it must be called (at least) once for each
  6255. ** unique token in the document with an iCol value less than zero. The iPos
  6256. ** argument is ignored for a delete.
  6257. */
  6258. int sqlite3Fts5IndexWrite(
  6259. Fts5Index *p, /* Index to write to */
  6260. int iCol, /* Column token appears in (-ve -> delete) */
  6261. int iPos, /* Position of token within column */
  6262. const char *pToken, int nToken /* Token to add or remove to or from index */
  6263. ){
  6264. int i; /* Used to iterate through indexes */
  6265. int rc = SQLITE_OK; /* Return code */
  6266. Fts5Config *pConfig = p->pConfig;
  6267. assert( p->rc==SQLITE_OK );
  6268. assert( (iCol<0)==p->bDelete );
  6269. /* Add the entry to the main terms index. */
  6270. rc = sqlite3Fts5HashWrite(
  6271. p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
  6272. );
  6273. for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
  6274. const int nChar = pConfig->aPrefix[i];
  6275. int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
  6276. if( nByte ){
  6277. rc = sqlite3Fts5HashWrite(p->pHash,
  6278. p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
  6279. nByte
  6280. );
  6281. }
  6282. }
  6283. return rc;
  6284. }
  6285. /*
  6286. ** pToken points to a buffer of size nToken bytes containing a search
  6287. ** term, including the index number at the start, used on a tokendata=1
  6288. ** table. This function returns true if the term in buffer pBuf matches
  6289. ** token pToken/nToken.
  6290. */
  6291. static int fts5IsTokendataPrefix(
  6292. Fts5Buffer *pBuf,
  6293. const u8 *pToken,
  6294. int nToken
  6295. ){
  6296. return (
  6297. pBuf->n>=nToken
  6298. && 0==memcmp(pBuf->p, pToken, nToken)
  6299. && (pBuf->n==nToken || pBuf->p[nToken]==0x00)
  6300. );
  6301. }
  6302. /*
  6303. ** Ensure the segment-iterator passed as the only argument points to EOF.
  6304. */
  6305. static void fts5SegIterSetEOF(Fts5SegIter *pSeg){
  6306. fts5DataRelease(pSeg->pLeaf);
  6307. pSeg->pLeaf = 0;
  6308. }
  6309. static void fts5IterClose(Fts5IndexIter *pIndexIter){
  6310. if( pIndexIter ){
  6311. Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  6312. Fts5Index *pIndex = pIter->pIndex;
  6313. fts5TokendataIterDelete(pIter->pTokenDataIter);
  6314. fts5MultiIterFree(pIter);
  6315. fts5IndexCloseReader(pIndex);
  6316. }
  6317. }
  6318. /*
  6319. ** This function appends iterator pAppend to Fts5TokenDataIter pIn and
  6320. ** returns the result.
  6321. */
  6322. static Fts5TokenDataIter *fts5AppendTokendataIter(
  6323. Fts5Index *p, /* Index object (for error code) */
  6324. Fts5TokenDataIter *pIn, /* Current Fts5TokenDataIter struct */
  6325. Fts5Iter *pAppend /* Append this iterator */
  6326. ){
  6327. Fts5TokenDataIter *pRet = pIn;
  6328. if( p->rc==SQLITE_OK ){
  6329. if( pIn==0 || pIn->nIter==pIn->nIterAlloc ){
  6330. int nAlloc = pIn ? pIn->nIterAlloc*2 : 16;
  6331. int nByte = SZ_FTS5TOKENDATAITER(nAlloc+1);
  6332. Fts5TokenDataIter *pNew = (Fts5TokenDataIter*)sqlite3_realloc(pIn, nByte);
  6333. if( pNew==0 ){
  6334. p->rc = SQLITE_NOMEM;
  6335. }else{
  6336. if( pIn==0 ) memset(pNew, 0, nByte);
  6337. pRet = pNew;
  6338. pNew->nIterAlloc = nAlloc;
  6339. }
  6340. }
  6341. }
  6342. if( p->rc ){
  6343. fts5IterClose((Fts5IndexIter*)pAppend);
  6344. }else{
  6345. pRet->apIter[pRet->nIter++] = pAppend;
  6346. }
  6347. assert( pRet==0 || pRet->nIter<=pRet->nIterAlloc );
  6348. return pRet;
  6349. }
  6350. /*
  6351. ** The iterator passed as the only argument must be a tokendata=1 iterator
  6352. ** (pIter->pTokenDataIter!=0). This function sets the iterator output
  6353. ** variables (pIter->base.*) according to the contents of the current
  6354. ** row.
  6355. */
  6356. static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){
  6357. int ii;
  6358. int nHit = 0;
  6359. i64 iRowid = SMALLEST_INT64;
  6360. int iMin = 0;
  6361. Fts5TokenDataIter *pT = pIter->pTokenDataIter;
  6362. pIter->base.nData = 0;
  6363. pIter->base.pData = 0;
  6364. for(ii=0; ii<pT->nIter; ii++){
  6365. Fts5Iter *p = pT->apIter[ii];
  6366. if( p->base.bEof==0 ){
  6367. if( nHit==0 || p->base.iRowid<iRowid ){
  6368. iRowid = p->base.iRowid;
  6369. nHit = 1;
  6370. pIter->base.pData = p->base.pData;
  6371. pIter->base.nData = p->base.nData;
  6372. iMin = ii;
  6373. }else if( p->base.iRowid==iRowid ){
  6374. nHit++;
  6375. }
  6376. }
  6377. }
  6378. if( nHit==0 ){
  6379. pIter->base.bEof = 1;
  6380. }else{
  6381. int eDetail = pIter->pIndex->pConfig->eDetail;
  6382. pIter->base.bEof = 0;
  6383. pIter->base.iRowid = iRowid;
  6384. if( nHit==1 && eDetail==FTS5_DETAIL_FULL ){
  6385. fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, 0, iRowid, -1);
  6386. }else
  6387. if( nHit>1 && eDetail!=FTS5_DETAIL_NONE ){
  6388. int nReader = 0;
  6389. int nByte = 0;
  6390. i64 iPrev = 0;
  6391. /* Allocate array of iterators if they are not already allocated. */
  6392. if( pT->aPoslistReader==0 ){
  6393. pT->aPoslistReader = (Fts5PoslistReader*)sqlite3Fts5MallocZero(
  6394. &pIter->pIndex->rc,
  6395. pT->nIter * (sizeof(Fts5PoslistReader) + sizeof(int))
  6396. );
  6397. if( pT->aPoslistReader==0 ) return;
  6398. pT->aPoslistToIter = (int*)&pT->aPoslistReader[pT->nIter];
  6399. }
  6400. /* Populate an iterator for each poslist that will be merged */
  6401. for(ii=0; ii<pT->nIter; ii++){
  6402. Fts5Iter *p = pT->apIter[ii];
  6403. if( iRowid==p->base.iRowid ){
  6404. pT->aPoslistToIter[nReader] = ii;
  6405. sqlite3Fts5PoslistReaderInit(
  6406. p->base.pData, p->base.nData, &pT->aPoslistReader[nReader++]
  6407. );
  6408. nByte += p->base.nData;
  6409. }
  6410. }
  6411. /* Ensure the output buffer is large enough */
  6412. if( fts5BufferGrow(&pIter->pIndex->rc, &pIter->poslist, nByte+nHit*10) ){
  6413. return;
  6414. }
  6415. /* Ensure the token-mapping is large enough */
  6416. if( eDetail==FTS5_DETAIL_FULL && pT->nMapAlloc<(pT->nMap + nByte) ){
  6417. int nNew = (pT->nMapAlloc + nByte) * 2;
  6418. Fts5TokenDataMap *aNew = (Fts5TokenDataMap*)sqlite3_realloc(
  6419. pT->aMap, nNew*sizeof(Fts5TokenDataMap)
  6420. );
  6421. if( aNew==0 ){
  6422. pIter->pIndex->rc = SQLITE_NOMEM;
  6423. return;
  6424. }
  6425. pT->aMap = aNew;
  6426. pT->nMapAlloc = nNew;
  6427. }
  6428. pIter->poslist.n = 0;
  6429. while( 1 ){
  6430. i64 iMinPos = LARGEST_INT64;
  6431. /* Find smallest position */
  6432. iMin = 0;
  6433. for(ii=0; ii<nReader; ii++){
  6434. Fts5PoslistReader *pReader = &pT->aPoslistReader[ii];
  6435. if( pReader->bEof==0 ){
  6436. if( pReader->iPos<iMinPos ){
  6437. iMinPos = pReader->iPos;
  6438. iMin = ii;
  6439. }
  6440. }
  6441. }
  6442. /* If all readers were at EOF, break out of the loop. */
  6443. if( iMinPos==LARGEST_INT64 ) break;
  6444. sqlite3Fts5PoslistSafeAppend(&pIter->poslist, &iPrev, iMinPos);
  6445. sqlite3Fts5PoslistReaderNext(&pT->aPoslistReader[iMin]);
  6446. if( eDetail==FTS5_DETAIL_FULL ){
  6447. pT->aMap[pT->nMap].iPos = iMinPos;
  6448. pT->aMap[pT->nMap].iIter = pT->aPoslistToIter[iMin];
  6449. pT->aMap[pT->nMap].iRowid = iRowid;
  6450. pT->nMap++;
  6451. }
  6452. }
  6453. pIter->base.pData = pIter->poslist.p;
  6454. pIter->base.nData = pIter->poslist.n;
  6455. }
  6456. }
  6457. }
  6458. /*
  6459. ** The iterator passed as the only argument must be a tokendata=1 iterator
  6460. ** (pIter->pTokenDataIter!=0). This function advances the iterator. If
  6461. ** argument bFrom is false, then the iterator is advanced to the next
  6462. ** entry. Or, if bFrom is true, it is advanced to the first entry with
  6463. ** a rowid of iFrom or greater.
  6464. */
  6465. static void fts5TokendataIterNext(Fts5Iter *pIter, int bFrom, i64 iFrom){
  6466. int ii;
  6467. Fts5TokenDataIter *pT = pIter->pTokenDataIter;
  6468. Fts5Index *pIndex = pIter->pIndex;
  6469. for(ii=0; ii<pT->nIter; ii++){
  6470. Fts5Iter *p = pT->apIter[ii];
  6471. if( p->base.bEof==0
  6472. && (p->base.iRowid==pIter->base.iRowid || (bFrom && p->base.iRowid<iFrom))
  6473. ){
  6474. fts5MultiIterNext(pIndex, p, bFrom, iFrom);
  6475. while( bFrom && p->base.bEof==0
  6476. && p->base.iRowid<iFrom
  6477. && pIndex->rc==SQLITE_OK
  6478. ){
  6479. fts5MultiIterNext(pIndex, p, 0, 0);
  6480. }
  6481. }
  6482. }
  6483. if( pIndex->rc==SQLITE_OK ){
  6484. fts5IterSetOutputsTokendata(pIter);
  6485. }
  6486. }
  6487. /*
  6488. ** If the segment-iterator passed as the first argument is at EOF, then
  6489. ** set pIter->term to a copy of buffer pTerm.
  6490. */
  6491. static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){
  6492. if( pIter && pIter->aSeg[0].pLeaf==0 ){
  6493. fts5BufferSet(&pIter->pIndex->rc, &pIter->aSeg[0].term, pTerm->n, pTerm->p);
  6494. }
  6495. }
  6496. /*
  6497. ** This function sets up an iterator to use for a non-prefix query on a
  6498. ** tokendata=1 table.
  6499. */
  6500. static Fts5Iter *fts5SetupTokendataIter(
  6501. Fts5Index *p, /* FTS index to query */
  6502. const u8 *pToken, /* Buffer containing query term */
  6503. int nToken, /* Size of buffer pToken in bytes */
  6504. Fts5Colset *pColset /* Colset to filter on */
  6505. ){
  6506. Fts5Iter *pRet = 0;
  6507. Fts5TokenDataIter *pSet = 0;
  6508. Fts5Structure *pStruct = 0;
  6509. const int flags = FTS5INDEX_QUERY_SCANONETERM | FTS5INDEX_QUERY_SCAN;
  6510. Fts5Buffer bSeek = {0, 0, 0};
  6511. Fts5Buffer *pSmall = 0;
  6512. fts5IndexFlush(p);
  6513. pStruct = fts5StructureRead(p);
  6514. while( p->rc==SQLITE_OK ){
  6515. Fts5Iter *pPrev = pSet ? pSet->apIter[pSet->nIter-1] : 0;
  6516. Fts5Iter *pNew = 0;
  6517. Fts5SegIter *pNewIter = 0;
  6518. Fts5SegIter *pPrevIter = 0;
  6519. int iLvl, iSeg, ii;
  6520. pNew = fts5MultiIterAlloc(p, pStruct->nSegment);
  6521. if( pSmall ){
  6522. fts5BufferSet(&p->rc, &bSeek, pSmall->n, pSmall->p);
  6523. fts5BufferAppendBlob(&p->rc, &bSeek, 1, (const u8*)"\0");
  6524. }else{
  6525. fts5BufferSet(&p->rc, &bSeek, nToken, pToken);
  6526. }
  6527. if( p->rc ){
  6528. fts5IterClose((Fts5IndexIter*)pNew);
  6529. break;
  6530. }
  6531. pNewIter = &pNew->aSeg[0];
  6532. pPrevIter = (pPrev ? &pPrev->aSeg[0] : 0);
  6533. for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
  6534. for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){
  6535. Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
  6536. int bDone = 0;
  6537. if( pPrevIter ){
  6538. if( fts5BufferCompare(pSmall, &pPrevIter->term) ){
  6539. memcpy(pNewIter, pPrevIter, sizeof(Fts5SegIter));
  6540. memset(pPrevIter, 0, sizeof(Fts5SegIter));
  6541. bDone = 1;
  6542. }else if( pPrevIter->iEndofDoclist>pPrevIter->pLeaf->szLeaf ){
  6543. fts5SegIterNextInit(p,(const char*)bSeek.p,bSeek.n-1,pSeg,pNewIter);
  6544. bDone = 1;
  6545. }
  6546. }
  6547. if( bDone==0 ){
  6548. fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter);
  6549. }
  6550. if( pPrevIter ){
  6551. if( pPrevIter->pTombArray ){
  6552. pNewIter->pTombArray = pPrevIter->pTombArray;
  6553. pNewIter->pTombArray->nRef++;
  6554. }
  6555. }else{
  6556. fts5SegIterAllocTombstone(p, pNewIter);
  6557. }
  6558. pNewIter++;
  6559. if( pPrevIter ) pPrevIter++;
  6560. if( p->rc ) break;
  6561. }
  6562. }
  6563. fts5TokendataSetTermIfEof(pPrev, pSmall);
  6564. pNew->bSkipEmpty = 1;
  6565. pNew->pColset = pColset;
  6566. fts5IterSetOutputCb(&p->rc, pNew);
  6567. /* Loop through all segments in the new iterator. Find the smallest
  6568. ** term that any segment-iterator points to. Iterator pNew will be
  6569. ** used for this term. Also, set any iterator that points to a term that
  6570. ** does not match pToken/nToken to point to EOF */
  6571. pSmall = 0;
  6572. for(ii=0; ii<pNew->nSeg; ii++){
  6573. Fts5SegIter *pII = &pNew->aSeg[ii];
  6574. if( 0==fts5IsTokendataPrefix(&pII->term, pToken, nToken) ){
  6575. fts5SegIterSetEOF(pII);
  6576. }
  6577. if( pII->pLeaf && (!pSmall || fts5BufferCompare(pSmall, &pII->term)>0) ){
  6578. pSmall = &pII->term;
  6579. }
  6580. }
  6581. /* If pSmall is still NULL at this point, then the new iterator does
  6582. ** not point to any terms that match the query. So delete it and break
  6583. ** out of the loop - all required iterators have been collected. */
  6584. if( pSmall==0 ){
  6585. fts5IterClose((Fts5IndexIter*)pNew);
  6586. break;
  6587. }
  6588. /* Append this iterator to the set and continue. */
  6589. pSet = fts5AppendTokendataIter(p, pSet, pNew);
  6590. }
  6591. if( p->rc==SQLITE_OK && pSet ){
  6592. int ii;
  6593. for(ii=0; ii<pSet->nIter; ii++){
  6594. Fts5Iter *pIter = pSet->apIter[ii];
  6595. int iSeg;
  6596. for(iSeg=0; iSeg<pIter->nSeg; iSeg++){
  6597. pIter->aSeg[iSeg].flags |= FTS5_SEGITER_ONETERM;
  6598. }
  6599. fts5MultiIterFinishSetup(p, pIter);
  6600. }
  6601. }
  6602. if( p->rc==SQLITE_OK ){
  6603. pRet = fts5MultiIterAlloc(p, 0);
  6604. }
  6605. if( pRet ){
  6606. pRet->nSeg = 0;
  6607. pRet->pTokenDataIter = pSet;
  6608. if( pSet ){
  6609. fts5IterSetOutputsTokendata(pRet);
  6610. }else{
  6611. pRet->base.bEof = 1;
  6612. }
  6613. }else{
  6614. fts5TokendataIterDelete(pSet);
  6615. }
  6616. fts5StructureRelease(pStruct);
  6617. fts5BufferFree(&bSeek);
  6618. return pRet;
  6619. }
  6620. /*
  6621. ** Open a new iterator to iterate though all rowid that match the
  6622. ** specified token or token prefix.
  6623. */
  6624. int sqlite3Fts5IndexQuery(
  6625. Fts5Index *p, /* FTS index to query */
  6626. const char *pToken, int nToken, /* Token (or prefix) to query for */
  6627. int flags, /* Mask of FTS5INDEX_QUERY_X flags */
  6628. Fts5Colset *pColset, /* Match these columns only */
  6629. Fts5IndexIter **ppIter /* OUT: New iterator object */
  6630. ){
  6631. Fts5Config *pConfig = p->pConfig;
  6632. Fts5Iter *pRet = 0;
  6633. Fts5Buffer buf = {0, 0, 0};
  6634. /* If the QUERY_SCAN flag is set, all other flags must be clear. */
  6635. assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
  6636. if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
  6637. int iIdx = 0; /* Index to search */
  6638. int iPrefixIdx = 0; /* +1 prefix index */
  6639. int bTokendata = pConfig->bTokendata;
  6640. assert( buf.p!=0 );
  6641. if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken);
  6642. /* The NOTOKENDATA flag is set when each token in a tokendata=1 table
  6643. ** should be treated individually, instead of merging all those with
  6644. ** a common prefix into a single entry. This is used, for example, by
  6645. ** queries performed as part of an integrity-check, or by the fts5vocab
  6646. ** module. */
  6647. if( flags & (FTS5INDEX_QUERY_NOTOKENDATA|FTS5INDEX_QUERY_SCAN) ){
  6648. bTokendata = 0;
  6649. }
  6650. /* Figure out which index to search and set iIdx accordingly. If this
  6651. ** is a prefix query for which there is no prefix index, set iIdx to
  6652. ** greater than pConfig->nPrefix to indicate that the query will be
  6653. ** satisfied by scanning multiple terms in the main index.
  6654. **
  6655. ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
  6656. ** prefix-query. Instead of using a prefix-index (if one exists),
  6657. ** evaluate the prefix query using the main FTS index. This is used
  6658. ** for internal sanity checking by the integrity-check in debug
  6659. ** mode only. */
  6660. #ifdef SQLITE_DEBUG
  6661. if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
  6662. assert( flags & FTS5INDEX_QUERY_PREFIX );
  6663. iIdx = 1+pConfig->nPrefix;
  6664. }else
  6665. #endif
  6666. if( flags & FTS5INDEX_QUERY_PREFIX ){
  6667. int nChar = fts5IndexCharlen(pToken, nToken);
  6668. for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
  6669. int nIdxChar = pConfig->aPrefix[iIdx-1];
  6670. if( nIdxChar==nChar ) break;
  6671. if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx;
  6672. }
  6673. }
  6674. if( bTokendata && iIdx==0 ){
  6675. buf.p[0] = FTS5_MAIN_PREFIX;
  6676. pRet = fts5SetupTokendataIter(p, buf.p, nToken+1, pColset);
  6677. }else if( iIdx<=pConfig->nPrefix ){
  6678. /* Straight index lookup */
  6679. Fts5Structure *pStruct = fts5StructureRead(p);
  6680. buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
  6681. if( pStruct ){
  6682. fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
  6683. pColset, buf.p, nToken+1, -1, 0, &pRet
  6684. );
  6685. fts5StructureRelease(pStruct);
  6686. }
  6687. }else{
  6688. /* Scan multiple terms in the main index for a prefix query. */
  6689. int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
  6690. fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet);
  6691. if( pRet==0 ){
  6692. assert( p->rc!=SQLITE_OK );
  6693. }else{
  6694. assert( pRet->pColset==0 );
  6695. fts5IterSetOutputCb(&p->rc, pRet);
  6696. if( p->rc==SQLITE_OK ){
  6697. Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
  6698. if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
  6699. }
  6700. }
  6701. }
  6702. if( p->rc ){
  6703. fts5IterClose((Fts5IndexIter*)pRet);
  6704. pRet = 0;
  6705. fts5IndexCloseReader(p);
  6706. }
  6707. *ppIter = (Fts5IndexIter*)pRet;
  6708. sqlite3Fts5BufferFree(&buf);
  6709. }
  6710. return fts5IndexReturn(p);
  6711. }
  6712. /*
  6713. ** Return true if the iterator passed as the only argument is at EOF.
  6714. */
  6715. /*
  6716. ** Move to the next matching rowid.
  6717. */
  6718. int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
  6719. Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  6720. assert( pIter->pIndex->rc==SQLITE_OK );
  6721. if( pIter->nSeg==0 ){
  6722. assert( pIter->pTokenDataIter );
  6723. fts5TokendataIterNext(pIter, 0, 0);
  6724. }else{
  6725. fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
  6726. }
  6727. return fts5IndexReturn(pIter->pIndex);
  6728. }
  6729. /*
  6730. ** Move to the next matching term/rowid. Used by the fts5vocab module.
  6731. */
  6732. int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
  6733. Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  6734. Fts5Index *p = pIter->pIndex;
  6735. assert( pIter->pIndex->rc==SQLITE_OK );
  6736. fts5MultiIterNext(p, pIter, 0, 0);
  6737. if( p->rc==SQLITE_OK ){
  6738. Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  6739. if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
  6740. fts5DataRelease(pSeg->pLeaf);
  6741. pSeg->pLeaf = 0;
  6742. pIter->base.bEof = 1;
  6743. }
  6744. }
  6745. return fts5IndexReturn(pIter->pIndex);
  6746. }
  6747. /*
  6748. ** Move to the next matching rowid that occurs at or after iMatch. The
  6749. ** definition of "at or after" depends on whether this iterator iterates
  6750. ** in ascending or descending rowid order.
  6751. */
  6752. int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
  6753. Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  6754. if( pIter->nSeg==0 ){
  6755. assert( pIter->pTokenDataIter );
  6756. fts5TokendataIterNext(pIter, 1, iMatch);
  6757. }else{
  6758. fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
  6759. }
  6760. return fts5IndexReturn(pIter->pIndex);
  6761. }
  6762. /*
  6763. ** Return the current term.
  6764. */
  6765. const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
  6766. int n;
  6767. const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
  6768. assert_nc( z || n<=1 );
  6769. *pn = n-1;
  6770. return (z ? &z[1] : 0);
  6771. }
  6772. /*
  6773. ** pIter is a prefix query. This function populates pIter->pTokenDataIter
  6774. ** with an Fts5TokenDataIter object containing mappings for all rows
  6775. ** matched by the query.
  6776. */
  6777. static int fts5SetupPrefixIterTokendata(
  6778. Fts5Iter *pIter,
  6779. const char *pToken, /* Token prefix to search for */
  6780. int nToken /* Size of pToken in bytes */
  6781. ){
  6782. Fts5Index *p = pIter->pIndex;
  6783. Fts5Buffer token = {0, 0, 0};
  6784. TokendataSetupCtx ctx;
  6785. memset(&ctx, 0, sizeof(ctx));
  6786. fts5BufferGrow(&p->rc, &token, nToken+1);
  6787. assert( token.p!=0 || p->rc!=SQLITE_OK );
  6788. ctx.pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc,
  6789. SZ_FTS5TOKENDATAITER(1));
  6790. if( p->rc==SQLITE_OK ){
  6791. /* Fill in the token prefix to search for */
  6792. token.p[0] = FTS5_MAIN_PREFIX;
  6793. memcpy(&token.p[1], pToken, nToken);
  6794. token.n = nToken+1;
  6795. fts5VisitEntries(
  6796. p, 0, token.p, token.n, 1, prefixIterSetupTokendataCb, (void*)&ctx
  6797. );
  6798. fts5TokendataIterSortMap(p, ctx.pT);
  6799. }
  6800. if( p->rc==SQLITE_OK ){
  6801. pIter->pTokenDataIter = ctx.pT;
  6802. }else{
  6803. fts5TokendataIterDelete(ctx.pT);
  6804. }
  6805. fts5BufferFree(&token);
  6806. return fts5IndexReturn(p);
  6807. }
/*
** This is used by xInstToken() to access the token at offset iOff, column
** iCol of row iRowid. The token is returned via output variables *ppOut
** and *pnOut. The iterator passed as the first argument must either
** already have a token-data object (pIter->pTokenDataIter!=0), or be an
** iterator with pIter->nSeg>0 for which pToken is supplied.
**
** pToken/nToken:
**   Consulted only when pIter->pTokenDataIter is NULL on entry. In that
**   case they are passed to fts5SetupPrefixIterTokendata() to build the
**   token-data object before the lookup is performed.
**
** If no mapping for (iRowid, iCol, iOff) is found, *ppOut and *pnOut are
** left unmodified and SQLITE_OK is still returned. An error code is
** returned only if fts5SetupPrefixIterTokendata() fails.
*/
int sqlite3Fts5IterToken(
  Fts5IndexIter *pIndexIter,
  const char *pToken, int nToken,
  i64 iRowid,
  int iCol,
  int iOff,
  const char **ppOut, int *pnOut
){
  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  Fts5TokenDataIter *pT = pIter->pTokenDataIter;
  /* Column number in the upper 32 bits, token offset added below it. */
  i64 iPos = (((i64)iCol)<<32) + iOff;
  Fts5TokenDataMap *aMap = 0;
  int i1 = 0;                     /* Lower bound of search (inclusive) */
  int i2 = 0;                     /* Upper bound of search (exclusive) */
  int iTest = 0;                  /* Entry currently being examined */

  assert( pT || (pToken && pIter->nSeg>0) );
  if( pT==0 ){
    /* No token-data object yet. Build it from the supplied prefix. */
    int rc = fts5SetupPrefixIterTokendata(pIter, pToken, nToken);
    if( rc!=SQLITE_OK ) return rc;
    pT = pIter->pTokenDataIter;
  }

  i2 = pT->nMap;
  aMap = pT->aMap;

  /* Binary search of aMap[]. The comparisons treat the array as ordered
  ** by iRowid first and iPos second. The loop exits via "break" (leaving
  ** i2>i1) when entry iTest is accepted, or with i2<=i1 if nothing
  ** matches. */
  while( i2>i1 ){
    iTest = (i1 + i2) / 2;

    if( aMap[iTest].iRowid<iRowid ){
      i1 = iTest+1;
    }else if( aMap[iTest].iRowid>iRowid ){
      i2 = iTest;
    }else{
      if( aMap[iTest].iPos<iPos ){
        /* An entry for the right rowid with a negative iPos is accepted
        ** as a match regardless of the position requested.
        ** NOTE(review): presumably such entries carry no position
        ** information - confirm against the code that builds the map. */
        if( aMap[iTest].iPos<0 ){
          break;
        }
        i1 = iTest+1;
      }else if( aMap[iTest].iPos>iPos ){
        i2 = iTest;
      }else{
        break;
      }
    }
  }

  if( i2>i1 ){
    if( pIter->nSeg==0 ){
      /* iIter indexes pT->apIter[]. The token is that iterator's current
      ** term with its leading byte skipped. */
      Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter];
      *ppOut = (const char*)pMap->aSeg[0].term.p+1;
      *pnOut = pMap->aSeg[0].term.n-1;
    }else{
      /* Here iIter is used as a byte offset into the pT->terms buffer,
      ** and nByte gives the size of the token stored there. */
      Fts5TokenDataMap *p = &aMap[iTest];
      *ppOut = (const char*)&pT->terms.p[p->iIter];
      *pnOut = aMap[iTest].nByte;
    }
  }

  return SQLITE_OK;
}
  6871. /*
  6872. ** Clear any existing entries from the token-map associated with the
  6873. ** iterator passed as the only argument.
  6874. */
  6875. void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){
  6876. Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  6877. if( pIter && pIter->pTokenDataIter
  6878. && (pIter->nSeg==0 || pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_FULL)
  6879. ){
  6880. pIter->pTokenDataIter->nMap = 0;
  6881. }
  6882. }
  6883. /*
  6884. ** Set a token-mapping for the iterator passed as the first argument. This
  6885. ** is used in detail=column or detail=none mode when a token is requested
  6886. ** using the xInstToken() API. In this case the caller tokenizers the
  6887. ** current row and configures the token-mapping via multiple calls to this
  6888. ** function.
  6889. */
  6890. int sqlite3Fts5IndexIterWriteTokendata(
  6891. Fts5IndexIter *pIndexIter,
  6892. const char *pToken, int nToken,
  6893. i64 iRowid, int iCol, int iOff
  6894. ){
  6895. Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
  6896. Fts5TokenDataIter *pT = pIter->pTokenDataIter;
  6897. Fts5Index *p = pIter->pIndex;
  6898. i64 iPos = (((i64)iCol)<<32) + iOff;
  6899. assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL );
  6900. assert( pIter->pTokenDataIter || pIter->nSeg>0 );
  6901. if( pIter->nSeg>0 ){
  6902. /* This is a prefix term iterator. */
  6903. if( pT==0 ){
  6904. pT = (Fts5TokenDataIter*)sqlite3Fts5MallocZero(&p->rc,
  6905. SZ_FTS5TOKENDATAITER(1));
  6906. pIter->pTokenDataIter = pT;
  6907. }
  6908. if( pT ){
  6909. fts5TokendataIterAppendMap(p, pT, pT->terms.n, nToken, iRowid, iPos);
  6910. fts5BufferAppendBlob(&p->rc, &pT->terms, nToken, (const u8*)pToken);
  6911. }
  6912. }else{
  6913. int ii;
  6914. for(ii=0; ii<pT->nIter; ii++){
  6915. Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term;
  6916. if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break;
  6917. }
  6918. if( ii<pT->nIter ){
  6919. fts5TokendataIterAppendMap(p, pT, ii, 0, iRowid, iPos);
  6920. }
  6921. }
  6922. return fts5IndexReturn(p);
  6923. }
  6924. /*
  6925. ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
  6926. */
  6927. void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
  6928. if( pIndexIter ){
  6929. Fts5Index *pIndex = ((Fts5Iter*)pIndexIter)->pIndex;
  6930. fts5IterClose(pIndexIter);
  6931. fts5IndexReturn(pIndex);
  6932. }
  6933. }
  6934. /*
  6935. ** Read and decode the "averages" record from the database.
  6936. **
  6937. ** Parameter anSize must point to an array of size nCol, where nCol is
  6938. ** the number of user defined columns in the FTS table.
  6939. */
  6940. int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
  6941. int nCol = p->pConfig->nCol;
  6942. Fts5Data *pData;
  6943. *pnRow = 0;
  6944. memset(anSize, 0, sizeof(i64) * nCol);
  6945. pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
  6946. if( p->rc==SQLITE_OK && pData->nn ){
  6947. int i = 0;
  6948. int iCol;
  6949. i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
  6950. for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
  6951. i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
  6952. }
  6953. }
  6954. fts5DataRelease(pData);
  6955. return fts5IndexReturn(p);
  6956. }
  6957. /*
  6958. ** Replace the current "averages" record with the contents of the buffer
  6959. ** supplied as the second argument.
  6960. */
  6961. int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
  6962. assert( p->rc==SQLITE_OK );
  6963. fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
  6964. return fts5IndexReturn(p);
  6965. }
  6966. /*
  6967. ** Return the total number of blocks this module has read from the %_data
  6968. ** table since it was created.
  6969. */
  6970. int sqlite3Fts5IndexReads(Fts5Index *p){
  6971. return p->nRead;
  6972. }
  6973. /*
  6974. ** Set the 32-bit cookie value stored at the start of all structure
  6975. ** records to the value passed as the second argument.
  6976. **
  6977. ** Return SQLITE_OK if successful, or an SQLite error code if an error
  6978. ** occurs.
  6979. */
  6980. int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
  6981. int rc; /* Return code */
  6982. Fts5Config *pConfig = p->pConfig; /* Configuration object */
  6983. u8 aCookie[4]; /* Binary representation of iNew */
  6984. sqlite3_blob *pBlob = 0;
  6985. assert( p->rc==SQLITE_OK );
  6986. sqlite3Fts5Put32(aCookie, iNew);
  6987. rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
  6988. "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
  6989. );
  6990. if( rc==SQLITE_OK ){
  6991. sqlite3_blob_write(pBlob, aCookie, 4, 0);
  6992. rc = sqlite3_blob_close(pBlob);
  6993. }
  6994. return rc;
  6995. }
  6996. int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
  6997. Fts5Structure *pStruct;
  6998. pStruct = fts5StructureRead(p);
  6999. fts5StructureRelease(pStruct);
  7000. return fts5IndexReturn(p);
  7001. }
  7002. /*
  7003. ** Retrieve the origin value that will be used for the segment currently
  7004. ** being accumulated in the in-memory hash table when it is flushed to
  7005. ** disk. If successful, SQLITE_OK is returned and (*piOrigin) set to
  7006. ** the queried value. Or, if an error occurs, an error code is returned
  7007. ** and the final value of (*piOrigin) is undefined.
  7008. */
  7009. int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin){
  7010. Fts5Structure *pStruct;
  7011. pStruct = fts5StructureRead(p);
  7012. if( pStruct ){
  7013. *piOrigin = pStruct->nOriginCntr;
  7014. fts5StructureRelease(pStruct);
  7015. }
  7016. return fts5IndexReturn(p);
  7017. }
/*
** Buffer pPg contains a page of a tombstone hash table - one of nPg pages
** associated with the same segment. This function adds rowid iRowid to
** the hash table. The caller is required to guarantee that there is at
** least one free slot on the page.
**
** Return values:
**
**   0: iRowid was recorded. This is also returned, after the element
**      count has already been incremented, if probing examines every
**      slot without finding a free one.
**
**   1: Parameter bForce is false and the page is deemed to be full (at
**      least half of the slots are occupied). iRowid is not inserted.
**
**   2: The page uses 4 byte keys but iRowid does not fit in 32 bits.
**      iRowid is not inserted.
**
** Rowid 0 cannot be stored in a slot, as an all-zero slot means "empty".
** It is recorded by setting byte 1 of the page header to 0x01 instead.
*/
static int fts5IndexTombstoneAddToPage(
  Fts5Data *pPg,
  int bForce,
  int nPg,
  u64 iRowid
){
  const int szKey = TOMBSTONE_KEYSIZE(pPg);
  const int nSlot = TOMBSTONE_NSLOT(pPg);
  const int nElem = fts5GetU32(&pPg->p[4]);  /* Current element count */
  int iSlot = (iRowid / nPg) % nSlot;        /* First slot to probe */
  int nCollide = nSlot;                      /* Limit on number of probes */

  if( szKey==4 && iRowid>0xFFFFFFFF ) return 2;
  if( iRowid==0 ){
    pPg->p[1] = 0x01;
    return 0;
  }

  if( bForce==0 && nElem>=(nSlot/2) ){
    return 1;
  }

  /* The element count in the header is updated before a slot is found. */
  fts5PutU32(&pPg->p[4], nElem+1);
  if( szKey==4 ){
    u32 *aSlot = (u32*)&pPg->p[8];
    /* Linear probing: step forward, wrapping around, until a zero slot. */
    while( aSlot[iSlot] ){
      iSlot = (iSlot + 1) % nSlot;
      if( nCollide--==0 ) return 0;
    }
    fts5PutU32((u8*)&aSlot[iSlot], (u32)iRowid);
  }else{
    u64 *aSlot = (u64*)&pPg->p[8];
    while( aSlot[iSlot] ){
      iSlot = (iSlot + 1) % nSlot;
      if( nCollide--==0 ) return 0;
    }
    fts5PutU64((u8*)&aSlot[iSlot], iRowid);
  }

  return 0;
}
/*
** This function attempts to build a new hash containing all the keys
** currently in the tombstone hash table for segment pSeg. The new
** hash will be stored in the nOut buffers passed in array apOut[].
** All pages of the new hash use key-size szKey (4 or 8).
**
** Return 0 if the hash is successfully rebuilt into the nOut pages.
** Or non-zero if it is not (because one page became overfull). In this
** case the caller should retry with a larger nOut parameter.
**
** Parameter pData1 is page iPg1 of the hash table being rebuilt. It is
** used in place of reading that page from the database. All other pages
** are loaded with fts5DataRead() and released again before returning.
** If a page cannot be read, it is skipped here.
*/
static int fts5IndexTombstoneRehash(
  Fts5Index *p,
  Fts5StructureSegment *pSeg,     /* Segment to rebuild hash of */
  Fts5Data *pData1,               /* One page of current hash - or NULL */
  int iPg1,                       /* Which page of the current hash is pData1 */
  int szKey,                      /* 4 or 8, the keysize */
  int nOut,                       /* Number of output pages */
  Fts5Data **apOut                /* Array of output hash pages */
){
  int ii;
  int res = 0;

  /* Initialize the headers of all the output pages. Byte 0 is the key
  ** size and bytes 4..7 the element count. */
  for(ii=0; ii<nOut; ii++){
    apOut[ii]->p[0] = szKey;
    fts5PutU32(&apOut[ii]->p[4], 0);
  }

  /* Loop through the current pages of the hash table. */
  for(ii=0; res==0 && ii<pSeg->nPgTombstone; ii++){
    Fts5Data *pData = 0;          /* Page ii of the current hash table */
    Fts5Data *pFree = 0;          /* Free this at the end of the loop */

    if( iPg1==ii ){
      /* The caller owns pData1, so it must not be released here. */
      pData = pData1;
    }else{
      pFree = pData = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii));
    }

    if( pData ){
      /* The input page may use a different key size to the output. */
      int szKeyIn = TOMBSTONE_KEYSIZE(pData);
      int nSlotIn = (pData->nn - 8) / szKeyIn;
      int iIn;
      for(iIn=0; iIn<nSlotIn; iIn++){
        u64 iVal = 0;

        /* Read the value from slot iIn of the input page into iVal. */
        if( szKeyIn==4 ){
          u32 *aSlot = (u32*)&pData->p[8];
          if( aSlot[iIn] ) iVal = fts5GetU32((u8*)&aSlot[iIn]);
        }else{
          u64 *aSlot = (u64*)&pData->p[8];
          if( aSlot[iIn] ) iVal = fts5GetU64((u8*)&aSlot[iIn]);
        }

        /* If iVal is not 0 at this point, insert it into the new hash table.
        ** The output page is selected by (iVal % nOut). The insert is not
        ** forced, so a page that is already deemed full causes this to fail. */
        if( iVal ){
          Fts5Data *pPg = apOut[(iVal % nOut)];
          res = fts5IndexTombstoneAddToPage(pPg, 0, nOut, iVal);
          if( res ) break;
        }
      }

      /* If this is page 0 of the old hash, copy the rowid-0-flag from the
      ** old hash to the new. */
      if( ii==0 ){
        apOut[0]->p[1] = pData->p[1];
      }
    }
    fts5DataRelease(pFree);
  }

  return res;
}
  7134. /*
  7135. ** This is called to rebuild the hash table belonging to segment pSeg.
  7136. ** If parameter pData1 is not NULL, then one page of the existing hash table
  7137. ** has already been loaded - pData1, which is page iPg1. The key-size for
  7138. ** the new hash table is szKey (4 or 8).
  7139. **
  7140. ** If successful, the new hash table is not written to disk. Instead,
  7141. ** output parameter (*pnOut) is set to the number of pages in the new
  7142. ** hash table, and (*papOut) to point to an array of buffers containing
  7143. ** the new page data.
  7144. **
  7145. ** If an error occurs, an error code is left in the Fts5Index object and
  7146. ** both output parameters set to 0 before returning.
  7147. */
  7148. static void fts5IndexTombstoneRebuild(
  7149. Fts5Index *p,
  7150. Fts5StructureSegment *pSeg, /* Segment to rebuild hash of */
  7151. Fts5Data *pData1, /* One page of current hash - or NULL */
  7152. int iPg1, /* Which page of the current hash is pData1 */
  7153. int szKey, /* 4 or 8, the keysize */
  7154. int *pnOut, /* OUT: Number of output pages */
  7155. Fts5Data ***papOut /* OUT: Output hash pages */
  7156. ){
  7157. const int MINSLOT = 32;
  7158. int nSlotPerPage = MAX(MINSLOT, (p->pConfig->pgsz - 8) / szKey);
  7159. int nSlot = 0; /* Number of slots in each output page */
  7160. int nOut = 0;
  7161. /* Figure out how many output pages (nOut) and how many slots per
  7162. ** page (nSlot). There are three possibilities:
  7163. **
  7164. ** 1. The hash table does not yet exist. In this case the new hash
  7165. ** table will consist of a single page with MINSLOT slots.
  7166. **
  7167. ** 2. The hash table exists but is currently a single page. In this
  7168. ** case an attempt is made to grow the page to accommodate the new
  7169. ** entry. The page is allowed to grow up to nSlotPerPage (see above)
  7170. ** slots.
  7171. **
  7172. ** 3. The hash table already consists of more than one page, or of
  7173. ** a single page already so large that it cannot be grown. In this
  7174. ** case the new hash consists of (nPg*2+1) pages of nSlotPerPage
  7175. ** slots each, where nPg is the current number of pages in the
  7176. ** hash table.
  7177. */
  7178. if( pSeg->nPgTombstone==0 ){
  7179. /* Case 1. */
  7180. nOut = 1;
  7181. nSlot = MINSLOT;
  7182. }else if( pSeg->nPgTombstone==1 ){
  7183. /* Case 2. */
  7184. int nElem = (int)fts5GetU32(&pData1->p[4]);
  7185. assert( pData1 && iPg1==0 );
  7186. nOut = 1;
  7187. nSlot = MAX(nElem*4, MINSLOT);
  7188. if( nSlot>nSlotPerPage ) nOut = 0;
  7189. }
  7190. if( nOut==0 ){
  7191. /* Case 3. */
  7192. nOut = (pSeg->nPgTombstone * 2 + 1);
  7193. nSlot = nSlotPerPage;
  7194. }
  7195. /* Allocate the required array and output pages */
  7196. while( 1 ){
  7197. int res = 0;
  7198. int ii = 0;
  7199. int szPage = 0;
  7200. Fts5Data **apOut = 0;
  7201. /* Allocate space for the new hash table */
  7202. assert( nSlot>=MINSLOT );
  7203. apOut = (Fts5Data**)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5Data*) * nOut);
  7204. szPage = 8 + nSlot*szKey;
  7205. for(ii=0; ii<nOut; ii++){
  7206. Fts5Data *pNew = (Fts5Data*)sqlite3Fts5MallocZero(&p->rc,
  7207. sizeof(Fts5Data)+szPage
  7208. );
  7209. if( pNew ){
  7210. pNew->nn = szPage;
  7211. pNew->p = (u8*)&pNew[1];
  7212. apOut[ii] = pNew;
  7213. }
  7214. }
  7215. /* Rebuild the hash table. */
  7216. if( p->rc==SQLITE_OK ){
  7217. res = fts5IndexTombstoneRehash(p, pSeg, pData1, iPg1, szKey, nOut, apOut);
  7218. }
  7219. if( res==0 ){
  7220. if( p->rc ){
  7221. fts5IndexFreeArray(apOut, nOut);
  7222. apOut = 0;
  7223. nOut = 0;
  7224. }
  7225. *pnOut = nOut;
  7226. *papOut = apOut;
  7227. break;
  7228. }
  7229. /* If control flows to here, it was not possible to rebuild the hash
  7230. ** table. Free all buffers and then try again with more pages. */
  7231. assert( p->rc==SQLITE_OK );
  7232. fts5IndexFreeArray(apOut, nOut);
  7233. nSlot = nSlotPerPage;
  7234. nOut = nOut*2 + 1;
  7235. }
  7236. }
/*
** Add a tombstone for rowid iRowid to segment pSeg.
**
** If the segment already has a tombstone hash table, an attempt is first
** made to insert iRowid into the existing page selected by
** (iRowid % nPgTombstone). If that succeeds, only that page is written
** back. Otherwise - or if there is no hash table yet - the whole hash
** table is rebuilt, iRowid is added to it, all of its pages are written
** out and the structure record is updated with the new page count.
**
** Errors are reported via p->rc.
*/
static void fts5IndexTombstoneAdd(
  Fts5Index *p,
  Fts5StructureSegment *pSeg,
  u64 iRowid
){
  Fts5Data *pPg = 0;              /* Existing page that iRowid hashes to */
  int iPg = -1;                   /* Page number of pPg, or -1 if none */
  int szKey = 0;                  /* Key size for the rebuilt hash table */
  int nHash = 0;                  /* Number of pages in rebuilt hash table */
  Fts5Data **apHash = 0;          /* Pages of rebuilt hash table */

  p->nContentlessDelete++;

  if( pSeg->nPgTombstone>0 ){
    iPg = iRowid % pSeg->nPgTombstone;
    pPg = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg));
    if( pPg==0 ){
      assert( p->rc!=SQLITE_OK );
      return;
    }

    /* Fast path: the rowid was recorded on the existing page. */
    if( 0==fts5IndexTombstoneAddToPage(pPg, 0, pSeg->nPgTombstone, iRowid) ){
      fts5DataWrite(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg), pPg->p, pPg->nn);
      fts5DataRelease(pPg);
      return;
    }
  }

  /* Have to rebuild the hash table. First figure out the key-size (4 or 8).
  ** The existing key size is kept unless iRowid requires 8 byte keys. */
  szKey = pPg ? TOMBSTONE_KEYSIZE(pPg) : 4;
  if( iRowid>0xFFFFFFFF ) szKey = 8;

  /* Rebuild the hash table */
  fts5IndexTombstoneRebuild(p, pSeg, pPg, iPg, szKey, &nHash, &apHash);
  assert( p->rc==SQLITE_OK || (nHash==0 && apHash==0) );

  /* If all has succeeded, write the new rowid into one of the new hash
  ** table pages, then write them all out to disk. */
  if( nHash ){
    int ii = 0;
    /* The insert is forced (bForce==1), so it is not refused on the
    ** grounds that the page is deemed full. */
    fts5IndexTombstoneAddToPage(apHash[iRowid % nHash], 1, nHash, iRowid);
    for(ii=0; ii<nHash; ii++){
      i64 iTombstoneRowid = FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii);
      fts5DataWrite(p, iTombstoneRowid, apHash[ii]->p, apHash[ii]->nn);
    }
    pSeg->nPgTombstone = nHash;
    fts5StructureWrite(p, p->pStruct);
  }

  fts5DataRelease(pPg);
  fts5IndexFreeArray(apHash, nHash);
}
  7285. /*
  7286. ** Add iRowid to the tombstone list of the segment or segments that contain
  7287. ** rows from origin iOrigin. Return SQLITE_OK if successful, or an SQLite
  7288. ** error code otherwise.
  7289. */
  7290. int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid){
  7291. Fts5Structure *pStruct;
  7292. pStruct = fts5StructureRead(p);
  7293. if( pStruct ){
  7294. int bFound = 0; /* True after pSeg->nEntryTombstone incr. */
  7295. int iLvl;
  7296. for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
  7297. int iSeg;
  7298. for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){
  7299. Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
  7300. if( pSeg->iOrigin1<=(u64)iOrigin && pSeg->iOrigin2>=(u64)iOrigin ){
  7301. if( bFound==0 ){
  7302. pSeg->nEntryTombstone++;
  7303. bFound = 1;
  7304. }
  7305. fts5IndexTombstoneAdd(p, pSeg, iRowid);
  7306. }
  7307. }
  7308. }
  7309. fts5StructureRelease(pStruct);
  7310. }
  7311. return fts5IndexReturn(p);
  7312. }
  7313. /*************************************************************************
  7314. **************************************************************************
  7315. ** Below this point is the implementation of the integrity-check
  7316. ** functionality.
  7317. */
  7318. /*
  7319. ** Return a simple checksum value based on the arguments.
  7320. */
  7321. u64 sqlite3Fts5IndexEntryCksum(
  7322. i64 iRowid,
  7323. int iCol,
  7324. int iPos,
  7325. int iIdx,
  7326. const char *pTerm,
  7327. int nTerm
  7328. ){
  7329. int i;
  7330. u64 ret = iRowid;
  7331. ret += (ret<<3) + iCol;
  7332. ret += (ret<<3) + iPos;
  7333. if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
  7334. for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
  7335. return ret;
  7336. }
  7337. #ifdef SQLITE_DEBUG
  7338. /*
  7339. ** This function is purely an internal test. It does not contribute to
  7340. ** FTS functionality, or even the integrity-check, in any way.
  7341. **
  7342. ** Instead, it tests that the same set of pgno/rowid combinations are
  7343. ** visited regardless of whether the doclist-index identified by parameters
  7344. ** iSegid/iLeaf is iterated in forwards or reverse order.
  7345. */
  7346. static void fts5TestDlidxReverse(
  7347. Fts5Index *p,
  7348. int iSegid, /* Segment id to load from */
  7349. int iLeaf /* Load doclist-index for this leaf */
  7350. ){
  7351. Fts5DlidxIter *pDlidx = 0;
  7352. u64 cksum1 = 13;
  7353. u64 cksum2 = 13;
  7354. for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
  7355. fts5DlidxIterEof(p, pDlidx)==0;
  7356. fts5DlidxIterNext(p, pDlidx)
  7357. ){
  7358. i64 iRowid = fts5DlidxIterRowid(pDlidx);
  7359. int pgno = fts5DlidxIterPgno(pDlidx);
  7360. assert( pgno>iLeaf );
  7361. cksum1 += iRowid + ((i64)pgno<<32);
  7362. }
  7363. fts5DlidxIterFree(pDlidx);
  7364. pDlidx = 0;
  7365. for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
  7366. fts5DlidxIterEof(p, pDlidx)==0;
  7367. fts5DlidxIterPrev(p, pDlidx)
  7368. ){
  7369. i64 iRowid = fts5DlidxIterRowid(pDlidx);
  7370. int pgno = fts5DlidxIterPgno(pDlidx);
  7371. assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
  7372. cksum2 += iRowid + ((i64)pgno<<32);
  7373. }
  7374. fts5DlidxIterFree(pDlidx);
  7375. pDlidx = 0;
  7376. if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
  7377. }
  7378. static int fts5QueryCksum(
  7379. Fts5Index *p, /* Fts5 index object */
  7380. int iIdx,
  7381. const char *z, /* Index key to query for */
  7382. int n, /* Size of index key in bytes */
  7383. int flags, /* Flags for Fts5IndexQuery */
  7384. u64 *pCksum /* IN/OUT: Checksum value */
  7385. ){
  7386. int eDetail = p->pConfig->eDetail;
  7387. u64 cksum = *pCksum;
  7388. Fts5IndexIter *pIter = 0;
  7389. int rc = sqlite3Fts5IndexQuery(
  7390. p, z, n, (flags | FTS5INDEX_QUERY_NOTOKENDATA), 0, &pIter
  7391. );
  7392. while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){
  7393. i64 rowid = pIter->iRowid;
  7394. if( eDetail==FTS5_DETAIL_NONE ){
  7395. cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
  7396. }else{
  7397. Fts5PoslistReader sReader;
  7398. for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
  7399. sReader.bEof==0;
  7400. sqlite3Fts5PoslistReaderNext(&sReader)
  7401. ){
  7402. int iCol = FTS5_POS2COLUMN(sReader.iPos);
  7403. int iOff = FTS5_POS2OFFSET(sReader.iPos);
  7404. cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
  7405. }
  7406. }
  7407. if( rc==SQLITE_OK ){
  7408. rc = sqlite3Fts5IterNext(pIter);
  7409. }
  7410. }
  7411. fts5IterClose(pIter);
  7412. *pCksum = cksum;
  7413. return rc;
  7414. }
  7415. /*
  7416. ** Check if buffer z[], size n bytes, contains as series of valid utf-8
  7417. ** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
  7418. ** contain valid utf-8, return non-zero.
  7419. */
  7420. static int fts5TestUtf8(const char *z, int n){
  7421. int i = 0;
  7422. assert_nc( n>0 );
  7423. while( i<n ){
  7424. if( (z[i] & 0x80)==0x00 ){
  7425. i++;
  7426. }else
  7427. if( (z[i] & 0xE0)==0xC0 ){
  7428. if( i+1>=n || (z[i+1] & 0xC0)!=0x80 ) return 1;
  7429. i += 2;
  7430. }else
  7431. if( (z[i] & 0xF0)==0xE0 ){
  7432. if( i+2>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
  7433. i += 3;
  7434. }else
  7435. if( (z[i] & 0xF8)==0xF0 ){
  7436. if( i+3>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
  7437. if( (z[i+2] & 0xC0)!=0x80 ) return 1;
  7438. i += 3;
  7439. }else{
  7440. return 1;
  7441. }
  7442. }
  7443. return 0;
  7444. }
/*
** This function is also purely an internal test. It does not contribute to
** FTS functionality, or even the integrity-check, in any way.
**
** Buffer pPrev holds the previous index key seen, including its leading
** prefix byte. z/n is the key now being visited. If pPrev is empty it is
** just set to z/n. Otherwise, if no error has occurred and z/n differs
** from pPrev, the key in pPrev is queried through the public query
** interface in several ways and the resulting checksums compared. The
** checksum of the ascending query is then XORed into (*pCksum), pPrev is
** set to z/n, and the new (*pCksum) is compared against parameter
** "expected". Any mismatch is reported as FTS5_CORRUPT via p->rc.
*/
static void fts5TestTerm(
  Fts5Index *p,
  Fts5Buffer *pPrev,              /* Previous term */
  const char *z, int n,           /* Possibly new term to test */
  u64 expected,
  u64 *pCksum
){
  int rc = p->rc;
  if( pPrev->n==0 ){
    fts5BufferSet(&rc, pPrev, n, (const u8*)z);
  }else
  if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
    u64 cksum3 = *pCksum;
    const char *zTerm = (const char*)&pPrev->p[1];  /* term sans prefix-byte */
    int nTerm = pPrev->n-1;            /* Size of zTerm in bytes */
    int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
    int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
    u64 ck1 = 0;
    u64 ck2 = 0;

    /* Check that the results returned for ASC and DESC queries are
    ** the same. If not, call this corruption.  */
    rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
    if( rc==SQLITE_OK ){
      int f = flags|FTS5INDEX_QUERY_DESC;
      rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
    }
    if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;

    /* If this is a prefix query, check that the results returned if
    ** the index is disabled are the same. In both ASC and DESC order.
    **
    ** This check may only be performed if the hash table is empty. This
    ** is because the hash table only supports a single scan query at
    ** a time, and the multi-iter loop from which this function is called
    ** is already performing such a scan.
    **
    ** Also only do this if buffer zTerm contains nTerm bytes of valid
    ** utf-8. Otherwise, the last part of the buffer contents might contain
    ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
    ** character stored in the main fts index, which will cause the
    ** test to fail.  */
    if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){
      if( iIdx>0 && rc==SQLITE_OK ){
        int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
        ck2 = 0;
        rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
      }
      if( iIdx>0 && rc==SQLITE_OK ){
        int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
        ck2 = 0;
        rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
      }
    }

    /* Fold the checksum for the previous term into the running total,
    ** remember the new term, and compare the total against the value
    ** supplied by the caller. */
    cksum3 ^= ck1;
    fts5BufferSet(&rc, pPrev, n, (const u8*)z);

    if( rc==SQLITE_OK && cksum3!=expected ){
      rc = FTS5_CORRUPT;
    }
    *pCksum = cksum3;
  }
  p->rc = rc;
}
  7512. #else
  7513. # define fts5TestDlidxReverse(x,y,z)
  7514. # define fts5TestTerm(u,v,w,x,y,z)
  7515. #endif
  7516. /*
  7517. ** Check that:
  7518. **
  7519. ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
  7520. ** contain zero terms.
  7521. ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
  7522. ** contain zero rowids.
  7523. */
  7524. static void fts5IndexIntegrityCheckEmpty(
  7525. Fts5Index *p,
  7526. Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
  7527. int iFirst,
  7528. int iNoRowid,
  7529. int iLast
  7530. ){
  7531. int i;
  7532. /* Now check that the iter.nEmpty leaves following the current leaf
  7533. ** (a) exist and (b) contain no terms. */
  7534. for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
  7535. Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
  7536. if( pLeaf ){
  7537. if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
  7538. if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
  7539. }
  7540. fts5DataRelease(pLeaf);
  7541. }
  7542. }
  7543. static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
  7544. i64 iTermOff = 0;
  7545. int ii;
  7546. Fts5Buffer buf1 = {0,0,0};
  7547. Fts5Buffer buf2 = {0,0,0};
  7548. ii = pLeaf->szLeaf;
  7549. while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
  7550. int res;
  7551. i64 iOff;
  7552. int nIncr;
  7553. ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
  7554. iTermOff += nIncr;
  7555. iOff = iTermOff;
  7556. if( iOff>=pLeaf->szLeaf ){
  7557. p->rc = FTS5_CORRUPT;
  7558. }else if( iTermOff==nIncr ){
  7559. int nByte;
  7560. iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
  7561. if( (iOff+nByte)>pLeaf->szLeaf ){
  7562. p->rc = FTS5_CORRUPT;
  7563. }else{
  7564. fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
  7565. }
  7566. }else{
  7567. int nKeep, nByte;
  7568. iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
  7569. iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
  7570. if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
  7571. p->rc = FTS5_CORRUPT;
  7572. }else{
  7573. buf1.n = nKeep;
  7574. fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
  7575. }
  7576. if( p->rc==SQLITE_OK ){
  7577. res = fts5BufferCompare(&buf1, &buf2);
  7578. if( res<=0 ) p->rc = FTS5_CORRUPT;
  7579. }
  7580. }
  7581. fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
  7582. }
  7583. fts5BufferFree(&buf1);
  7584. fts5BufferFree(&buf2);
  7585. }
  7586. static void fts5IndexIntegrityCheckSegment(
  7587. Fts5Index *p, /* FTS5 backend object */
  7588. Fts5StructureSegment *pSeg /* Segment to check internal consistency */
  7589. ){
  7590. Fts5Config *pConfig = p->pConfig;
  7591. int bSecureDelete = (pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE);
  7592. sqlite3_stmt *pStmt = 0;
  7593. int rc2;
  7594. int iIdxPrevLeaf = pSeg->pgnoFirst-1;
  7595. int iDlidxPrevLeaf = pSeg->pgnoLast;
  7596. if( pSeg->pgnoFirst==0 ) return;
  7597. fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
  7598. "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d "
  7599. "ORDER BY 1, 2",
  7600. pConfig->zDb, pConfig->zName, pSeg->iSegid
  7601. ));
  7602. /* Iterate through the b-tree hierarchy. */
  7603. while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
  7604. i64 iRow; /* Rowid for this leaf */
  7605. Fts5Data *pLeaf; /* Data for this leaf */
  7606. const char *zIdxTerm = (const char*)sqlite3_column_blob(pStmt, 1);
  7607. int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
  7608. int iIdxLeaf = sqlite3_column_int(pStmt, 2);
  7609. int bIdxDlidx = sqlite3_column_int(pStmt, 3);
  7610. /* If the leaf in question has already been trimmed from the segment,
  7611. ** ignore this b-tree entry. Otherwise, load it into memory. */
  7612. if( iIdxLeaf<pSeg->pgnoFirst ) continue;
  7613. iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
  7614. pLeaf = fts5LeafRead(p, iRow);
  7615. if( pLeaf==0 ) break;
  7616. /* Check that the leaf contains at least one term, and that it is equal
  7617. ** to or larger than the split-key in zIdxTerm. Also check that if there
  7618. ** is also a rowid pointer within the leaf page header, it points to a
  7619. ** location before the term. */
  7620. if( pLeaf->nn<=pLeaf->szLeaf ){
  7621. if( nIdxTerm==0
  7622. && pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE
  7623. && pLeaf->nn==pLeaf->szLeaf
  7624. && pLeaf->nn==4
  7625. ){
  7626. /* special case - the very first page in a segment keeps its %_idx
  7627. ** entry even if all the terms are removed from it by secure-delete
  7628. ** operations. */
  7629. }else{
  7630. p->rc = FTS5_CORRUPT;
  7631. }
  7632. }else{
  7633. int iOff; /* Offset of first term on leaf */
  7634. int iRowidOff; /* Offset of first rowid on leaf */
  7635. int nTerm; /* Size of term on leaf in bytes */
  7636. int res; /* Comparison of term and split-key */
  7637. iOff = fts5LeafFirstTermOff(pLeaf);
  7638. iRowidOff = fts5LeafFirstRowidOff(pLeaf);
  7639. if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){
  7640. p->rc = FTS5_CORRUPT;
  7641. }else{
  7642. iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
  7643. res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
  7644. if( res==0 ) res = nTerm - nIdxTerm;
  7645. if( res<0 ) p->rc = FTS5_CORRUPT;
  7646. }
  7647. fts5IntegrityCheckPgidx(p, pLeaf);
  7648. }
  7649. fts5DataRelease(pLeaf);
  7650. if( p->rc ) break;
  7651. /* Now check that the iter.nEmpty leaves following the current leaf
  7652. ** (a) exist and (b) contain no terms. */
  7653. fts5IndexIntegrityCheckEmpty(
  7654. p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
  7655. );
  7656. if( p->rc ) break;
  7657. /* If there is a doclist-index, check that it looks right. */
  7658. if( bIdxDlidx ){
  7659. Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */
  7660. int iPrevLeaf = iIdxLeaf;
  7661. int iSegid = pSeg->iSegid;
  7662. int iPg = 0;
  7663. i64 iKey;
  7664. for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
  7665. fts5DlidxIterEof(p, pDlidx)==0;
  7666. fts5DlidxIterNext(p, pDlidx)
  7667. ){
  7668. /* Check any rowid-less pages that occur before the current leaf. */
  7669. for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
  7670. iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
  7671. pLeaf = fts5DataRead(p, iKey);
  7672. if( pLeaf ){
  7673. if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
  7674. fts5DataRelease(pLeaf);
  7675. }
  7676. }
  7677. iPrevLeaf = fts5DlidxIterPgno(pDlidx);
  7678. /* Check that the leaf page indicated by the iterator really does
  7679. ** contain the rowid suggested by the same. */
  7680. iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
  7681. pLeaf = fts5DataRead(p, iKey);
  7682. if( pLeaf ){
  7683. i64 iRowid;
  7684. int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
  7685. ASSERT_SZLEAF_OK(pLeaf);
  7686. if( iRowidOff>=pLeaf->szLeaf ){
  7687. p->rc = FTS5_CORRUPT;
  7688. }else if( bSecureDelete==0 || iRowidOff>0 ){
  7689. i64 iDlRowid = fts5DlidxIterRowid(pDlidx);
  7690. fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
  7691. if( iRowid<iDlRowid || (bSecureDelete==0 && iRowid!=iDlRowid) ){
  7692. p->rc = FTS5_CORRUPT;
  7693. }
  7694. }
  7695. fts5DataRelease(pLeaf);
  7696. }
  7697. }
  7698. iDlidxPrevLeaf = iPg;
  7699. fts5DlidxIterFree(pDlidx);
  7700. fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
  7701. }else{
  7702. iDlidxPrevLeaf = pSeg->pgnoLast;
  7703. /* TODO: Check there is no doclist index */
  7704. }
  7705. iIdxPrevLeaf = iIdxLeaf;
  7706. }
  7707. rc2 = sqlite3_finalize(pStmt);
  7708. if( p->rc==SQLITE_OK ) p->rc = rc2;
  7709. /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
  7710. #if 0
  7711. if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
  7712. p->rc = FTS5_CORRUPT;
  7713. }
  7714. #endif
  7715. }
  7716. /*
  7717. ** Run internal checks to ensure that the FTS index (a) is internally
  7718. ** consistent and (b) contains entries for which the XOR of the checksums
  7719. ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
  7720. **
  7721. ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
  7722. ** checksum does not match. Return SQLITE_OK if all checks pass without
  7723. ** error, or some other SQLite error code if another error (e.g. OOM)
  7724. ** occurs.
  7725. */
  7726. int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){
  7727. int eDetail = p->pConfig->eDetail;
  7728. u64 cksum2 = 0; /* Checksum based on contents of indexes */
  7729. Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
  7730. Fts5Iter *pIter; /* Used to iterate through entire index */
  7731. Fts5Structure *pStruct; /* Index structure */
  7732. int iLvl, iSeg;
  7733. #ifdef SQLITE_DEBUG
  7734. /* Used by extra internal tests only run if NDEBUG is not defined */
  7735. u64 cksum3 = 0; /* Checksum based on contents of indexes */
  7736. Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
  7737. #endif
  7738. const int flags = FTS5INDEX_QUERY_NOOUTPUT;
  7739. /* Load the FTS index structure */
  7740. pStruct = fts5StructureRead(p);
  7741. if( pStruct==0 ){
  7742. assert( p->rc!=SQLITE_OK );
  7743. return fts5IndexReturn(p);
  7744. }
  7745. /* Check that the internal nodes of each segment match the leaves */
  7746. for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
  7747. for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
  7748. Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
  7749. fts5IndexIntegrityCheckSegment(p, pSeg);
  7750. }
  7751. }
  7752. /* The cksum argument passed to this function is a checksum calculated
  7753. ** based on all expected entries in the FTS index (including prefix index
  7754. ** entries). This block checks that a checksum calculated based on the
  7755. ** actual contents of FTS index is identical.
  7756. **
  7757. ** Two versions of the same checksum are calculated. The first (stack
  7758. ** variable cksum2) based on entries extracted from the full-text index
  7759. ** while doing a linear scan of each individual index in turn.
  7760. **
  7761. ** As each term visited by the linear scans, a separate query for the
  7762. ** same term is performed. cksum3 is calculated based on the entries
  7763. ** extracted by these queries.
  7764. */
  7765. for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
  7766. fts5MultiIterEof(p, pIter)==0;
  7767. fts5MultiIterNext(p, pIter, 0, 0)
  7768. ){
  7769. int n; /* Size of term in bytes */
  7770. i64 iPos = 0; /* Position read from poslist */
  7771. int iOff = 0; /* Offset within poslist */
  7772. i64 iRowid = fts5MultiIterRowid(pIter);
  7773. char *z = (char*)fts5MultiIterTerm(pIter, &n);
  7774. /* If this is a new term, query for it. Update cksum3 with the results. */
  7775. fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
  7776. if( p->rc ) break;
  7777. if( eDetail==FTS5_DETAIL_NONE ){
  7778. if( 0==fts5MultiIterIsEmpty(p, pIter) ){
  7779. cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
  7780. }
  7781. }else{
  7782. poslist.n = 0;
  7783. fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
  7784. fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0");
  7785. while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
  7786. int iCol = FTS5_POS2COLUMN(iPos);
  7787. int iTokOff = FTS5_POS2OFFSET(iPos);
  7788. cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
  7789. }
  7790. }
  7791. }
  7792. fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
  7793. fts5MultiIterFree(pIter);
  7794. if( p->rc==SQLITE_OK && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
  7795. fts5StructureRelease(pStruct);
  7796. #ifdef SQLITE_DEBUG
  7797. fts5BufferFree(&term);
  7798. #endif
  7799. fts5BufferFree(&poslist);
  7800. return fts5IndexReturn(p);
  7801. }
  7802. /*************************************************************************
  7803. **************************************************************************
  7804. ** Below this point is the implementation of the fts5_decode() scalar
  7805. ** function only.
  7806. */
  #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  /*
  ** Split a segment-data rowid from the %_data table into its component
  ** fields. This is the inverse of macro FTS5_SEGMENT_ROWID().
  **
  ** Working from the least significant bit upwards, the rowid holds the
  ** page number (FTS5_DATA_PAGE_B bits), the height (FTS5_DATA_HEIGHT_B
  ** bits), the doclist-index flag (FTS5_DATA_DLI_B bits, of which the
  ** lowest is reported), the segment id (FTS5_DATA_ID_B bits) and finally
  ** a single tombstone-hash flag bit.
  */
  static void fts5DecodeRowid(
    i64 iRowid,                     /* Rowid from %_data table */
    int *pbTombstone,               /* OUT: Tombstone hash flag */
    int *piSegid,                   /* OUT: Segment id */
    int *pbDlidx,                   /* OUT: Dlidx flag */
    int *piHeight,                  /* OUT: Height */
    int *piPgno                     /* OUT: Page number */
  ){
    const i64 mPgno = ((i64)1 << FTS5_DATA_PAGE_B) - 1;
    const i64 mHeight = ((i64)1 << FTS5_DATA_HEIGHT_B) - 1;
    const i64 mSegid = ((i64)1 << FTS5_DATA_ID_B) - 1;
    i64 iRest = iRowid;             /* Bits not yet consumed */

    *piPgno = (int)(iRest & mPgno);
    iRest >>= FTS5_DATA_PAGE_B;

    *piHeight = (int)(iRest & mHeight);
    iRest >>= FTS5_DATA_HEIGHT_B;

    *pbDlidx = (int)(iRest & 0x0001);
    iRest >>= FTS5_DATA_DLI_B;

    *piSegid = (int)(iRest & mSegid);
    iRest >>= FTS5_DATA_ID_B;

    *pbTombstone = (int)(iRest & 0x0001);
  }
  #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
  #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  /*
  ** Append a human-readable description of %_data rowid iKey to buffer
  ** pBuf. Rowids with a segment id of zero are described as either the
  ** averages record or the structure record. All other rowids are shown
  ** with their decoded segment id, height and page number, prefixed by
  ** "dlidx " and/or "tombstone " when the corresponding flag is set.
  **
  ** Errors are reported through *pRc by sqlite3Fts5BufferAppendPrintf().
  */
  static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
    int bTombstone;                 /* Tombstone hash flag */
    int iSeg;                       /* Segment id */
    int bDoclistIdx;                /* Doclist-index flag */
    int iHt;                        /* Height */
    int iPage;                      /* Page number */

    fts5DecodeRowid(iKey, &bTombstone, &iSeg, &bDoclistIdx, &iHt, &iPage);

    if( iSeg!=0 ){
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%s%ssegid=%d h=%d pgno=%d}",
          bDoclistIdx ? "dlidx " : "",
          bTombstone ? "tombstone " : "",
          iSeg, iHt, iPage
      );
      return;
    }

    if( iKey!=FTS5_AVERAGES_ROWID ){
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
    }else{
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
    }
  }
  #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
  #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  /*
  ** Append a text rendering of structure object p to buffer pBuf. Each
  ** level is written as " {lvl=.. nMerge=.. nSeg=.. <segments>}", where
  ** each segment is written as " {id=.. leaves=first..last}". A segment
  ** with iOrigin1 greater than zero also has its origin range included.
  */
  static void fts5DebugStructure(
    int *pRc,                       /* IN/OUT: error code */
    Fts5Buffer *pBuf,
    Fts5Structure *p
  ){
    int iLvl;                       /* Index of current level */

    for(iLvl=0; iLvl<p->nLevel; iLvl++){
      Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
      int iSeg = 0;                 /* Index of next segment within pLvl */

      sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
          " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
      );

      while( iSeg<pLvl->nSeg ){
        Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg++];
        sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d",
            pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
        );
        if( pSeg->iOrigin1>0 ){
          sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " origin=%lld..%lld",
              pSeg->iOrigin1, pSeg->iOrigin2
          );
        }
        sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
      }

      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
    }
  }
  #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
  #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  /*
  ** This is part of the fts5_decode() debugging aid.
  **
  ** Buffer pBlob (nBlob bytes) holds a serialized Fts5Structure object.
  ** Decode it and append a human-readable representation to pBuf. If the
  ** blob cannot be decoded, the error code returned by
  ** fts5StructureDecode() is stored in *pRc and pBuf is left untouched.
  */
  static void fts5DecodeStructure(
    int *pRc,                       /* IN/OUT: error code */
    Fts5Buffer *pBuf,
    const u8 *pBlob, int nBlob
  ){
    Fts5Structure *pStruct = 0;     /* Decoded structure object */
    int rc = fts5StructureDecode(pBlob, nBlob, 0, &pStruct);

    if( rc==SQLITE_OK ){
      fts5DebugStructure(pRc, pBuf, pStruct);
      fts5StructureRelease(pStruct);
    }else{
      *pRc = rc;
    }
  }
  #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
  #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  /*
  ** This is part of the fts5_decode() debugging aid.
  **
  ** Buffer pBlob (nBlob bytes) holds an "averages" record, which is read
  ** here as a sequence of varints. Each value is truncated to an int and
  ** appended to pBuf in decimal, with single spaces between values.
  */
  static void fts5DecodeAverages(
    int *pRc,                       /* IN/OUT: error code */
    Fts5Buffer *pBuf,
    const u8 *pBlob, int nBlob
  ){
    const char *zSep = "";          /* Separator written before next value */
    int iOff;                       /* Current offset within pBlob[] */

    for(iOff=0; iOff<nBlob; ){
      u64 v;
      iOff += sqlite3Fts5GetVarint(&pBlob[iOff], &v);
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSep, (int)v);
      zSep = " ";
    }
  }
  #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
  #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  /*
  ** Buffer (a/n) is assumed to contain a list of serialized 32-bit varints.
  ** Read varints until at least n bytes have been consumed, appending the
  ** decimal value of each, preceded by a space, to buffer pBuf.
  **
  ** The return value is the number of bytes read from the input buffer.
  ** This may exceed n if the final varint straddles the end of the buffer.
  */
  static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
    int nRead;                      /* Bytes consumed so far */

    for(nRead=0; nRead<n; ){
      int v;
      nRead += fts5GetVarint32(&a[nRead], v);
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", v);
    }
    return nRead;
  }
  #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
  #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  /*
  ** The start of buffer (a/n) contains the start of a doclist. The doclist
  ** may or may not finish within the buffer. Append a text representation
  ** of the part of the doclist that is present to buffer pBuf.
  **
  ** The doclist is read as an initial docid varint followed by repeated
  ** groups of: a position-list size field, the position list itself and,
  ** if any input remains, a varint delta that is added to the previous
  ** docid. Position lists flagged by fts5GetPoslistSize() as deletes are
  ** marked with "*".
  **
  ** The return value is the number of bytes read from the input buffer.
  */
  static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
    i64 iId = 0;                    /* Most recently decoded docid */
    int nRead = 0;                  /* Bytes consumed so far */

    if( n<=0 ) return 0;

    nRead = sqlite3Fts5GetVarint(a, (u64*)&iId);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iId);

    while( nRead<n ){
      int nPos;                     /* Size of position list in bytes */
      int bDel;                     /* True if the delete flag is set */
      int nAvail;                   /* Bytes left in buffer after size field */

      nRead += fts5GetPoslistSize(&a[nRead], &nPos, &bDel);
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");

      /* Decode no more of the position list than the buffer holds. */
      nAvail = n - nRead;
      nRead += fts5DecodePoslist(
          pRc, pBuf, &a[nRead], (nAvail<nPos ? nAvail : nPos)
      );

      if( nRead<n ){
        i64 iDelta;
        nRead += sqlite3Fts5GetVarint(&a[nRead], (u64*)&iDelta);
        iId += iDelta;
        sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iId);
      }
    }

    return nRead;
  }
  #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
  #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  /*
  ** This function is part of the fts5_decode() debugging function. It is
  ** only ever used with detail=none tables.
  **
  ** Buffer (pData/nData) contains a doclist in the format used by
  ** detail=none tables: a series of varint rowid deltas, each optionally
  ** followed by one or two 0x00 bytes. Each rowid is appended to pBuf in
  ** decimal, followed by "*" if exactly one 0x00 byte trails it or "+" if
  ** two do.
  **
  ** Errors encountered while appending are reported via *pRc by
  ** sqlite3Fts5BufferAppendPrintf().
  */
  static void fts5DecodeRowidList(
    int *pRc,                       /* IN/OUT: Error code */
    Fts5Buffer *pBuf,               /* Buffer to append text to */
    const u8 *pData, int nData      /* Data to decode list-of-rowids from */
  ){
    int iOff = 0;                   /* Current offset within pData[] */
    i64 iCur = 0;                   /* Current rowid (sum of deltas so far) */

    while( iOff<nData ){
      const char *zSuffix = "";
      u64 iDelta;

      iOff += sqlite3Fts5GetVarint(&pData[iOff], &iDelta);
      iCur += iDelta;

      if( iOff<nData && pData[iOff]==0x00 ){
        iOff++;
        zSuffix = "*";
        if( iOff<nData && pData[iOff]==0x00 ){
          iOff++;
          zSuffix = "+";
        }
      }

      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iCur, zSuffix);
    }
  }
  #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
  #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  /*
  ** Append the bytes of term pTerm to buffer pBuf, writing each embedded
  ** 0x00 byte as the two characters "\0". A 0x00 terminator is stored
  ** after the appended text but is not counted in pBuf->n.
  **
  ** Space for the worst case (two output bytes per input byte, plus the
  ** terminator) is reserved up front. If *pRc is not SQLITE_OK after that
  ** reservation, nothing is appended.
  */
  static void fts5BufferAppendTerm(int *pRc, Fts5Buffer *pBuf, Fts5Buffer *pTerm){
    int i;

    fts5BufferGrow(pRc, pBuf, pTerm->n*2 + 1);
    if( *pRc!=SQLITE_OK ) return;

    for(i=0; i<pTerm->n; i++){
      u8 c = pTerm->p[i];
      if( c!=0x00 ){
        pBuf->p[pBuf->n++] = c;
        continue;
      }
      pBuf->p[pBuf->n++] = '\\';
      pBuf->p[pBuf->n++] = '0';
    }
    pBuf->p[pBuf->n] = 0x00;
  }
  #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
  #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  /*
  ** The implementation of user-defined scalar function fts5_decode().
  **
  ** The first argument is a rowid from the %_data table and the second is
  ** the blob stored against it. The result is a text description of the
  ** record. The rowid is decoded first and selects how the blob is
  ** interpreted:
  **
  **   * doclist-index record (dlidx flag set),
  **   * tombstone hash page (tombstone flag set),
  **   * averages or structure record (segment id 0),
  **   * leaf page of a detail=none table (function user-data is non-NULL),
  **   * leaf page of any other table.
  **
  ** If the record is found to be malformed, or an OOM occurs, the function
  ** result is an error code instead of text. The one exception is a leaf
  ** page of fewer than 4 bytes in the last case above, for which the text
  ** "corrupt" is returned.
  **
  ** All decoding is done from a[], a copy of the blob followed by
  ** FTS5_DATA_ZERO_PADDING zero bytes. The original blob pointer is never
  ** dereferenced, as it may be NULL when the blob is empty.
  */
  static void fts5DecodeFunction(
    sqlite3_context *pCtx,          /* Function call context */
    int nArg,                       /* Number of args (always 2) */
    sqlite3_value **apVal           /* Function arguments */
  ){
    i64 iRowid;                     /* Rowid for record being decoded */
    int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
    int bTomb;
    const u8 *aBlob; int n;         /* Record to decode */
    u8 *a = 0;
    Fts5Buffer s;                   /* Build up text to return here */
    int rc = SQLITE_OK;             /* Return code */
    sqlite3_int64 nSpace = 0;
    int eDetailNone = (sqlite3_user_data(pCtx)!=0);

    assert( nArg==2 );
    UNUSED_PARAM(nArg);
    memset(&s, 0, sizeof(Fts5Buffer));
    iRowid = sqlite3_value_int64(apVal[0]);

    /* Make a copy of the second argument (a blob) in a[]. The copy is
    ** followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents small
    ** buffer overreads even if the record is corrupt. */
    n = sqlite3_value_bytes(apVal[1]);
    aBlob = sqlite3_value_blob(apVal[1]);
    nSpace = ((i64)n) + FTS5_DATA_ZERO_PADDING;
    a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
    if( a==0 ) goto decode_out;
    if( n>0 ) memcpy(a, aBlob, n);

    fts5DecodeRowid(iRowid, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno);

    fts5DebugRowid(&rc, &s, iRowid);
    if( bDlidx ){
      Fts5Data dlidx;
      Fts5DlidxLvl lvl;

      dlidx.p = a;
      dlidx.nn = n;

      memset(&lvl, 0, sizeof(Fts5DlidxLvl));
      lvl.pData = &dlidx;
      lvl.iLeafPgno = iPgno;

      for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
        sqlite3Fts5BufferAppendPrintf(&rc, &s,
            " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
        );
      }
    }else if( bTomb ){
      /* Read the header and slots from the padded copy a[], one byte at a
      ** time. The raw blob pointer may be NULL (empty blob) and carries no
      ** alignment guarantee, so it is not used here. */
      u32 nElem = fts5GetU32(&a[4]);
      int szKey = (a[0]==4 || a[0]==8) ? a[0] : 8;
      int nSlot = (n - 8) / szKey;
      int ii;
      sqlite3Fts5BufferAppendPrintf(&rc, &s, " nElem=%d", (int)nElem);
      if( a[1] ){
        sqlite3Fts5BufferAppendPrintf(&rc, &s, " 0");
      }
      for(ii=0; ii<nSlot; ii++){
        u8 *aSlot = &a[8 + ii*szKey];
        u64 iVal;
        if( szKey==4 ){
          iVal = (u64)fts5GetU32(aSlot);
        }else{
          iVal = fts5GetU64(aSlot);
        }
        if( iVal!=0 ){
          sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", (i64)iVal);
        }
      }
    }else if( iSegid==0 ){
      if( iRowid==FTS5_AVERAGES_ROWID ){
        fts5DecodeAverages(&rc, &s, a, n);
      }else{
        fts5DecodeStructure(&rc, &s, a, n);
      }
    }else if( eDetailNone ){
      Fts5Buffer term;              /* Current term read from page */
      int szLeaf;
      int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
      int iTermOff;
      int nKeep = 0;
      int iOff;

      memset(&term, 0, sizeof(Fts5Buffer));

      /* The term/doclist area cannot be larger than the record itself. */
      if( szLeaf>n ){
        rc = FTS5_CORRUPT;
        goto decode_out;
      }

      /* Decode any entries that occur before the first term. */
      if( szLeaf<n ){
        iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
        if( iTermOff>szLeaf ){
          rc = FTS5_CORRUPT;
          goto decode_out;
        }
      }else{
        iTermOff = szLeaf;
      }
      fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);

      iOff = iTermOff;
      while( iOff<szLeaf && rc==SQLITE_OK ){
        int nAppend;

        /* Read the term data for the next term. The prefix kept from the
        ** previous term cannot be longer than that term, and the new
        ** suffix must lie within the term/doclist area. */
        iOff += fts5GetVarint32(&a[iOff], nAppend);
        if( nKeep>term.n || nAppend>(szLeaf - iOff) ){
          rc = FTS5_CORRUPT;
          break;
        }
        term.n = nKeep;
        fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
        sqlite3Fts5BufferAppendPrintf(&rc, &s, " term=");
        fts5BufferAppendTerm(&rc, &s, &term);
        iOff += nAppend;

        /* Figure out where the doclist for this term ends */
        if( iPgidxOff<n ){
          int nIncr;
          iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
          iTermOff += nIncr;
        }else{
          iTermOff = szLeaf;
        }
        if( iTermOff>szLeaf ){
          rc = FTS5_CORRUPT;
        }else{
          fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
        }
        iOff = iTermOff;
        if( iOff<szLeaf ){
          iOff += fts5GetVarint32(&a[iOff], nKeep);
        }
      }

      fts5BufferFree(&term);
    }else{
      Fts5Buffer term;              /* Current term read from page */
      int szLeaf;                   /* Offset of pgidx in a[] */
      int iPgidxOff;
      int iPgidxPrev = 0;           /* Previous value read from pgidx */
      int iTermOff = 0;
      int iRowidOff = 0;
      int iOff;
      int nDoclist;

      memset(&term, 0, sizeof(Fts5Buffer));

      if( n<4 ){
        sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
        goto decode_out;
      }else{
        iRowidOff = fts5GetU16(&a[0]);
        iPgidxOff = szLeaf = fts5GetU16(&a[2]);
        if( iPgidxOff<n ){
          fts5GetVarint32(&a[iPgidxOff], iTermOff);
        }else if( iPgidxOff>n ){
          rc = FTS5_CORRUPT;
          goto decode_out;
        }
      }

      /* Decode the position list tail at the start of the page */
      if( iRowidOff!=0 ){
        iOff = iRowidOff;
      }else if( iTermOff!=0 ){
        iOff = iTermOff;
      }else{
        iOff = szLeaf;
      }
      if( iOff>n ){
        rc = FTS5_CORRUPT;
        goto decode_out;
      }
      fts5DecodePoslist(&rc, &s, &a[4], iOff-4);

      /* Decode any more doclist data that appears on the page before the
      ** first term. */
      nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
      if( nDoclist+iOff>n ){
        rc = FTS5_CORRUPT;
        goto decode_out;
      }
      fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);

      while( iPgidxOff<n && rc==SQLITE_OK ){
        int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
        int nByte;                        /* Bytes of data */
        int iEnd;

        iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
        iPgidxPrev += nByte;
        iOff = iPgidxPrev;

        /* The doclist for this term ends where the next term begins, or at
        ** the end of the term/doclist area if this is the last term. */
        if( iPgidxOff<n ){
          fts5GetVarint32(&a[iPgidxOff], nByte);
          iEnd = iPgidxPrev + nByte;
        }else{
          iEnd = szLeaf;
        }
        if( iEnd>szLeaf ){
          rc = FTS5_CORRUPT;
          break;
        }

        if( bFirst==0 ){
          iOff += fts5GetVarint32(&a[iOff], nByte);
          if( nByte>term.n ){
            rc = FTS5_CORRUPT;
            break;
          }
          term.n = nByte;
        }
        iOff += fts5GetVarint32(&a[iOff], nByte);
        if( iOff+nByte>n ){
          rc = FTS5_CORRUPT;
          break;
        }
        fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
        iOff += nByte;

        sqlite3Fts5BufferAppendPrintf(&rc, &s, " term=");
        fts5BufferAppendTerm(&rc, &s, &term);
        iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
      }

      fts5BufferFree(&term);
    }

   decode_out:
    sqlite3_free(a);
    if( rc==SQLITE_OK ){
      sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
    }else{
      sqlite3_result_error_code(pCtx, rc);
    }
    fts5BufferFree(&s);
  }
  #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
  #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  /*
  ** The implementation of user-defined scalar function fts5_rowid().
  **
  ** The only form supported is fts5_rowid('segment', segid, pgno), which
  ** returns the %_data rowid computed by FTS5_SEGMENT_ROWID(segid, pgno).
  ** The first argument is compared case-insensitively. Any other usage
  ** produces an error message as the function result.
  */
  static void fts5RowidFunction(
    sqlite3_context *pCtx,          /* Function call context */
    int nArg,                       /* Number of args */
    sqlite3_value **apVal           /* Function arguments */
  ){
    const char *zArg;               /* Text of first argument */
    int segid;
    int pgno;

    if( nArg==0 ){
      sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
      return;
    }

    zArg = (const char*)sqlite3_value_text(apVal[0]);
    if( 0!=sqlite3_stricmp(zArg, "segment") ){
      sqlite3_result_error(pCtx,
          "first arg to fts5_rowid() must be 'segment'" , -1
      );
      return;
    }

    if( nArg!=3 ){
      sqlite3_result_error(pCtx,
          "should be: fts5_rowid('segment', segid, pgno))", -1
      );
      return;
    }

    segid = sqlite3_value_int(apVal[1]);
    pgno = sqlite3_value_int(apVal[2]);
    sqlite3_result_int64(pCtx, FTS5_SEGMENT_ROWID(segid, pgno));
  }
  #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
  8277. #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  8278. typedef struct Fts5StructVtab Fts5StructVtab;
  8279. struct Fts5StructVtab {
  8280. sqlite3_vtab base;
  8281. };
  8282. typedef struct Fts5StructVcsr Fts5StructVcsr;
  8283. struct Fts5StructVcsr {
  8284. sqlite3_vtab_cursor base;
  8285. Fts5Structure *pStruct;
  8286. int iLevel;
  8287. int iSeg;
  8288. int iRowid;
  8289. };
  8290. /*
  8291. ** Create a new fts5_structure() table-valued function.
  8292. */
  8293. static int fts5structConnectMethod(
  8294. sqlite3 *db,
  8295. void *pAux,
  8296. int argc, const char *const*argv,
  8297. sqlite3_vtab **ppVtab,
  8298. char **pzErr
  8299. ){
  8300. Fts5StructVtab *pNew = 0;
  8301. int rc = SQLITE_OK;
  8302. rc = sqlite3_declare_vtab(db,
  8303. "CREATE TABLE xyz("
  8304. "level, segment, merge, segid, leaf1, leaf2, loc1, loc2, "
  8305. "npgtombstone, nentrytombstone, nentry, struct HIDDEN);"
  8306. );
  8307. if( rc==SQLITE_OK ){
  8308. pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew));
  8309. }
  8310. *ppVtab = (sqlite3_vtab*)pNew;
  8311. return rc;
  8312. }
  8313. /*
  8314. ** We must have a single struct=? constraint that will be passed through
  8315. ** into the xFilter method. If there is no valid struct=? constraint,
  8316. ** then return an SQLITE_CONSTRAINT error.
  8317. */
  8318. static int fts5structBestIndexMethod(
  8319. sqlite3_vtab *tab,
  8320. sqlite3_index_info *pIdxInfo
  8321. ){
  8322. int i;
  8323. int rc = SQLITE_CONSTRAINT;
  8324. struct sqlite3_index_constraint *p;
  8325. pIdxInfo->estimatedCost = (double)100;
  8326. pIdxInfo->estimatedRows = 100;
  8327. pIdxInfo->idxNum = 0;
  8328. for(i=0, p=pIdxInfo->aConstraint; i<pIdxInfo->nConstraint; i++, p++){
  8329. if( p->usable==0 ) continue;
  8330. if( p->op==SQLITE_INDEX_CONSTRAINT_EQ && p->iColumn==11 ){
  8331. rc = SQLITE_OK;
  8332. pIdxInfo->aConstraintUsage[i].omit = 1;
  8333. pIdxInfo->aConstraintUsage[i].argvIndex = 1;
  8334. break;
  8335. }
  8336. }
  8337. return rc;
  8338. }
  8339. /*
  8340. ** This method is the destructor for bytecodevtab objects.
  8341. */
  8342. static int fts5structDisconnectMethod(sqlite3_vtab *pVtab){
  8343. Fts5StructVtab *p = (Fts5StructVtab*)pVtab;
  8344. sqlite3_free(p);
  8345. return SQLITE_OK;
  8346. }
  8347. /*
  8348. ** Constructor for a new bytecodevtab_cursor object.
  8349. */
  8350. static int fts5structOpenMethod(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCsr){
  8351. int rc = SQLITE_OK;
  8352. Fts5StructVcsr *pNew = 0;
  8353. pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew));
  8354. *ppCsr = (sqlite3_vtab_cursor*)pNew;
  8355. return SQLITE_OK;
  8356. }
  8357. /*
  8358. ** Destructor for a bytecodevtab_cursor.
  8359. */
  8360. static int fts5structCloseMethod(sqlite3_vtab_cursor *cur){
  8361. Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
  8362. fts5StructureRelease(pCsr->pStruct);
  8363. sqlite3_free(pCsr);
  8364. return SQLITE_OK;
  8365. }
  8366. /*
  8367. ** Advance a bytecodevtab_cursor to its next row of output.
  8368. */
  8369. static int fts5structNextMethod(sqlite3_vtab_cursor *cur){
  8370. Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
  8371. Fts5Structure *p = pCsr->pStruct;
  8372. assert( pCsr->pStruct );
  8373. pCsr->iSeg++;
  8374. pCsr->iRowid++;
  8375. while( pCsr->iLevel<p->nLevel && pCsr->iSeg>=p->aLevel[pCsr->iLevel].nSeg ){
  8376. pCsr->iLevel++;
  8377. pCsr->iSeg = 0;
  8378. }
  8379. if( pCsr->iLevel>=p->nLevel ){
  8380. fts5StructureRelease(pCsr->pStruct);
  8381. pCsr->pStruct = 0;
  8382. }
  8383. return SQLITE_OK;
  8384. }
  8385. /*
  8386. ** Return TRUE if the cursor has been moved off of the last
  8387. ** row of output.
  8388. */
  8389. static int fts5structEofMethod(sqlite3_vtab_cursor *cur){
  8390. Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
  8391. return pCsr->pStruct==0;
  8392. }
  8393. static int fts5structRowidMethod(
  8394. sqlite3_vtab_cursor *cur,
  8395. sqlite_int64 *piRowid
  8396. ){
  8397. Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
  8398. *piRowid = pCsr->iRowid;
  8399. return SQLITE_OK;
  8400. }
  8401. /*
  8402. ** Return values of columns for the row at which the bytecodevtab_cursor
  8403. ** is currently pointing.
  8404. */
  8405. static int fts5structColumnMethod(
  8406. sqlite3_vtab_cursor *cur, /* The cursor */
  8407. sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
  8408. int i /* Which column to return */
  8409. ){
  8410. Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
  8411. Fts5Structure *p = pCsr->pStruct;
  8412. Fts5StructureSegment *pSeg = &p->aLevel[pCsr->iLevel].aSeg[pCsr->iSeg];
  8413. switch( i ){
  8414. case 0: /* level */
  8415. sqlite3_result_int(ctx, pCsr->iLevel);
  8416. break;
  8417. case 1: /* segment */
  8418. sqlite3_result_int(ctx, pCsr->iSeg);
  8419. break;
  8420. case 2: /* merge */
  8421. sqlite3_result_int(ctx, pCsr->iSeg < p->aLevel[pCsr->iLevel].nMerge);
  8422. break;
  8423. case 3: /* segid */
  8424. sqlite3_result_int(ctx, pSeg->iSegid);
  8425. break;
  8426. case 4: /* leaf1 */
  8427. sqlite3_result_int(ctx, pSeg->pgnoFirst);
  8428. break;
  8429. case 5: /* leaf2 */
  8430. sqlite3_result_int(ctx, pSeg->pgnoLast);
  8431. break;
  8432. case 6: /* origin1 */
  8433. sqlite3_result_int64(ctx, pSeg->iOrigin1);
  8434. break;
  8435. case 7: /* origin2 */
  8436. sqlite3_result_int64(ctx, pSeg->iOrigin2);
  8437. break;
  8438. case 8: /* npgtombstone */
  8439. sqlite3_result_int(ctx, pSeg->nPgTombstone);
  8440. break;
  8441. case 9: /* nentrytombstone */
  8442. sqlite3_result_int64(ctx, pSeg->nEntryTombstone);
  8443. break;
  8444. case 10: /* nentry */
  8445. sqlite3_result_int64(ctx, pSeg->nEntry);
  8446. break;
  8447. }
  8448. return SQLITE_OK;
  8449. }
  8450. /*
  8451. ** Initialize a cursor.
  8452. **
  8453. ** idxNum==0 means show all subprograms
  8454. ** idxNum==1 means show only the main bytecode and omit subprograms.
  8455. */
  8456. static int fts5structFilterMethod(
  8457. sqlite3_vtab_cursor *pVtabCursor,
  8458. int idxNum, const char *idxStr,
  8459. int argc, sqlite3_value **argv
  8460. ){
  8461. Fts5StructVcsr *pCsr = (Fts5StructVcsr *)pVtabCursor;
  8462. int rc = SQLITE_OK;
  8463. const u8 *aBlob = 0;
  8464. int nBlob = 0;
  8465. assert( argc==1 );
  8466. fts5StructureRelease(pCsr->pStruct);
  8467. pCsr->pStruct = 0;
  8468. nBlob = sqlite3_value_bytes(argv[0]);
  8469. aBlob = (const u8*)sqlite3_value_blob(argv[0]);
  8470. rc = fts5StructureDecode(aBlob, nBlob, 0, &pCsr->pStruct);
  8471. if( rc==SQLITE_OK ){
  8472. pCsr->iLevel = 0;
  8473. pCsr->iRowid = 0;
  8474. pCsr->iSeg = -1;
  8475. rc = fts5structNextMethod(pVtabCursor);
  8476. }
  8477. return rc;
  8478. }
  8479. #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
  8480. /*
  8481. ** This is called as part of registering the FTS5 module with database
  8482. ** connection db. It registers several user-defined scalar functions useful
  8483. ** with FTS5.
  8484. **
  8485. ** If successful, SQLITE_OK is returned. If an error occurs, some other
  8486. ** SQLite error code is returned instead.
  8487. */
  8488. int sqlite3Fts5IndexInit(sqlite3 *db){
  8489. #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  8490. int rc = sqlite3_create_function(
  8491. db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
  8492. );
  8493. if( rc==SQLITE_OK ){
  8494. rc = sqlite3_create_function(
  8495. db, "fts5_decode_none", 2,
  8496. SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
  8497. );
  8498. }
  8499. if( rc==SQLITE_OK ){
  8500. rc = sqlite3_create_function(
  8501. db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
  8502. );
  8503. }
  8504. if( rc==SQLITE_OK ){
  8505. static const sqlite3_module fts5structure_module = {
  8506. 0, /* iVersion */
  8507. 0, /* xCreate */
  8508. fts5structConnectMethod, /* xConnect */
  8509. fts5structBestIndexMethod, /* xBestIndex */
  8510. fts5structDisconnectMethod, /* xDisconnect */
  8511. 0, /* xDestroy */
  8512. fts5structOpenMethod, /* xOpen */
  8513. fts5structCloseMethod, /* xClose */
  8514. fts5structFilterMethod, /* xFilter */
  8515. fts5structNextMethod, /* xNext */
  8516. fts5structEofMethod, /* xEof */
  8517. fts5structColumnMethod, /* xColumn */
  8518. fts5structRowidMethod, /* xRowid */
  8519. 0, /* xUpdate */
  8520. 0, /* xBegin */
  8521. 0, /* xSync */
  8522. 0, /* xCommit */
  8523. 0, /* xRollback */
  8524. 0, /* xFindFunction */
  8525. 0, /* xRename */
  8526. 0, /* xSavepoint */
  8527. 0, /* xRelease */
  8528. 0, /* xRollbackTo */
  8529. 0, /* xShadowName */
  8530. 0 /* xIntegrity */
  8531. };
  8532. rc = sqlite3_create_module(db, "fts5_structure", &fts5structure_module, 0);
  8533. }
  8534. return rc;
  8535. #else
  8536. return SQLITE_OK;
  8537. UNUSED_PARAM(db);
  8538. #endif
  8539. }
  8540. int sqlite3Fts5IndexReset(Fts5Index *p){
  8541. assert( p->pStruct==0 || p->iStructVersion!=0 );
  8542. if( fts5IndexDataVersion(p)!=p->iStructVersion ){
  8543. fts5StructureInvalidate(p);
  8544. }
  8545. return fts5IndexReturn(p);
  8546. }