fts5_buffer.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. /*
  2. ** 2014 May 31
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. ******************************************************************************
  12. */
  13. #include "fts5Int.h"
  14. int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){
  15. if( (u32)pBuf->nSpace<nByte ){
  16. u64 nNew = pBuf->nSpace ? pBuf->nSpace : 64;
  17. u8 *pNew;
  18. while( nNew<nByte ){
  19. nNew = nNew * 2;
  20. }
  21. pNew = sqlite3_realloc64(pBuf->p, nNew);
  22. if( pNew==0 ){
  23. *pRc = SQLITE_NOMEM;
  24. return 1;
  25. }else{
  26. pBuf->nSpace = (int)nNew;
  27. pBuf->p = pNew;
  28. }
  29. }
  30. return 0;
  31. }
  32. /*
  33. ** Encode value iVal as an SQLite varint and append it to the buffer object
  34. ** pBuf. If an OOM error occurs, set the error code in p.
  35. */
  36. void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){
  37. if( fts5BufferGrow(pRc, pBuf, 9) ) return;
  38. pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal);
  39. }
  40. void sqlite3Fts5Put32(u8 *aBuf, int iVal){
  41. aBuf[0] = (iVal>>24) & 0x00FF;
  42. aBuf[1] = (iVal>>16) & 0x00FF;
  43. aBuf[2] = (iVal>> 8) & 0x00FF;
  44. aBuf[3] = (iVal>> 0) & 0x00FF;
  45. }
  46. int sqlite3Fts5Get32(const u8 *aBuf){
  47. return (int)((((u32)aBuf[0])<<24) + (aBuf[1]<<16) + (aBuf[2]<<8) + aBuf[3]);
  48. }
  49. /*
  50. ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set
  51. ** the error code in p. If an error has already occurred when this function
  52. ** is called, it is a no-op.
  53. */
  54. void sqlite3Fts5BufferAppendBlob(
  55. int *pRc,
  56. Fts5Buffer *pBuf,
  57. u32 nData,
  58. const u8 *pData
  59. ){
  60. if( nData ){
  61. if( fts5BufferGrow(pRc, pBuf, nData) ) return;
  62. assert( pBuf->p!=0 );
  63. memcpy(&pBuf->p[pBuf->n], pData, nData);
  64. pBuf->n += nData;
  65. }
  66. }
  67. /*
  68. ** Append the nul-terminated string zStr to the buffer pBuf. This function
  69. ** ensures that the byte following the buffer data is set to 0x00, even
  70. ** though this byte is not included in the pBuf->n count.
  71. */
  72. void sqlite3Fts5BufferAppendString(
  73. int *pRc,
  74. Fts5Buffer *pBuf,
  75. const char *zStr
  76. ){
  77. int nStr = (int)strlen(zStr);
  78. sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr);
  79. pBuf->n--;
  80. }
  81. /*
  82. ** Argument zFmt is a printf() style format string. This function performs
  83. ** the printf() style processing, then appends the results to buffer pBuf.
  84. **
  85. ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte
  86. ** following the buffer data is set to 0x00, even though this byte is not
  87. ** included in the pBuf->n count.
  88. */
  89. void sqlite3Fts5BufferAppendPrintf(
  90. int *pRc,
  91. Fts5Buffer *pBuf,
  92. char *zFmt, ...
  93. ){
  94. if( *pRc==SQLITE_OK ){
  95. char *zTmp;
  96. va_list ap;
  97. va_start(ap, zFmt);
  98. zTmp = sqlite3_vmprintf(zFmt, ap);
  99. va_end(ap);
  100. if( zTmp==0 ){
  101. *pRc = SQLITE_NOMEM;
  102. }else{
  103. sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp);
  104. sqlite3_free(zTmp);
  105. }
  106. }
  107. }
  108. char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){
  109. char *zRet = 0;
  110. if( *pRc==SQLITE_OK ){
  111. va_list ap;
  112. va_start(ap, zFmt);
  113. zRet = sqlite3_vmprintf(zFmt, ap);
  114. va_end(ap);
  115. if( zRet==0 ){
  116. *pRc = SQLITE_NOMEM;
  117. }
  118. }
  119. return zRet;
  120. }
  121. /*
  122. ** Free any buffer allocated by pBuf. Zero the structure before returning.
  123. */
  124. void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){
  125. sqlite3_free(pBuf->p);
  126. memset(pBuf, 0, sizeof(Fts5Buffer));
  127. }
  128. /*
  129. ** Zero the contents of the buffer object. But do not free the associated
  130. ** memory allocation.
  131. */
  132. void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){
  133. pBuf->n = 0;
  134. }
  135. /*
  136. ** Set the buffer to contain nData/pData. If an OOM error occurs, leave an
  137. ** the error code in p. If an error has already occurred when this function
  138. ** is called, it is a no-op.
  139. */
  140. void sqlite3Fts5BufferSet(
  141. int *pRc,
  142. Fts5Buffer *pBuf,
  143. int nData,
  144. const u8 *pData
  145. ){
  146. pBuf->n = 0;
  147. sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData);
  148. }
  149. int sqlite3Fts5PoslistNext64(
  150. const u8 *a, int n, /* Buffer containing poslist */
  151. int *pi, /* IN/OUT: Offset within a[] */
  152. i64 *piOff /* IN/OUT: Current offset */
  153. ){
  154. int i = *pi;
  155. assert( a!=0 || i==0 );
  156. if( i>=n ){
  157. /* EOF */
  158. *piOff = -1;
  159. return 1;
  160. }else{
  161. i64 iOff = *piOff;
  162. u32 iVal;
  163. assert( a!=0 );
  164. fts5FastGetVarint32(a, i, iVal);
  165. if( iVal<=1 ){
  166. if( iVal==0 ){
  167. *pi = i;
  168. return 0;
  169. }
  170. fts5FastGetVarint32(a, i, iVal);
  171. iOff = ((i64)iVal) << 32;
  172. assert( iOff>=0 );
  173. fts5FastGetVarint32(a, i, iVal);
  174. if( iVal<2 ){
  175. /* This is a corrupt record. So stop parsing it here. */
  176. *piOff = -1;
  177. return 1;
  178. }
  179. *piOff = iOff + ((iVal-2) & 0x7FFFFFFF);
  180. }else{
  181. *piOff = (iOff & (i64)0x7FFFFFFF<<32)+((iOff + (iVal-2)) & 0x7FFFFFFF);
  182. }
  183. *pi = i;
  184. assert_nc( *piOff>=iOff );
  185. return 0;
  186. }
  187. }
  188. /*
  189. ** Advance the iterator object passed as the only argument. Return true
  190. ** if the iterator reaches EOF, or false otherwise.
  191. */
  192. int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
  193. if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){
  194. pIter->bEof = 1;
  195. }
  196. return pIter->bEof;
  197. }
  198. int sqlite3Fts5PoslistReaderInit(
  199. const u8 *a, int n, /* Poslist buffer to iterate through */
  200. Fts5PoslistReader *pIter /* Iterator object to initialize */
  201. ){
  202. memset(pIter, 0, sizeof(*pIter));
  203. pIter->a = a;
  204. pIter->n = n;
  205. sqlite3Fts5PoslistReaderNext(pIter);
  206. return pIter->bEof;
  207. }
  208. /*
  209. ** Append position iPos to the position list being accumulated in buffer
  210. ** pBuf, which must be already be large enough to hold the new data.
  211. ** The previous position written to this list is *piPrev. *piPrev is set
  212. ** to iPos before returning.
  213. */
  214. void sqlite3Fts5PoslistSafeAppend(
  215. Fts5Buffer *pBuf,
  216. i64 *piPrev,
  217. i64 iPos
  218. ){
  219. if( iPos>=*piPrev ){
  220. static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
  221. if( (iPos & colmask) != (*piPrev & colmask) ){
  222. pBuf->p[pBuf->n++] = 1;
  223. pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32));
  224. *piPrev = (iPos & colmask);
  225. }
  226. pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2);
  227. *piPrev = iPos;
  228. }
  229. }
  230. int sqlite3Fts5PoslistWriterAppend(
  231. Fts5Buffer *pBuf,
  232. Fts5PoslistWriter *pWriter,
  233. i64 iPos
  234. ){
  235. int rc = 0; /* Initialized only to suppress erroneous warning from Clang */
  236. if( fts5BufferGrow(&rc, pBuf, 5+5+5) ) return rc;
  237. sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos);
  238. return SQLITE_OK;
  239. }
  240. void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte){
  241. void *pRet = 0;
  242. if( *pRc==SQLITE_OK ){
  243. pRet = sqlite3_malloc64(nByte);
  244. if( pRet==0 ){
  245. if( nByte>0 ) *pRc = SQLITE_NOMEM;
  246. }else{
  247. memset(pRet, 0, (size_t)nByte);
  248. }
  249. }
  250. return pRet;
  251. }
  252. /*
  253. ** Return a nul-terminated copy of the string indicated by pIn. If nIn
  254. ** is non-negative, then it is the length of the string in bytes. Otherwise,
  255. ** the length of the string is determined using strlen().
  256. **
  257. ** It is the responsibility of the caller to eventually free the returned
  258. ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned.
  259. */
  260. char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){
  261. char *zRet = 0;
  262. if( *pRc==SQLITE_OK ){
  263. if( nIn<0 ){
  264. nIn = (int)strlen(pIn);
  265. }
  266. zRet = (char*)sqlite3_malloc(nIn+1);
  267. if( zRet ){
  268. memcpy(zRet, pIn, nIn);
  269. zRet[nIn] = '\0';
  270. }else{
  271. *pRc = SQLITE_NOMEM;
  272. }
  273. }
  274. return zRet;
  275. }
  276. /*
  277. ** Return true if character 't' may be part of an FTS5 bareword, or false
  278. ** otherwise. Characters that may be part of barewords:
  279. **
  280. ** * All non-ASCII characters,
  281. ** * The 52 upper and lower case ASCII characters, and
  282. ** * The 10 integer ASCII characters.
  283. ** * The underscore character "_" (0x5F).
  284. ** * The unicode "subsitute" character (0x1A).
  285. */
  286. int sqlite3Fts5IsBareword(char t){
  287. u8 aBareword[128] = {
  288. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 .. 0x0F */
  289. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, /* 0x10 .. 0x1F */
  290. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 .. 0x2F */
  291. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30 .. 0x3F */
  292. 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 .. 0x4F */
  293. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50 .. 0x5F */
  294. 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 .. 0x6F */
  295. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 /* 0x70 .. 0x7F */
  296. };
  297. return (t & 0x80) || aBareword[(int)t];
  298. }
  299. /*************************************************************************
  300. */
  301. typedef struct Fts5TermsetEntry Fts5TermsetEntry;
  302. struct Fts5TermsetEntry {
  303. char *pTerm;
  304. int nTerm;
  305. int iIdx; /* Index (main or aPrefix[] entry) */
  306. Fts5TermsetEntry *pNext;
  307. };
  308. struct Fts5Termset {
  309. Fts5TermsetEntry *apHash[512];
  310. };
  311. int sqlite3Fts5TermsetNew(Fts5Termset **pp){
  312. int rc = SQLITE_OK;
  313. *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset));
  314. return rc;
  315. }
  316. int sqlite3Fts5TermsetAdd(
  317. Fts5Termset *p,
  318. int iIdx,
  319. const char *pTerm, int nTerm,
  320. int *pbPresent
  321. ){
  322. int rc = SQLITE_OK;
  323. *pbPresent = 0;
  324. if( p ){
  325. int i;
  326. u32 hash = 13;
  327. Fts5TermsetEntry *pEntry;
  328. /* Calculate a hash value for this term. This is the same hash checksum
  329. ** used by the fts5_hash.c module. This is not important for correct
  330. ** operation of the module, but is necessary to ensure that some tests
  331. ** designed to produce hash table collisions really do work. */
  332. for(i=nTerm-1; i>=0; i--){
  333. hash = (hash << 3) ^ hash ^ pTerm[i];
  334. }
  335. hash = (hash << 3) ^ hash ^ iIdx;
  336. hash = hash % ArraySize(p->apHash);
  337. for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
  338. if( pEntry->iIdx==iIdx
  339. && pEntry->nTerm==nTerm
  340. && memcmp(pEntry->pTerm, pTerm, nTerm)==0
  341. ){
  342. *pbPresent = 1;
  343. break;
  344. }
  345. }
  346. if( pEntry==0 ){
  347. pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
  348. if( pEntry ){
  349. pEntry->pTerm = (char*)&pEntry[1];
  350. pEntry->nTerm = nTerm;
  351. pEntry->iIdx = iIdx;
  352. memcpy(pEntry->pTerm, pTerm, nTerm);
  353. pEntry->pNext = p->apHash[hash];
  354. p->apHash[hash] = pEntry;
  355. }
  356. }
  357. }
  358. return rc;
  359. }
  360. void sqlite3Fts5TermsetFree(Fts5Termset *p){
  361. if( p ){
  362. u32 i;
  363. for(i=0; i<ArraySize(p->apHash); i++){
  364. Fts5TermsetEntry *pEntry = p->apHash[i];
  365. while( pEntry ){
  366. Fts5TermsetEntry *pDel = pEntry;
  367. pEntry = pEntry->pNext;
  368. sqlite3_free(pDel);
  369. }
  370. }
  371. sqlite3_free(p);
  372. }
  373. }