123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591 |
- /*
- ** 2014 August 11
- **
- ** The author disclaims copyright to this source code. In place of
- ** a legal notice, here is a blessing:
- **
- ** May you do good and not evil.
- ** May you find forgiveness for yourself and forgive others.
- ** May you share freely, never taking more than you give.
- **
- ******************************************************************************
- **
- */
- #include "fts5Int.h"
- typedef struct Fts5HashEntry Fts5HashEntry;
- /*
- ** This file contains the implementation of an in-memory hash table used
- ** to accumuluate "term -> doclist" content before it is flused to a level-0
- ** segment.
- */
- struct Fts5Hash {
- int eDetail; /* Copy of Fts5Config.eDetail */
- int *pnByte; /* Pointer to bytes counter */
- int nEntry; /* Number of entries currently in hash */
- int nSlot; /* Size of aSlot[] array */
- Fts5HashEntry *pScan; /* Current ordered scan item */
- Fts5HashEntry **aSlot; /* Array of hash slots */
- };
- /*
- ** Each entry in the hash table is represented by an object of the
- ** following type. Each object, its key, and its current data are stored
- ** in a single memory allocation. The key immediately follows the object
- ** in memory. The position list data immediately follows the key data
- ** in memory.
- **
- ** The key is Fts5HashEntry.nKey bytes in size. It consists of a single
- ** byte identifying the index (either the main term index or a prefix-index),
- ** followed by the term data. For example: "0token". There is no
- ** nul-terminator - in this case nKey=6.
- **
- ** The data that follows the key is in a similar, but not identical format
- ** to the doclist data stored in the database. It is:
- **
- ** * Rowid, as a varint
- ** * Position list, without 0x00 terminator.
- ** * Size of previous position list and rowid, as a 4 byte
- ** big-endian integer.
- **
- ** iRowidOff:
- ** Offset of last rowid written to data area. Relative to first byte of
- ** structure.
- **
- ** nData:
- ** Bytes of data written since iRowidOff.
- */
- struct Fts5HashEntry {
- Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */
- Fts5HashEntry *pScanNext; /* Next entry in sorted order */
-
- int nAlloc; /* Total size of allocation */
- int iSzPoslist; /* Offset of space for 4-byte poslist size */
- int nData; /* Total bytes of data (incl. structure) */
- int nKey; /* Length of key in bytes */
- u8 bDel; /* Set delete-flag @ iSzPoslist */
- u8 bContent; /* Set content-flag (detail=none mode) */
- i16 iCol; /* Column of last value written */
- int iPos; /* Position of last value written */
- i64 iRowid; /* Rowid of last value written */
- };
- /*
- ** Eqivalent to:
- **
- ** char *fts5EntryKey(Fts5HashEntry *pEntry){ return zKey; }
- */
- #define fts5EntryKey(p) ( ((char *)(&(p)[1])) )
- /*
- ** Allocate a new hash table.
- */
- int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){
- int rc = SQLITE_OK;
- Fts5Hash *pNew;
- *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
- if( pNew==0 ){
- rc = SQLITE_NOMEM;
- }else{
- sqlite3_int64 nByte;
- memset(pNew, 0, sizeof(Fts5Hash));
- pNew->pnByte = pnByte;
- pNew->eDetail = pConfig->eDetail;
- pNew->nSlot = 1024;
- nByte = sizeof(Fts5HashEntry*) * pNew->nSlot;
- pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc64(nByte);
- if( pNew->aSlot==0 ){
- sqlite3_free(pNew);
- *ppNew = 0;
- rc = SQLITE_NOMEM;
- }else{
- memset(pNew->aSlot, 0, (size_t)nByte);
- }
- }
- return rc;
- }
- /*
- ** Free a hash table object.
- */
- void sqlite3Fts5HashFree(Fts5Hash *pHash){
- if( pHash ){
- sqlite3Fts5HashClear(pHash);
- sqlite3_free(pHash->aSlot);
- sqlite3_free(pHash);
- }
- }
- /*
- ** Empty (but do not delete) a hash table.
- */
- void sqlite3Fts5HashClear(Fts5Hash *pHash){
- int i;
- for(i=0; i<pHash->nSlot; i++){
- Fts5HashEntry *pNext;
- Fts5HashEntry *pSlot;
- for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){
- pNext = pSlot->pHashNext;
- sqlite3_free(pSlot);
- }
- }
- memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*));
- pHash->nEntry = 0;
- }
- static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){
- int i;
- unsigned int h = 13;
- for(i=n-1; i>=0; i--){
- h = (h << 3) ^ h ^ p[i];
- }
- return (h % nSlot);
- }
- static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){
- int i;
- unsigned int h = 13;
- for(i=n-1; i>=0; i--){
- h = (h << 3) ^ h ^ p[i];
- }
- h = (h << 3) ^ h ^ b;
- return (h % nSlot);
- }
- /*
- ** Resize the hash table by doubling the number of slots.
- */
- static int fts5HashResize(Fts5Hash *pHash){
- int nNew = pHash->nSlot*2;
- int i;
- Fts5HashEntry **apNew;
- Fts5HashEntry **apOld = pHash->aSlot;
- apNew = (Fts5HashEntry**)sqlite3_malloc64(nNew*sizeof(Fts5HashEntry*));
- if( !apNew ) return SQLITE_NOMEM;
- memset(apNew, 0, nNew*sizeof(Fts5HashEntry*));
- for(i=0; i<pHash->nSlot; i++){
- while( apOld[i] ){
- unsigned int iHash;
- Fts5HashEntry *p = apOld[i];
- apOld[i] = p->pHashNext;
- iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p), p->nKey);
- p->pHashNext = apNew[iHash];
- apNew[iHash] = p;
- }
- }
- sqlite3_free(apOld);
- pHash->nSlot = nNew;
- pHash->aSlot = apNew;
- return SQLITE_OK;
- }
- static int fts5HashAddPoslistSize(
- Fts5Hash *pHash,
- Fts5HashEntry *p,
- Fts5HashEntry *p2
- ){
- int nRet = 0;
- if( p->iSzPoslist ){
- u8 *pPtr = p2 ? (u8*)p2 : (u8*)p;
- int nData = p->nData;
- if( pHash->eDetail==FTS5_DETAIL_NONE ){
- assert( nData==p->iSzPoslist );
- if( p->bDel ){
- pPtr[nData++] = 0x00;
- if( p->bContent ){
- pPtr[nData++] = 0x00;
- }
- }
- }else{
- int nSz = (nData - p->iSzPoslist - 1); /* Size in bytes */
- int nPos = nSz*2 + p->bDel; /* Value of nPos field */
- assert( p->bDel==0 || p->bDel==1 );
- if( nPos<=127 ){
- pPtr[p->iSzPoslist] = (u8)nPos;
- }else{
- int nByte = sqlite3Fts5GetVarintLen((u32)nPos);
- memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz);
- sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos);
- nData += (nByte-1);
- }
- }
- nRet = nData - p->nData;
- if( p2==0 ){
- p->iSzPoslist = 0;
- p->bDel = 0;
- p->bContent = 0;
- p->nData = nData;
- }
- }
- return nRet;
- }
- /*
- ** Add an entry to the in-memory hash table. The key is the concatenation
- ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos).
- **
- ** (bByte || pToken) -> (iRowid,iCol,iPos)
- **
- ** Or, if iCol is negative, then the value is a delete marker.
- */
- int sqlite3Fts5HashWrite(
- Fts5Hash *pHash,
- i64 iRowid, /* Rowid for this entry */
- int iCol, /* Column token appears in (-ve -> delete) */
- int iPos, /* Position of token within column */
- char bByte, /* First byte of token */
- const char *pToken, int nToken /* Token to add or remove to or from index */
- ){
- unsigned int iHash;
- Fts5HashEntry *p;
- u8 *pPtr;
- int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */
- int bNew; /* If non-delete entry should be written */
-
- bNew = (pHash->eDetail==FTS5_DETAIL_FULL);
- /* Attempt to locate an existing hash entry */
- iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
- for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
- char *zKey = fts5EntryKey(p);
- if( zKey[0]==bByte
- && p->nKey==nToken+1
- && memcmp(&zKey[1], pToken, nToken)==0
- ){
- break;
- }
- }
- /* If an existing hash entry cannot be found, create a new one. */
- if( p==0 ){
- /* Figure out how much space to allocate */
- char *zKey;
- sqlite3_int64 nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64;
- if( nByte<128 ) nByte = 128;
- /* Grow the Fts5Hash.aSlot[] array if necessary. */
- if( (pHash->nEntry*2)>=pHash->nSlot ){
- int rc = fts5HashResize(pHash);
- if( rc!=SQLITE_OK ) return rc;
- iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
- }
- /* Allocate new Fts5HashEntry and add it to the hash table. */
- p = (Fts5HashEntry*)sqlite3_malloc64(nByte);
- if( !p ) return SQLITE_NOMEM;
- memset(p, 0, sizeof(Fts5HashEntry));
- p->nAlloc = (int)nByte;
- zKey = fts5EntryKey(p);
- zKey[0] = bByte;
- memcpy(&zKey[1], pToken, nToken);
- assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) );
- p->nKey = nToken+1;
- zKey[nToken+1] = '\0';
- p->nData = nToken+1 + sizeof(Fts5HashEntry);
- p->pHashNext = pHash->aSlot[iHash];
- pHash->aSlot[iHash] = p;
- pHash->nEntry++;
- /* Add the first rowid field to the hash-entry */
- p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
- p->iRowid = iRowid;
- p->iSzPoslist = p->nData;
- if( pHash->eDetail!=FTS5_DETAIL_NONE ){
- p->nData += 1;
- p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
- }
- }else{
- /* Appending to an existing hash-entry. Check that there is enough
- ** space to append the largest possible new entry. Worst case scenario
- ** is:
- **
- ** + 9 bytes for a new rowid,
- ** + 4 byte reserved for the "poslist size" varint.
- ** + 1 byte for a "new column" byte,
- ** + 3 bytes for a new column number (16-bit max) as a varint,
- ** + 5 bytes for the new position offset (32-bit max).
- */
- if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
- sqlite3_int64 nNew = p->nAlloc * 2;
- Fts5HashEntry *pNew;
- Fts5HashEntry **pp;
- pNew = (Fts5HashEntry*)sqlite3_realloc64(p, nNew);
- if( pNew==0 ) return SQLITE_NOMEM;
- pNew->nAlloc = (int)nNew;
- for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext);
- *pp = pNew;
- p = pNew;
- }
- nIncr -= p->nData;
- }
- assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) );
- pPtr = (u8*)p;
- /* If this is a new rowid, append the 4-byte size field for the previous
- ** entry, and the new rowid for this entry. */
- if( iRowid!=p->iRowid ){
- u64 iDiff = (u64)iRowid - (u64)p->iRowid;
- fts5HashAddPoslistSize(pHash, p, 0);
- p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iDiff);
- p->iRowid = iRowid;
- bNew = 1;
- p->iSzPoslist = p->nData;
- if( pHash->eDetail!=FTS5_DETAIL_NONE ){
- p->nData += 1;
- p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
- p->iPos = 0;
- }
- }
- if( iCol>=0 ){
- if( pHash->eDetail==FTS5_DETAIL_NONE ){
- p->bContent = 1;
- }else{
- /* Append a new column value, if necessary */
- assert_nc( iCol>=p->iCol );
- if( iCol!=p->iCol ){
- if( pHash->eDetail==FTS5_DETAIL_FULL ){
- pPtr[p->nData++] = 0x01;
- p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
- p->iCol = (i16)iCol;
- p->iPos = 0;
- }else{
- bNew = 1;
- p->iCol = (i16)(iPos = iCol);
- }
- }
- /* Append the new position offset, if necessary */
- if( bNew ){
- p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
- p->iPos = iPos;
- }
- }
- }else{
- /* This is a delete. Set the delete flag. */
- p->bDel = 1;
- }
- nIncr += p->nData;
- *pHash->pnByte += nIncr;
- return SQLITE_OK;
- }
- /*
- ** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
- ** each sorted in key order. This function merges the two lists into a
- ** single list and returns a pointer to its first element.
- */
- static Fts5HashEntry *fts5HashEntryMerge(
- Fts5HashEntry *pLeft,
- Fts5HashEntry *pRight
- ){
- Fts5HashEntry *p1 = pLeft;
- Fts5HashEntry *p2 = pRight;
- Fts5HashEntry *pRet = 0;
- Fts5HashEntry **ppOut = &pRet;
- while( p1 || p2 ){
- if( p1==0 ){
- *ppOut = p2;
- p2 = 0;
- }else if( p2==0 ){
- *ppOut = p1;
- p1 = 0;
- }else{
- char *zKey1 = fts5EntryKey(p1);
- char *zKey2 = fts5EntryKey(p2);
- int nMin = MIN(p1->nKey, p2->nKey);
- int cmp = memcmp(zKey1, zKey2, nMin);
- if( cmp==0 ){
- cmp = p1->nKey - p2->nKey;
- }
- assert( cmp!=0 );
- if( cmp>0 ){
- /* p2 is smaller */
- *ppOut = p2;
- ppOut = &p2->pScanNext;
- p2 = p2->pScanNext;
- }else{
- /* p1 is smaller */
- *ppOut = p1;
- ppOut = &p1->pScanNext;
- p1 = p1->pScanNext;
- }
- *ppOut = 0;
- }
- }
- return pRet;
- }
- /*
- ** Link all tokens from hash table iHash into a list in sorted order. The
- ** tokens are not removed from the hash table.
- */
- static int fts5HashEntrySort(
- Fts5Hash *pHash,
- const char *pTerm, int nTerm, /* Query prefix, if any */
- Fts5HashEntry **ppSorted
- ){
- const int nMergeSlot = 32;
- Fts5HashEntry **ap;
- Fts5HashEntry *pList;
- int iSlot;
- int i;
- *ppSorted = 0;
- ap = sqlite3_malloc64(sizeof(Fts5HashEntry*) * nMergeSlot);
- if( !ap ) return SQLITE_NOMEM;
- memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot);
- for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
- Fts5HashEntry *pIter;
- for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){
- if( pTerm==0
- || (pIter->nKey>=nTerm && 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm))
- ){
- Fts5HashEntry *pEntry = pIter;
- pEntry->pScanNext = 0;
- for(i=0; ap[i]; i++){
- pEntry = fts5HashEntryMerge(pEntry, ap[i]);
- ap[i] = 0;
- }
- ap[i] = pEntry;
- }
- }
- }
- pList = 0;
- for(i=0; i<nMergeSlot; i++){
- pList = fts5HashEntryMerge(pList, ap[i]);
- }
- sqlite3_free(ap);
- *ppSorted = pList;
- return SQLITE_OK;
- }
- /*
- ** Query the hash table for a doclist associated with term pTerm/nTerm.
- */
- int sqlite3Fts5HashQuery(
- Fts5Hash *pHash, /* Hash table to query */
- int nPre,
- const char *pTerm, int nTerm, /* Query term */
- void **ppOut, /* OUT: Pointer to new object */
- int *pnDoclist /* OUT: Size of doclist in bytes */
- ){
- unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm);
- char *zKey = 0;
- Fts5HashEntry *p;
- for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
- zKey = fts5EntryKey(p);
- if( nTerm==p->nKey && memcmp(zKey, pTerm, nTerm)==0 ) break;
- }
- if( p ){
- int nHashPre = sizeof(Fts5HashEntry) + nTerm;
- int nList = p->nData - nHashPre;
- u8 *pRet = (u8*)(*ppOut = sqlite3_malloc64(nPre + nList + 10));
- if( pRet ){
- Fts5HashEntry *pFaux = (Fts5HashEntry*)&pRet[nPre-nHashPre];
- memcpy(&pRet[nPre], &((u8*)p)[nHashPre], nList);
- nList += fts5HashAddPoslistSize(pHash, p, pFaux);
- *pnDoclist = nList;
- }else{
- *pnDoclist = 0;
- return SQLITE_NOMEM;
- }
- }else{
- *ppOut = 0;
- *pnDoclist = 0;
- }
- return SQLITE_OK;
- }
- int sqlite3Fts5HashScanInit(
- Fts5Hash *p, /* Hash table to query */
- const char *pTerm, int nTerm /* Query prefix */
- ){
- return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
- }
- #ifdef SQLITE_DEBUG
- static int fts5HashCount(Fts5Hash *pHash){
- int nEntry = 0;
- int ii;
- for(ii=0; ii<pHash->nSlot; ii++){
- Fts5HashEntry *p = 0;
- for(p=pHash->aSlot[ii]; p; p=p->pHashNext){
- nEntry++;
- }
- }
- return nEntry;
- }
- #endif
- /*
- ** Return true if the hash table is empty, false otherwise.
- */
- int sqlite3Fts5HashIsEmpty(Fts5Hash *pHash){
- assert( pHash->nEntry==fts5HashCount(pHash) );
- return pHash->nEntry==0;
- }
- void sqlite3Fts5HashScanNext(Fts5Hash *p){
- assert( !sqlite3Fts5HashScanEof(p) );
- p->pScan = p->pScan->pScanNext;
- }
- int sqlite3Fts5HashScanEof(Fts5Hash *p){
- return (p->pScan==0);
- }
- void sqlite3Fts5HashScanEntry(
- Fts5Hash *pHash,
- const char **pzTerm, /* OUT: term (nul-terminated) */
- int *pnTerm, /* OUT: Size of term in bytes */
- const u8 **ppDoclist, /* OUT: pointer to doclist */
- int *pnDoclist /* OUT: size of doclist in bytes */
- ){
- Fts5HashEntry *p;
- if( (p = pHash->pScan) ){
- char *zKey = fts5EntryKey(p);
- int nTerm = p->nKey;
- fts5HashAddPoslistSize(pHash, p, 0);
- *pzTerm = zKey;
- *pnTerm = nTerm;
- *ppDoclist = (const u8*)&zKey[nTerm];
- *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm);
- }else{
- *pzTerm = 0;
- *pnTerm = 0;
- *ppDoclist = 0;
- *pnDoclist = 0;
- }
- }
|