123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412 |
- /*
- ** 2014 May 31
- **
- ** The author disclaims copyright to this source code. In place of
- ** a legal notice, here is a blessing:
- **
- ** May you do good and not evil.
- ** May you find forgiveness for yourself and forgive others.
- ** May you share freely, never taking more than you give.
- **
- ******************************************************************************
- */
- #include "fts5Int.h"
- int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){
- if( (u32)pBuf->nSpace<nByte ){
- u64 nNew = pBuf->nSpace ? pBuf->nSpace : 64;
- u8 *pNew;
- while( nNew<nByte ){
- nNew = nNew * 2;
- }
- pNew = sqlite3_realloc64(pBuf->p, nNew);
- if( pNew==0 ){
- *pRc = SQLITE_NOMEM;
- return 1;
- }else{
- pBuf->nSpace = (int)nNew;
- pBuf->p = pNew;
- }
- }
- return 0;
- }
- /*
- ** Encode value iVal as an SQLite varint and append it to the buffer object
- ** pBuf. If an OOM error occurs, set the error code in p.
- */
- void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){
- if( fts5BufferGrow(pRc, pBuf, 9) ) return;
- pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal);
- }
- void sqlite3Fts5Put32(u8 *aBuf, int iVal){
- aBuf[0] = (iVal>>24) & 0x00FF;
- aBuf[1] = (iVal>>16) & 0x00FF;
- aBuf[2] = (iVal>> 8) & 0x00FF;
- aBuf[3] = (iVal>> 0) & 0x00FF;
- }
- int sqlite3Fts5Get32(const u8 *aBuf){
- return (int)((((u32)aBuf[0])<<24) + (aBuf[1]<<16) + (aBuf[2]<<8) + aBuf[3]);
- }
- /*
- ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set
- ** the error code in p. If an error has already occurred when this function
- ** is called, it is a no-op.
- */
- void sqlite3Fts5BufferAppendBlob(
- int *pRc,
- Fts5Buffer *pBuf,
- u32 nData,
- const u8 *pData
- ){
- if( nData ){
- if( fts5BufferGrow(pRc, pBuf, nData) ) return;
- assert( pBuf->p!=0 );
- memcpy(&pBuf->p[pBuf->n], pData, nData);
- pBuf->n += nData;
- }
- }
- /*
- ** Append the nul-terminated string zStr to the buffer pBuf. This function
- ** ensures that the byte following the buffer data is set to 0x00, even
- ** though this byte is not included in the pBuf->n count.
- */
- void sqlite3Fts5BufferAppendString(
- int *pRc,
- Fts5Buffer *pBuf,
- const char *zStr
- ){
- int nStr = (int)strlen(zStr);
- sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr);
- pBuf->n--;
- }
- /*
- ** Argument zFmt is a printf() style format string. This function performs
- ** the printf() style processing, then appends the results to buffer pBuf.
- **
- ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte
- ** following the buffer data is set to 0x00, even though this byte is not
- ** included in the pBuf->n count.
- */
- void sqlite3Fts5BufferAppendPrintf(
- int *pRc,
- Fts5Buffer *pBuf,
- char *zFmt, ...
- ){
- if( *pRc==SQLITE_OK ){
- char *zTmp;
- va_list ap;
- va_start(ap, zFmt);
- zTmp = sqlite3_vmprintf(zFmt, ap);
- va_end(ap);
- if( zTmp==0 ){
- *pRc = SQLITE_NOMEM;
- }else{
- sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp);
- sqlite3_free(zTmp);
- }
- }
- }
- char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){
- char *zRet = 0;
- if( *pRc==SQLITE_OK ){
- va_list ap;
- va_start(ap, zFmt);
- zRet = sqlite3_vmprintf(zFmt, ap);
- va_end(ap);
- if( zRet==0 ){
- *pRc = SQLITE_NOMEM;
- }
- }
- return zRet;
- }
-
- /*
- ** Free any buffer allocated by pBuf. Zero the structure before returning.
- */
- void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){
- sqlite3_free(pBuf->p);
- memset(pBuf, 0, sizeof(Fts5Buffer));
- }
- /*
- ** Zero the contents of the buffer object. But do not free the associated
- ** memory allocation.
- */
- void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){
- pBuf->n = 0;
- }
- /*
- ** Set the buffer to contain nData/pData. If an OOM error occurs, leave an
- ** the error code in p. If an error has already occurred when this function
- ** is called, it is a no-op.
- */
- void sqlite3Fts5BufferSet(
- int *pRc,
- Fts5Buffer *pBuf,
- int nData,
- const u8 *pData
- ){
- pBuf->n = 0;
- sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData);
- }
- int sqlite3Fts5PoslistNext64(
- const u8 *a, int n, /* Buffer containing poslist */
- int *pi, /* IN/OUT: Offset within a[] */
- i64 *piOff /* IN/OUT: Current offset */
- ){
- int i = *pi;
- assert( a!=0 || i==0 );
- if( i>=n ){
- /* EOF */
- *piOff = -1;
- return 1;
- }else{
- i64 iOff = *piOff;
- u32 iVal;
- assert( a!=0 );
- fts5FastGetVarint32(a, i, iVal);
- if( iVal<=1 ){
- if( iVal==0 ){
- *pi = i;
- return 0;
- }
- fts5FastGetVarint32(a, i, iVal);
- iOff = ((i64)iVal) << 32;
- assert( iOff>=0 );
- fts5FastGetVarint32(a, i, iVal);
- if( iVal<2 ){
- /* This is a corrupt record. So stop parsing it here. */
- *piOff = -1;
- return 1;
- }
- *piOff = iOff + ((iVal-2) & 0x7FFFFFFF);
- }else{
- *piOff = (iOff & (i64)0x7FFFFFFF<<32)+((iOff + (iVal-2)) & 0x7FFFFFFF);
- }
- *pi = i;
- assert_nc( *piOff>=iOff );
- return 0;
- }
- }
- /*
- ** Advance the iterator object passed as the only argument. Return true
- ** if the iterator reaches EOF, or false otherwise.
- */
- int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
- if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){
- pIter->bEof = 1;
- }
- return pIter->bEof;
- }
- int sqlite3Fts5PoslistReaderInit(
- const u8 *a, int n, /* Poslist buffer to iterate through */
- Fts5PoslistReader *pIter /* Iterator object to initialize */
- ){
- memset(pIter, 0, sizeof(*pIter));
- pIter->a = a;
- pIter->n = n;
- sqlite3Fts5PoslistReaderNext(pIter);
- return pIter->bEof;
- }
- /*
- ** Append position iPos to the position list being accumulated in buffer
- ** pBuf, which must be already be large enough to hold the new data.
- ** The previous position written to this list is *piPrev. *piPrev is set
- ** to iPos before returning.
- */
- void sqlite3Fts5PoslistSafeAppend(
- Fts5Buffer *pBuf,
- i64 *piPrev,
- i64 iPos
- ){
- if( iPos>=*piPrev ){
- static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
- if( (iPos & colmask) != (*piPrev & colmask) ){
- pBuf->p[pBuf->n++] = 1;
- pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32));
- *piPrev = (iPos & colmask);
- }
- pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2);
- *piPrev = iPos;
- }
- }
- int sqlite3Fts5PoslistWriterAppend(
- Fts5Buffer *pBuf,
- Fts5PoslistWriter *pWriter,
- i64 iPos
- ){
- int rc = 0; /* Initialized only to suppress erroneous warning from Clang */
- if( fts5BufferGrow(&rc, pBuf, 5+5+5) ) return rc;
- sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos);
- return SQLITE_OK;
- }
- void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte){
- void *pRet = 0;
- if( *pRc==SQLITE_OK ){
- pRet = sqlite3_malloc64(nByte);
- if( pRet==0 ){
- if( nByte>0 ) *pRc = SQLITE_NOMEM;
- }else{
- memset(pRet, 0, (size_t)nByte);
- }
- }
- return pRet;
- }
- /*
- ** Return a nul-terminated copy of the string indicated by pIn. If nIn
- ** is non-negative, then it is the length of the string in bytes. Otherwise,
- ** the length of the string is determined using strlen().
- **
- ** It is the responsibility of the caller to eventually free the returned
- ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned.
- */
- char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){
- char *zRet = 0;
- if( *pRc==SQLITE_OK ){
- if( nIn<0 ){
- nIn = (int)strlen(pIn);
- }
- zRet = (char*)sqlite3_malloc(nIn+1);
- if( zRet ){
- memcpy(zRet, pIn, nIn);
- zRet[nIn] = '\0';
- }else{
- *pRc = SQLITE_NOMEM;
- }
- }
- return zRet;
- }
- /*
- ** Return true if character 't' may be part of an FTS5 bareword, or false
- ** otherwise. Characters that may be part of barewords:
- **
- ** * All non-ASCII characters,
- ** * The 52 upper and lower case ASCII characters, and
- ** * The 10 integer ASCII characters.
- ** * The underscore character "_" (0x5F).
- ** * The unicode "subsitute" character (0x1A).
- */
- int sqlite3Fts5IsBareword(char t){
- u8 aBareword[128] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 .. 0x0F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, /* 0x10 .. 0x1F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 .. 0x2F */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30 .. 0x3F */
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 .. 0x4F */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50 .. 0x5F */
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 .. 0x6F */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 /* 0x70 .. 0x7F */
- };
- return (t & 0x80) || aBareword[(int)t];
- }
- /*************************************************************************
- */
- typedef struct Fts5TermsetEntry Fts5TermsetEntry;
- struct Fts5TermsetEntry {
- char *pTerm;
- int nTerm;
- int iIdx; /* Index (main or aPrefix[] entry) */
- Fts5TermsetEntry *pNext;
- };
- struct Fts5Termset {
- Fts5TermsetEntry *apHash[512];
- };
- int sqlite3Fts5TermsetNew(Fts5Termset **pp){
- int rc = SQLITE_OK;
- *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset));
- return rc;
- }
- int sqlite3Fts5TermsetAdd(
- Fts5Termset *p,
- int iIdx,
- const char *pTerm, int nTerm,
- int *pbPresent
- ){
- int rc = SQLITE_OK;
- *pbPresent = 0;
- if( p ){
- int i;
- u32 hash = 13;
- Fts5TermsetEntry *pEntry;
- /* Calculate a hash value for this term. This is the same hash checksum
- ** used by the fts5_hash.c module. This is not important for correct
- ** operation of the module, but is necessary to ensure that some tests
- ** designed to produce hash table collisions really do work. */
- for(i=nTerm-1; i>=0; i--){
- hash = (hash << 3) ^ hash ^ pTerm[i];
- }
- hash = (hash << 3) ^ hash ^ iIdx;
- hash = hash % ArraySize(p->apHash);
- for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
- if( pEntry->iIdx==iIdx
- && pEntry->nTerm==nTerm
- && memcmp(pEntry->pTerm, pTerm, nTerm)==0
- ){
- *pbPresent = 1;
- break;
- }
- }
- if( pEntry==0 ){
- pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
- if( pEntry ){
- pEntry->pTerm = (char*)&pEntry[1];
- pEntry->nTerm = nTerm;
- pEntry->iIdx = iIdx;
- memcpy(pEntry->pTerm, pTerm, nTerm);
- pEntry->pNext = p->apHash[hash];
- p->apHash[hash] = pEntry;
- }
- }
- }
- return rc;
- }
- void sqlite3Fts5TermsetFree(Fts5Termset *p){
- if( p ){
- u32 i;
- for(i=0; i<ArraySize(p->apHash); i++){
- Fts5TermsetEntry *pEntry = p->apHash[i];
- while( pEntry ){
- Fts5TermsetEntry *pDel = pEntry;
- pEntry = pEntry->pNext;
- sqlite3_free(pDel);
- }
- }
- sqlite3_free(p);
- }
- }
|