123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345 |
- /*
- ** 2015 May 30
- **
- ** The author disclaims copyright to this source code. In place of
- ** a legal notice, here is a blessing:
- **
- ** May you do good and not evil.
- ** May you find forgiveness for yourself and forgive others.
- ** May you share freely, never taking more than you give.
- **
- ******************************************************************************
- **
- ** Routines for varint serialization and deserialization.
- */
- #include "fts5Int.h"
- /*
- ** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
- ** Except, this version does handle the single byte case that the core
- ** version depends on being handled before its function is called.
- */
- int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
- u32 a,b;
- /* The 1-byte case. Overwhelmingly the most common. */
- a = *p;
- /* a: p0 (unmasked) */
- if (!(a&0x80))
- {
- /* Values between 0 and 127 */
- *v = a;
- return 1;
- }
- /* The 2-byte case */
- p++;
- b = *p;
- /* b: p1 (unmasked) */
- if (!(b&0x80))
- {
- /* Values between 128 and 16383 */
- a &= 0x7f;
- a = a<<7;
- *v = a | b;
- return 2;
- }
- /* The 3-byte case */
- p++;
- a = a<<14;
- a |= *p;
- /* a: p0<<14 | p2 (unmasked) */
- if (!(a&0x80))
- {
- /* Values between 16384 and 2097151 */
- a &= (0x7f<<14)|(0x7f);
- b &= 0x7f;
- b = b<<7;
- *v = a | b;
- return 3;
- }
- /* A 32-bit varint is used to store size information in btrees.
- ** Objects are rarely larger than 2MiB limit of a 3-byte varint.
- ** A 3-byte varint is sufficient, for example, to record the size
- ** of a 1048569-byte BLOB or string.
- **
- ** We only unroll the first 1-, 2-, and 3- byte cases. The very
- ** rare larger cases can be handled by the slower 64-bit varint
- ** routine.
- */
- {
- u64 v64;
- u8 n;
- p -= 2;
- n = sqlite3Fts5GetVarint(p, &v64);
- *v = ((u32)v64) & 0x7FFFFFFF;
- assert( n>3 && n<=9 );
- return n;
- }
- }
- /*
- ** Bitmasks used by sqlite3GetVarint(). These precomputed constants
- ** are defined here rather than simply putting the constant expressions
- ** inline in order to work around bugs in the RVT compiler.
- **
- ** SLOT_2_0 A mask for (0x7f<<14) | 0x7f
- **
- ** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0
- */
- #define SLOT_2_0 0x001fc07f
- #define SLOT_4_2_0 0xf01fc07f
- /*
- ** Read a 64-bit variable-length integer from memory starting at p[0].
- ** Return the number of bytes read. The value is stored in *v.
- */
- u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
- u32 a,b,s;
- a = *p;
- /* a: p0 (unmasked) */
- if (!(a&0x80))
- {
- *v = a;
- return 1;
- }
- p++;
- b = *p;
- /* b: p1 (unmasked) */
- if (!(b&0x80))
- {
- a &= 0x7f;
- a = a<<7;
- a |= b;
- *v = a;
- return 2;
- }
- /* Verify that constants are precomputed correctly */
- assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
- assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );
- p++;
- a = a<<14;
- a |= *p;
- /* a: p0<<14 | p2 (unmasked) */
- if (!(a&0x80))
- {
- a &= SLOT_2_0;
- b &= 0x7f;
- b = b<<7;
- a |= b;
- *v = a;
- return 3;
- }
- /* CSE1 from below */
- a &= SLOT_2_0;
- p++;
- b = b<<14;
- b |= *p;
- /* b: p1<<14 | p3 (unmasked) */
- if (!(b&0x80))
- {
- b &= SLOT_2_0;
- /* moved CSE1 up */
- /* a &= (0x7f<<14)|(0x7f); */
- a = a<<7;
- a |= b;
- *v = a;
- return 4;
- }
- /* a: p0<<14 | p2 (masked) */
- /* b: p1<<14 | p3 (unmasked) */
- /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
- /* moved CSE1 up */
- /* a &= (0x7f<<14)|(0x7f); */
- b &= SLOT_2_0;
- s = a;
- /* s: p0<<14 | p2 (masked) */
- p++;
- a = a<<14;
- a |= *p;
- /* a: p0<<28 | p2<<14 | p4 (unmasked) */
- if (!(a&0x80))
- {
- /* we can skip these cause they were (effectively) done above in calc'ing s */
- /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
- /* b &= (0x7f<<14)|(0x7f); */
- b = b<<7;
- a |= b;
- s = s>>18;
- *v = ((u64)s)<<32 | a;
- return 5;
- }
- /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
- s = s<<7;
- s |= b;
- /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
- p++;
- b = b<<14;
- b |= *p;
- /* b: p1<<28 | p3<<14 | p5 (unmasked) */
- if (!(b&0x80))
- {
- /* we can skip this cause it was (effectively) done above in calc'ing s */
- /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
- a &= SLOT_2_0;
- a = a<<7;
- a |= b;
- s = s>>18;
- *v = ((u64)s)<<32 | a;
- return 6;
- }
- p++;
- a = a<<14;
- a |= *p;
- /* a: p2<<28 | p4<<14 | p6 (unmasked) */
- if (!(a&0x80))
- {
- a &= SLOT_4_2_0;
- b &= SLOT_2_0;
- b = b<<7;
- a |= b;
- s = s>>11;
- *v = ((u64)s)<<32 | a;
- return 7;
- }
- /* CSE2 from below */
- a &= SLOT_2_0;
- p++;
- b = b<<14;
- b |= *p;
- /* b: p3<<28 | p5<<14 | p7 (unmasked) */
- if (!(b&0x80))
- {
- b &= SLOT_4_2_0;
- /* moved CSE2 up */
- /* a &= (0x7f<<14)|(0x7f); */
- a = a<<7;
- a |= b;
- s = s>>4;
- *v = ((u64)s)<<32 | a;
- return 8;
- }
- p++;
- a = a<<15;
- a |= *p;
- /* a: p4<<29 | p6<<15 | p8 (unmasked) */
- /* moved CSE2 up */
- /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
- b &= SLOT_2_0;
- b = b<<8;
- a |= b;
- s = s<<4;
- b = p[-4];
- b &= 0x7f;
- b = b>>3;
- s |= b;
- *v = ((u64)s)<<32 | a;
- return 9;
- }
- /*
- ** The variable-length integer encoding is as follows:
- **
- ** KEY:
- ** A = 0xxxxxxx 7 bits of data and one flag bit
- ** B = 1xxxxxxx 7 bits of data and one flag bit
- ** C = xxxxxxxx 8 bits of data
- **
- ** 7 bits - A
- ** 14 bits - BA
- ** 21 bits - BBA
- ** 28 bits - BBBA
- ** 35 bits - BBBBA
- ** 42 bits - BBBBBA
- ** 49 bits - BBBBBBA
- ** 56 bits - BBBBBBBA
- ** 64 bits - BBBBBBBBC
- */
- #ifdef SQLITE_NOINLINE
- # define FTS5_NOINLINE SQLITE_NOINLINE
- #else
- # define FTS5_NOINLINE
- #endif
- /*
- ** Write a 64-bit variable-length integer to memory starting at p[0].
- ** The length of data write will be between 1 and 9 bytes. The number
- ** of bytes written is returned.
- **
- ** A variable-length integer consists of the lower 7 bits of each byte
- ** for all bytes that have the 8th bit set and one byte with the 8th
- ** bit clear. Except, if we get to the 9th byte, it stores the full
- ** 8 bits and is the last byte.
- */
- static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){
- int i, j, n;
- u8 buf[10];
- if( v & (((u64)0xff000000)<<32) ){
- p[8] = (u8)v;
- v >>= 8;
- for(i=7; i>=0; i--){
- p[i] = (u8)((v & 0x7f) | 0x80);
- v >>= 7;
- }
- return 9;
- }
- n = 0;
- do{
- buf[n++] = (u8)((v & 0x7f) | 0x80);
- v >>= 7;
- }while( v!=0 );
- buf[0] &= 0x7f;
- assert( n<=9 );
- for(i=0, j=n-1; j>=0; j--, i++){
- p[i] = buf[j];
- }
- return n;
- }
- int sqlite3Fts5PutVarint(unsigned char *p, u64 v){
- if( v<=0x7f ){
- p[0] = v&0x7f;
- return 1;
- }
- if( v<=0x3fff ){
- p[0] = ((v>>7)&0x7f)|0x80;
- p[1] = v&0x7f;
- return 2;
- }
- return fts5PutVarint64(p,v);
- }
- int sqlite3Fts5GetVarintLen(u32 iVal){
- #if 0
- if( iVal<(1 << 7 ) ) return 1;
- #endif
- assert( iVal>=(1 << 7) );
- if( iVal<(1 << 14) ) return 2;
- if( iVal<(1 << 21) ) return 3;
- if( iVal<(1 << 28) ) return 4;
- return 5;
- }
|