fts5_varint.c 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. /*
  2. ** 2015 May 30
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. ******************************************************************************
  12. **
  13. ** Routines for varint serialization and deserialization.
  14. */
  15. #include "fts5Int.h"
  16. /*
  17. ** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
  18. ** Except, this version does handle the single byte case that the core
  19. ** version depends on being handled before its function is called.
  20. */
  21. int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
  22. u32 a,b;
  23. /* The 1-byte case. Overwhelmingly the most common. */
  24. a = *p;
  25. /* a: p0 (unmasked) */
  26. if (!(a&0x80))
  27. {
  28. /* Values between 0 and 127 */
  29. *v = a;
  30. return 1;
  31. }
  32. /* The 2-byte case */
  33. p++;
  34. b = *p;
  35. /* b: p1 (unmasked) */
  36. if (!(b&0x80))
  37. {
  38. /* Values between 128 and 16383 */
  39. a &= 0x7f;
  40. a = a<<7;
  41. *v = a | b;
  42. return 2;
  43. }
  44. /* The 3-byte case */
  45. p++;
  46. a = a<<14;
  47. a |= *p;
  48. /* a: p0<<14 | p2 (unmasked) */
  49. if (!(a&0x80))
  50. {
  51. /* Values between 16384 and 2097151 */
  52. a &= (0x7f<<14)|(0x7f);
  53. b &= 0x7f;
  54. b = b<<7;
  55. *v = a | b;
  56. return 3;
  57. }
  58. /* A 32-bit varint is used to store size information in btrees.
  59. ** Objects are rarely larger than 2MiB limit of a 3-byte varint.
  60. ** A 3-byte varint is sufficient, for example, to record the size
  61. ** of a 1048569-byte BLOB or string.
  62. **
  63. ** We only unroll the first 1-, 2-, and 3- byte cases. The very
  64. ** rare larger cases can be handled by the slower 64-bit varint
  65. ** routine.
  66. */
  67. {
  68. u64 v64;
  69. u8 n;
  70. p -= 2;
  71. n = sqlite3Fts5GetVarint(p, &v64);
  72. *v = ((u32)v64) & 0x7FFFFFFF;
  73. assert( n>3 && n<=9 );
  74. return n;
  75. }
  76. }
  77. /*
  78. ** Bitmasks used by sqlite3GetVarint(). These precomputed constants
  79. ** are defined here rather than simply putting the constant expressions
  80. ** inline in order to work around bugs in the RVT compiler.
  81. **
  82. ** SLOT_2_0 A mask for (0x7f<<14) | 0x7f
  83. **
  84. ** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0
  85. */
  86. #define SLOT_2_0 0x001fc07f
  87. #define SLOT_4_2_0 0xf01fc07f
  88. /*
  89. ** Read a 64-bit variable-length integer from memory starting at p[0].
  90. ** Return the number of bytes read. The value is stored in *v.
  91. */
  92. u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
  93. u32 a,b,s;
  94. a = *p;
  95. /* a: p0 (unmasked) */
  96. if (!(a&0x80))
  97. {
  98. *v = a;
  99. return 1;
  100. }
  101. p++;
  102. b = *p;
  103. /* b: p1 (unmasked) */
  104. if (!(b&0x80))
  105. {
  106. a &= 0x7f;
  107. a = a<<7;
  108. a |= b;
  109. *v = a;
  110. return 2;
  111. }
  112. /* Verify that constants are precomputed correctly */
  113. assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
  114. assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );
  115. p++;
  116. a = a<<14;
  117. a |= *p;
  118. /* a: p0<<14 | p2 (unmasked) */
  119. if (!(a&0x80))
  120. {
  121. a &= SLOT_2_0;
  122. b &= 0x7f;
  123. b = b<<7;
  124. a |= b;
  125. *v = a;
  126. return 3;
  127. }
  128. /* CSE1 from below */
  129. a &= SLOT_2_0;
  130. p++;
  131. b = b<<14;
  132. b |= *p;
  133. /* b: p1<<14 | p3 (unmasked) */
  134. if (!(b&0x80))
  135. {
  136. b &= SLOT_2_0;
  137. /* moved CSE1 up */
  138. /* a &= (0x7f<<14)|(0x7f); */
  139. a = a<<7;
  140. a |= b;
  141. *v = a;
  142. return 4;
  143. }
  144. /* a: p0<<14 | p2 (masked) */
  145. /* b: p1<<14 | p3 (unmasked) */
  146. /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  147. /* moved CSE1 up */
  148. /* a &= (0x7f<<14)|(0x7f); */
  149. b &= SLOT_2_0;
  150. s = a;
  151. /* s: p0<<14 | p2 (masked) */
  152. p++;
  153. a = a<<14;
  154. a |= *p;
  155. /* a: p0<<28 | p2<<14 | p4 (unmasked) */
  156. if (!(a&0x80))
  157. {
  158. /* we can skip these cause they were (effectively) done above in calc'ing s */
  159. /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
  160. /* b &= (0x7f<<14)|(0x7f); */
  161. b = b<<7;
  162. a |= b;
  163. s = s>>18;
  164. *v = ((u64)s)<<32 | a;
  165. return 5;
  166. }
  167. /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  168. s = s<<7;
  169. s |= b;
  170. /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  171. p++;
  172. b = b<<14;
  173. b |= *p;
  174. /* b: p1<<28 | p3<<14 | p5 (unmasked) */
  175. if (!(b&0x80))
  176. {
  177. /* we can skip this cause it was (effectively) done above in calc'ing s */
  178. /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
  179. a &= SLOT_2_0;
  180. a = a<<7;
  181. a |= b;
  182. s = s>>18;
  183. *v = ((u64)s)<<32 | a;
  184. return 6;
  185. }
  186. p++;
  187. a = a<<14;
  188. a |= *p;
  189. /* a: p2<<28 | p4<<14 | p6 (unmasked) */
  190. if (!(a&0x80))
  191. {
  192. a &= SLOT_4_2_0;
  193. b &= SLOT_2_0;
  194. b = b<<7;
  195. a |= b;
  196. s = s>>11;
  197. *v = ((u64)s)<<32 | a;
  198. return 7;
  199. }
  200. /* CSE2 from below */
  201. a &= SLOT_2_0;
  202. p++;
  203. b = b<<14;
  204. b |= *p;
  205. /* b: p3<<28 | p5<<14 | p7 (unmasked) */
  206. if (!(b&0x80))
  207. {
  208. b &= SLOT_4_2_0;
  209. /* moved CSE2 up */
  210. /* a &= (0x7f<<14)|(0x7f); */
  211. a = a<<7;
  212. a |= b;
  213. s = s>>4;
  214. *v = ((u64)s)<<32 | a;
  215. return 8;
  216. }
  217. p++;
  218. a = a<<15;
  219. a |= *p;
  220. /* a: p4<<29 | p6<<15 | p8 (unmasked) */
  221. /* moved CSE2 up */
  222. /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
  223. b &= SLOT_2_0;
  224. b = b<<8;
  225. a |= b;
  226. s = s<<4;
  227. b = p[-4];
  228. b &= 0x7f;
  229. b = b>>3;
  230. s |= b;
  231. *v = ((u64)s)<<32 | a;
  232. return 9;
  233. }
  234. /*
  235. ** The variable-length integer encoding is as follows:
  236. **
  237. ** KEY:
  238. ** A = 0xxxxxxx 7 bits of data and one flag bit
  239. ** B = 1xxxxxxx 7 bits of data and one flag bit
  240. ** C = xxxxxxxx 8 bits of data
  241. **
  242. ** 7 bits - A
  243. ** 14 bits - BA
  244. ** 21 bits - BBA
  245. ** 28 bits - BBBA
  246. ** 35 bits - BBBBA
  247. ** 42 bits - BBBBBA
  248. ** 49 bits - BBBBBBA
  249. ** 56 bits - BBBBBBBA
  250. ** 64 bits - BBBBBBBBC
  251. */
  252. #ifdef SQLITE_NOINLINE
  253. # define FTS5_NOINLINE SQLITE_NOINLINE
  254. #else
  255. # define FTS5_NOINLINE
  256. #endif
  257. /*
  258. ** Write a 64-bit variable-length integer to memory starting at p[0].
  259. ** The length of data write will be between 1 and 9 bytes. The number
  260. ** of bytes written is returned.
  261. **
  262. ** A variable-length integer consists of the lower 7 bits of each byte
  263. ** for all bytes that have the 8th bit set and one byte with the 8th
  264. ** bit clear. Except, if we get to the 9th byte, it stores the full
  265. ** 8 bits and is the last byte.
  266. */
  267. static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){
  268. int i, j, n;
  269. u8 buf[10];
  270. if( v & (((u64)0xff000000)<<32) ){
  271. p[8] = (u8)v;
  272. v >>= 8;
  273. for(i=7; i>=0; i--){
  274. p[i] = (u8)((v & 0x7f) | 0x80);
  275. v >>= 7;
  276. }
  277. return 9;
  278. }
  279. n = 0;
  280. do{
  281. buf[n++] = (u8)((v & 0x7f) | 0x80);
  282. v >>= 7;
  283. }while( v!=0 );
  284. buf[0] &= 0x7f;
  285. assert( n<=9 );
  286. for(i=0, j=n-1; j>=0; j--, i++){
  287. p[i] = buf[j];
  288. }
  289. return n;
  290. }
  291. int sqlite3Fts5PutVarint(unsigned char *p, u64 v){
  292. if( v<=0x7f ){
  293. p[0] = v&0x7f;
  294. return 1;
  295. }
  296. if( v<=0x3fff ){
  297. p[0] = ((v>>7)&0x7f)|0x80;
  298. p[1] = v&0x7f;
  299. return 2;
  300. }
  301. return fts5PutVarint64(p,v);
  302. }
  303. int sqlite3Fts5GetVarintLen(u32 iVal){
  304. #if 0
  305. if( iVal<(1 << 7 ) ) return 1;
  306. #endif
  307. assert( iVal>=(1 << 7) );
  308. if( iVal<(1 << 14) ) return 2;
  309. if( iVal<(1 << 21) ) return 3;
  310. if( iVal<(1 << 28) ) return 4;
  311. return 5;
  312. }