fossildelta.c 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094
  1. /*
  2. ** 2019-02-19
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. ******************************************************************************
  12. **
  13. ** This SQLite extension implements the delta functions used by the RBU
  14. ** extension. Three scalar functions and one table-valued function are
  15. ** implemented here:
  16. **
  17. ** delta_apply(X,D) -- apply delta D to file X and return the result
  18. ** delta_create(X,Y) -- compute and return a delta that carries X into Y
  19. ** delta_output_size(D) -- blob size in bytes output from applying delta D
  20. ** delta_parse(D) -- returns rows describing delta D
  21. **
  22. ** The delta format is the Fossil delta format, described in a comment
  23. ** on the delete_create() function implementation below, and also at
  24. **
  25. ** https://www.fossil-scm.org/fossil/doc/trunk/www/delta_format.wiki
  26. **
  27. ** This delta format is used by the RBU extension, which is the main
  28. ** reason that these routines are included in the extension library.
  29. ** RBU does not use this extension directly. Rather, this extension is
  30. ** provided as a convenience to developers who want to analyze RBU files
  31. ** that contain deltas.
  32. */
  33. #include <string.h>
  34. #include <assert.h>
  35. #include <stdlib.h>
  36. #include "sqlite3ext.h"
  37. SQLITE_EXTENSION_INIT1
  38. #ifndef SQLITE_AMALGAMATION
  39. /*
  40. ** The "u32" type must be an unsigned 32-bit integer. Adjust this
  41. */
  42. typedef unsigned int u32;
  43. /*
  44. ** Must be a 16-bit value
  45. */
  46. typedef short int s16;
  47. typedef unsigned short int u16;
  48. #endif /* SQLITE_AMALGAMATION */
  49. /*
  50. ** The width of a hash window in bytes. The algorithm only works if this
  51. ** is a power of 2.
  52. */
  53. #define NHASH 16
  54. /*
  55. ** The current state of the rolling hash.
  56. **
  57. ** z[] holds the values that have been hashed. z[] is a circular buffer.
  58. ** z[i] is the first entry and z[(i+NHASH-1)%NHASH] is the last entry of
  59. ** the window.
  60. **
  61. ** Hash.a is the sum of all elements of hash.z[]. Hash.b is a weighted
  62. ** sum. Hash.b is z[i]*NHASH + z[i+1]*(NHASH-1) + ... + z[i+NHASH-1]*1.
  63. ** (Each index for z[] should be module NHASH, of course. The %NHASH operator
  64. ** is omitted in the prior expression for brevity.)
  65. */
  66. typedef struct hash hash;
  67. struct hash {
  68. u16 a, b; /* Hash values */
  69. u16 i; /* Start of the hash window */
  70. char z[NHASH]; /* The values that have been hashed */
  71. };
  72. /*
  73. ** Initialize the rolling hash using the first NHASH characters of z[]
  74. */
  75. static void hash_init(hash *pHash, const char *z){
  76. u16 a, b, i;
  77. a = b = z[0];
  78. for(i=1; i<NHASH; i++){
  79. a += z[i];
  80. b += a;
  81. }
  82. memcpy(pHash->z, z, NHASH);
  83. pHash->a = a & 0xffff;
  84. pHash->b = b & 0xffff;
  85. pHash->i = 0;
  86. }
  87. /*
  88. ** Advance the rolling hash by a single character "c"
  89. */
  90. static void hash_next(hash *pHash, int c){
  91. u16 old = pHash->z[pHash->i];
  92. pHash->z[pHash->i] = c;
  93. pHash->i = (pHash->i+1)&(NHASH-1);
  94. pHash->a = pHash->a - old + c;
  95. pHash->b = pHash->b - NHASH*old + pHash->a;
  96. }
  97. /*
  98. ** Return a 32-bit hash value
  99. */
  100. static u32 hash_32bit(hash *pHash){
  101. return (pHash->a & 0xffff) | (((u32)(pHash->b & 0xffff))<<16);
  102. }
  103. /*
  104. ** Compute a hash on NHASH bytes.
  105. **
  106. ** This routine is intended to be equivalent to:
  107. ** hash h;
  108. ** hash_init(&h, zInput);
  109. ** return hash_32bit(&h);
  110. */
  111. static u32 hash_once(const char *z){
  112. u16 a, b, i;
  113. a = b = z[0];
  114. for(i=1; i<NHASH; i++){
  115. a += z[i];
  116. b += a;
  117. }
  118. return a | (((u32)b)<<16);
  119. }
  120. /*
  121. ** Write an base-64 integer into the given buffer.
  122. */
  123. static void putInt(unsigned int v, char **pz){
  124. static const char zDigits[] =
  125. "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~";
  126. /* 123456789 123456789 123456789 123456789 123456789 123456789 123 */
  127. int i, j;
  128. char zBuf[20];
  129. if( v==0 ){
  130. *(*pz)++ = '0';
  131. return;
  132. }
  133. for(i=0; v>0; i++, v>>=6){
  134. zBuf[i] = zDigits[v&0x3f];
  135. }
  136. for(j=i-1; j>=0; j--){
  137. *(*pz)++ = zBuf[j];
  138. }
  139. }
  140. /*
  141. ** Read bytes from *pz and convert them into a positive integer. When
  142. ** finished, leave *pz pointing to the first character past the end of
  143. ** the integer. The *pLen parameter holds the length of the string
  144. ** in *pz and is decremented once for each character in the integer.
  145. */
  146. static unsigned int deltaGetInt(const char **pz, int *pLen){
  147. static const signed char zValue[] = {
  148. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  149. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  150. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  151. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
  152. -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
  153. 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, 36,
  154. -1, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
  155. 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, -1, -1, -1, 63, -1,
  156. };
  157. unsigned int v = 0;
  158. int c;
  159. unsigned char *z = (unsigned char*)*pz;
  160. unsigned char *zStart = z;
  161. while( (c = zValue[0x7f&*(z++)])>=0 ){
  162. v = (v<<6) + c;
  163. }
  164. z--;
  165. *pLen -= z - zStart;
  166. *pz = (char*)z;
  167. return v;
  168. }
  169. /*
  170. ** Return the number digits in the base-64 representation of a positive integer
  171. */
  172. static int digit_count(int v){
  173. unsigned int i, x;
  174. for(i=1, x=64; v>=x; i++, x <<= 6){}
  175. return i;
  176. }
  177. #ifdef __GNUC__
  178. # define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__)
  179. #else
  180. # define GCC_VERSION 0
  181. #endif
  182. /*
  183. ** Compute a 32-bit big-endian checksum on the N-byte buffer. If the
  184. ** buffer is not a multiple of 4 bytes length, compute the sum that would
  185. ** have occurred if the buffer was padded with zeros to the next multiple
  186. ** of four bytes.
  187. */
  188. static unsigned int checksum(const char *zIn, size_t N){
  189. static const int byteOrderTest = 1;
  190. const unsigned char *z = (const unsigned char *)zIn;
  191. const unsigned char *zEnd = (const unsigned char*)&zIn[N&~3];
  192. unsigned sum = 0;
  193. assert( (z - (const unsigned char*)0)%4==0 ); /* Four-byte alignment */
  194. if( 0==*(char*)&byteOrderTest ){
  195. /* This is a big-endian machine */
  196. while( z<zEnd ){
  197. sum += *(unsigned*)z;
  198. z += 4;
  199. }
  200. }else{
  201. /* A little-endian machine */
  202. #if GCC_VERSION>=4003000
  203. while( z<zEnd ){
  204. sum += __builtin_bswap32(*(unsigned*)z);
  205. z += 4;
  206. }
  207. #elif defined(_MSC_VER) && _MSC_VER>=1300
  208. while( z<zEnd ){
  209. sum += _byteswap_ulong(*(unsigned*)z);
  210. z += 4;
  211. }
  212. #else
  213. unsigned sum0 = 0;
  214. unsigned sum1 = 0;
  215. unsigned sum2 = 0;
  216. while(N >= 16){
  217. sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]);
  218. sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]);
  219. sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]);
  220. sum += ((unsigned)z[3] + z[7] + z[11]+ z[15]);
  221. z += 16;
  222. N -= 16;
  223. }
  224. while(N >= 4){
  225. sum0 += z[0];
  226. sum1 += z[1];
  227. sum2 += z[2];
  228. sum += z[3];
  229. z += 4;
  230. N -= 4;
  231. }
  232. sum += (sum2 << 8) + (sum1 << 16) + (sum0 << 24);
  233. #endif
  234. }
  235. switch(N&3){
  236. case 3: sum += (z[2] << 8);
  237. case 2: sum += (z[1] << 16);
  238. case 1: sum += (z[0] << 24);
  239. default: ;
  240. }
  241. return sum;
  242. }
  243. /*
  244. ** Create a new delta.
  245. **
  246. ** The delta is written into a preallocated buffer, zDelta, which
  247. ** should be at least 60 bytes longer than the target file, zOut.
  248. ** The delta string will be NUL-terminated, but it might also contain
  249. ** embedded NUL characters if either the zSrc or zOut files are
  250. ** binary. This function returns the length of the delta string
  251. ** in bytes, excluding the final NUL terminator character.
  252. **
  253. ** Output Format:
  254. **
  255. ** The delta begins with a base64 number followed by a newline. This
  256. ** number is the number of bytes in the TARGET file. Thus, given a
  257. ** delta file z, a program can compute the size of the output file
  258. ** simply by reading the first line and decoding the base-64 number
  259. ** found there. The delta_output_size() routine does exactly this.
  260. **
  261. ** After the initial size number, the delta consists of a series of
  262. ** literal text segments and commands to copy from the SOURCE file.
  263. ** A copy command looks like this:
  264. **
  265. ** NNN@MMM,
  266. **
  267. ** where NNN is the number of bytes to be copied and MMM is the offset
  268. ** into the source file of the first byte (both base-64). If NNN is 0
  269. ** it means copy the rest of the input file. Literal text is like this:
  270. **
  271. ** NNN:TTTTT
  272. **
  273. ** where NNN is the number of bytes of text (base-64) and TTTTT is the text.
  274. **
  275. ** The last term is of the form
  276. **
  277. ** NNN;
  278. **
  279. ** In this case, NNN is a 32-bit bigendian checksum of the output file
  280. ** that can be used to verify that the delta applied correctly. All
  281. ** numbers are in base-64.
  282. **
  283. ** Pure text files generate a pure text delta. Binary files generate a
  284. ** delta that may contain some binary data.
  285. **
  286. ** Algorithm:
  287. **
  288. ** The encoder first builds a hash table to help it find matching
  289. ** patterns in the source file. 16-byte chunks of the source file
  290. ** sampled at evenly spaced intervals are used to populate the hash
  291. ** table.
  292. **
  293. ** Next we begin scanning the target file using a sliding 16-byte
  294. ** window. The hash of the 16-byte window in the target is used to
  295. ** search for a matching section in the source file. When a match
  296. ** is found, a copy command is added to the delta. An effort is
  297. ** made to extend the matching section to regions that come before
  298. ** and after the 16-byte hash window. A copy command is only issued
  299. ** if the result would use less space that just quoting the text
  300. ** literally. Literal text is added to the delta for sections that
  301. ** do not match or which can not be encoded efficiently using copy
  302. ** commands.
  303. */
  304. static int delta_create(
  305. const char *zSrc, /* The source or pattern file */
  306. unsigned int lenSrc, /* Length of the source file */
  307. const char *zOut, /* The target file */
  308. unsigned int lenOut, /* Length of the target file */
  309. char *zDelta /* Write the delta into this buffer */
  310. ){
  311. int i, base;
  312. char *zOrigDelta = zDelta;
  313. hash h;
  314. int nHash; /* Number of hash table entries */
  315. int *landmark; /* Primary hash table */
  316. int *collide; /* Collision chain */
  317. int lastRead = -1; /* Last byte of zSrc read by a COPY command */
  318. /* Add the target file size to the beginning of the delta
  319. */
  320. putInt(lenOut, &zDelta);
  321. *(zDelta++) = '\n';
  322. /* If the source file is very small, it means that we have no
  323. ** chance of ever doing a copy command. Just output a single
  324. ** literal segment for the entire target and exit.
  325. */
  326. if( lenSrc<=NHASH ){
  327. putInt(lenOut, &zDelta);
  328. *(zDelta++) = ':';
  329. memcpy(zDelta, zOut, lenOut);
  330. zDelta += lenOut;
  331. putInt(checksum(zOut, lenOut), &zDelta);
  332. *(zDelta++) = ';';
  333. return zDelta - zOrigDelta;
  334. }
  335. /* Compute the hash table used to locate matching sections in the
  336. ** source file.
  337. */
  338. nHash = lenSrc/NHASH;
  339. collide = sqlite3_malloc64( (sqlite3_int64)nHash*2*sizeof(int) );
  340. memset(collide, -1, nHash*2*sizeof(int));
  341. landmark = &collide[nHash];
  342. for(i=0; i<lenSrc-NHASH; i+=NHASH){
  343. int hv = hash_once(&zSrc[i]) % nHash;
  344. collide[i/NHASH] = landmark[hv];
  345. landmark[hv] = i/NHASH;
  346. }
  347. /* Begin scanning the target file and generating copy commands and
  348. ** literal sections of the delta.
  349. */
  350. base = 0; /* We have already generated everything before zOut[base] */
  351. while( base+NHASH<lenOut ){
  352. int iSrc, iBlock;
  353. unsigned int bestCnt, bestOfst=0, bestLitsz=0;
  354. hash_init(&h, &zOut[base]);
  355. i = 0; /* Trying to match a landmark against zOut[base+i] */
  356. bestCnt = 0;
  357. while( 1 ){
  358. int hv;
  359. int limit = 250;
  360. hv = hash_32bit(&h) % nHash;
  361. iBlock = landmark[hv];
  362. while( iBlock>=0 && (limit--)>0 ){
  363. /*
  364. ** The hash window has identified a potential match against
  365. ** landmark block iBlock. But we need to investigate further.
  366. **
  367. ** Look for a region in zOut that matches zSrc. Anchor the search
  368. ** at zSrc[iSrc] and zOut[base+i]. Do not include anything prior to
  369. ** zOut[base] or after zOut[outLen] nor anything after zSrc[srcLen].
  370. **
  371. ** Set cnt equal to the length of the match and set ofst so that
  372. ** zSrc[ofst] is the first element of the match. litsz is the number
  373. ** of characters between zOut[base] and the beginning of the match.
  374. ** sz will be the overhead (in bytes) needed to encode the copy
  375. ** command. Only generate copy command if the overhead of the
  376. ** copy command is less than the amount of literal text to be copied.
  377. */
  378. int cnt, ofst, litsz;
  379. int j, k, x, y;
  380. int sz;
  381. int limitX;
  382. /* Beginning at iSrc, match forwards as far as we can. j counts
  383. ** the number of characters that match */
  384. iSrc = iBlock*NHASH;
  385. y = base+i;
  386. limitX = ( lenSrc-iSrc <= lenOut-y ) ? lenSrc : iSrc + lenOut - y;
  387. for(x=iSrc; x<limitX; x++, y++){
  388. if( zSrc[x]!=zOut[y] ) break;
  389. }
  390. j = x - iSrc - 1;
  391. /* Beginning at iSrc-1, match backwards as far as we can. k counts
  392. ** the number of characters that match */
  393. for(k=1; k<iSrc && k<=i; k++){
  394. if( zSrc[iSrc-k]!=zOut[base+i-k] ) break;
  395. }
  396. k--;
  397. /* Compute the offset and size of the matching region */
  398. ofst = iSrc-k;
  399. cnt = j+k+1;
  400. litsz = i-k; /* Number of bytes of literal text before the copy */
  401. /* sz will hold the number of bytes needed to encode the "insert"
  402. ** command and the copy command, not counting the "insert" text */
  403. sz = digit_count(i-k)+digit_count(cnt)+digit_count(ofst)+3;
  404. if( cnt>=sz && cnt>bestCnt ){
  405. /* Remember this match only if it is the best so far and it
  406. ** does not increase the file size */
  407. bestCnt = cnt;
  408. bestOfst = iSrc-k;
  409. bestLitsz = litsz;
  410. }
  411. /* Check the next matching block */
  412. iBlock = collide[iBlock];
  413. }
  414. /* We have a copy command that does not cause the delta to be larger
  415. ** than a literal insert. So add the copy command to the delta.
  416. */
  417. if( bestCnt>0 ){
  418. if( bestLitsz>0 ){
  419. /* Add an insert command before the copy */
  420. putInt(bestLitsz,&zDelta);
  421. *(zDelta++) = ':';
  422. memcpy(zDelta, &zOut[base], bestLitsz);
  423. zDelta += bestLitsz;
  424. base += bestLitsz;
  425. }
  426. base += bestCnt;
  427. putInt(bestCnt, &zDelta);
  428. *(zDelta++) = '@';
  429. putInt(bestOfst, &zDelta);
  430. *(zDelta++) = ',';
  431. if( bestOfst + bestCnt -1 > lastRead ){
  432. lastRead = bestOfst + bestCnt - 1;
  433. }
  434. bestCnt = 0;
  435. break;
  436. }
  437. /* If we reach this point, it means no match is found so far */
  438. if( base+i+NHASH>=lenOut ){
  439. /* We have reached the end of the file and have not found any
  440. ** matches. Do an "insert" for everything that does not match */
  441. putInt(lenOut-base, &zDelta);
  442. *(zDelta++) = ':';
  443. memcpy(zDelta, &zOut[base], lenOut-base);
  444. zDelta += lenOut-base;
  445. base = lenOut;
  446. break;
  447. }
  448. /* Advance the hash by one character. Keep looking for a match */
  449. hash_next(&h, zOut[base+i+NHASH]);
  450. i++;
  451. }
  452. }
  453. /* Output a final "insert" record to get all the text at the end of
  454. ** the file that does not match anything in the source file.
  455. */
  456. if( base<lenOut ){
  457. putInt(lenOut-base, &zDelta);
  458. *(zDelta++) = ':';
  459. memcpy(zDelta, &zOut[base], lenOut-base);
  460. zDelta += lenOut-base;
  461. }
  462. /* Output the final checksum record. */
  463. putInt(checksum(zOut, lenOut), &zDelta);
  464. *(zDelta++) = ';';
  465. sqlite3_free(collide);
  466. return zDelta - zOrigDelta;
  467. }
  468. /*
  469. ** Return the size (in bytes) of the output from applying
  470. ** a delta.
  471. **
  472. ** This routine is provided so that an procedure that is able
  473. ** to call delta_apply() can learn how much space is required
  474. ** for the output and hence allocate nor more space that is really
  475. ** needed.
  476. */
  477. static int delta_output_size(const char *zDelta, int lenDelta){
  478. int size;
  479. size = deltaGetInt(&zDelta, &lenDelta);
  480. if( *zDelta!='\n' ){
  481. /* ERROR: size integer not terminated by "\n" */
  482. return -1;
  483. }
  484. return size;
  485. }
  486. /*
  487. ** Apply a delta.
  488. **
  489. ** The output buffer should be big enough to hold the whole output
  490. ** file and a NUL terminator at the end. The delta_output_size()
  491. ** routine will determine this size for you.
  492. **
  493. ** The delta string should be null-terminated. But the delta string
  494. ** may contain embedded NUL characters (if the input and output are
  495. ** binary files) so we also have to pass in the length of the delta in
  496. ** the lenDelta parameter.
  497. **
  498. ** This function returns the size of the output file in bytes (excluding
  499. ** the final NUL terminator character). Except, if the delta string is
  500. ** malformed or intended for use with a source file other than zSrc,
  501. ** then this routine returns -1.
  502. **
  503. ** Refer to the delta_create() documentation above for a description
  504. ** of the delta file format.
  505. */
  506. static int delta_apply(
  507. const char *zSrc, /* The source or pattern file */
  508. int lenSrc, /* Length of the source file */
  509. const char *zDelta, /* Delta to apply to the pattern */
  510. int lenDelta, /* Length of the delta */
  511. char *zOut /* Write the output into this preallocated buffer */
  512. ){
  513. unsigned int limit;
  514. unsigned int total = 0;
  515. #ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
  516. char *zOrigOut = zOut;
  517. #endif
  518. limit = deltaGetInt(&zDelta, &lenDelta);
  519. if( *zDelta!='\n' ){
  520. /* ERROR: size integer not terminated by "\n" */
  521. return -1;
  522. }
  523. zDelta++; lenDelta--;
  524. while( *zDelta && lenDelta>0 ){
  525. unsigned int cnt, ofst;
  526. cnt = deltaGetInt(&zDelta, &lenDelta);
  527. switch( zDelta[0] ){
  528. case '@': {
  529. zDelta++; lenDelta--;
  530. ofst = deltaGetInt(&zDelta, &lenDelta);
  531. if( lenDelta>0 && zDelta[0]!=',' ){
  532. /* ERROR: copy command not terminated by ',' */
  533. return -1;
  534. }
  535. zDelta++; lenDelta--;
  536. total += cnt;
  537. if( total>limit ){
  538. /* ERROR: copy exceeds output file size */
  539. return -1;
  540. }
  541. if( ofst+cnt > lenSrc ){
  542. /* ERROR: copy extends past end of input */
  543. return -1;
  544. }
  545. memcpy(zOut, &zSrc[ofst], cnt);
  546. zOut += cnt;
  547. break;
  548. }
  549. case ':': {
  550. zDelta++; lenDelta--;
  551. total += cnt;
  552. if( total>limit ){
  553. /* ERROR: insert command gives an output larger than predicted */
  554. return -1;
  555. }
  556. if( cnt>lenDelta ){
  557. /* ERROR: insert count exceeds size of delta */
  558. return -1;
  559. }
  560. memcpy(zOut, zDelta, cnt);
  561. zOut += cnt;
  562. zDelta += cnt;
  563. lenDelta -= cnt;
  564. break;
  565. }
  566. case ';': {
  567. zDelta++; lenDelta--;
  568. zOut[0] = 0;
  569. #ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
  570. if( cnt!=checksum(zOrigOut, total) ){
  571. /* ERROR: bad checksum */
  572. return -1;
  573. }
  574. #endif
  575. if( total!=limit ){
  576. /* ERROR: generated size does not match predicted size */
  577. return -1;
  578. }
  579. return total;
  580. }
  581. default: {
  582. /* ERROR: unknown delta operator */
  583. return -1;
  584. }
  585. }
  586. }
  587. /* ERROR: unterminated delta */
  588. return -1;
  589. }
  590. /*
  591. ** SQL functions: delta_create(X,Y)
  592. **
  593. ** Return a delta for carrying X into Y.
  594. */
  595. static void deltaCreateFunc(
  596. sqlite3_context *context,
  597. int argc,
  598. sqlite3_value **argv
  599. ){
  600. const char *aOrig; int nOrig; /* old blob */
  601. const char *aNew; int nNew; /* new blob */
  602. char *aOut; int nOut; /* output delta */
  603. assert( argc==2 );
  604. if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
  605. if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return;
  606. nOrig = sqlite3_value_bytes(argv[0]);
  607. aOrig = (const char*)sqlite3_value_blob(argv[0]);
  608. nNew = sqlite3_value_bytes(argv[1]);
  609. aNew = (const char*)sqlite3_value_blob(argv[1]);
  610. aOut = sqlite3_malloc64(nNew+70);
  611. if( aOut==0 ){
  612. sqlite3_result_error_nomem(context);
  613. }else{
  614. nOut = delta_create(aOrig, nOrig, aNew, nNew, aOut);
  615. if( nOut<0 ){
  616. sqlite3_free(aOut);
  617. sqlite3_result_error(context, "cannot create fossil delta", -1);
  618. }else{
  619. sqlite3_result_blob(context, aOut, nOut, sqlite3_free);
  620. }
  621. }
  622. }
  623. /*
  624. ** SQL functions: delta_apply(X,D)
  625. **
  626. ** Return the result of applying delta D to input X.
  627. */
  628. static void deltaApplyFunc(
  629. sqlite3_context *context,
  630. int argc,
  631. sqlite3_value **argv
  632. ){
  633. const char *aOrig; int nOrig; /* The X input */
  634. const char *aDelta; int nDelta; /* The input delta (D) */
  635. char *aOut; int nOut, nOut2; /* The output */
  636. assert( argc==2 );
  637. if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
  638. if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return;
  639. nOrig = sqlite3_value_bytes(argv[0]);
  640. aOrig = (const char*)sqlite3_value_blob(argv[0]);
  641. nDelta = sqlite3_value_bytes(argv[1]);
  642. aDelta = (const char*)sqlite3_value_blob(argv[1]);
  643. /* Figure out the size of the output */
  644. nOut = delta_output_size(aDelta, nDelta);
  645. if( nOut<0 ){
  646. sqlite3_result_error(context, "corrupt fossil delta", -1);
  647. return;
  648. }
  649. aOut = sqlite3_malloc64((sqlite3_int64)nOut+1);
  650. if( aOut==0 ){
  651. sqlite3_result_error_nomem(context);
  652. }else{
  653. nOut2 = delta_apply(aOrig, nOrig, aDelta, nDelta, aOut);
  654. if( nOut2!=nOut ){
  655. sqlite3_free(aOut);
  656. sqlite3_result_error(context, "corrupt fossil delta", -1);
  657. }else{
  658. sqlite3_result_blob(context, aOut, nOut, sqlite3_free);
  659. }
  660. }
  661. }
  662. /*
  663. ** SQL functions: delta_output_size(D)
  664. **
  665. ** Return the size of the output that results from applying delta D.
  666. */
  667. static void deltaOutputSizeFunc(
  668. sqlite3_context *context,
  669. int argc,
  670. sqlite3_value **argv
  671. ){
  672. const char *aDelta; int nDelta; /* The input delta (D) */
  673. int nOut; /* Size of output */
  674. assert( argc==1 );
  675. if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
  676. nDelta = sqlite3_value_bytes(argv[0]);
  677. aDelta = (const char*)sqlite3_value_blob(argv[0]);
  678. /* Figure out the size of the output */
  679. nOut = delta_output_size(aDelta, nDelta);
  680. if( nOut<0 ){
  681. sqlite3_result_error(context, "corrupt fossil delta", -1);
  682. return;
  683. }else{
  684. sqlite3_result_int(context, nOut);
  685. }
  686. }
  687. /*****************************************************************************
  688. ** Table-valued SQL function: delta_parse(DELTA)
  689. **
  690. ** Schema:
  691. **
  692. ** CREATE TABLE delta_parse(
  693. ** op TEXT,
  694. ** a1 INT,
  695. ** a2 ANY,
  696. ** delta HIDDEN BLOB
  697. ** );
  698. **
  699. ** Given an input DELTA, this function parses the delta and returns
  700. ** rows for each entry in the delta. The op column has one of the
  701. ** values SIZE, COPY, INSERT, CHECKSUM, ERROR.
  702. **
  703. ** Assuming no errors, the first row has op='SIZE'. a1 is the size of
  704. ** the output in bytes and a2 is NULL.
  705. **
  706. ** After the initial SIZE row, there are zero or more 'COPY' and/or 'INSERT'
  707. ** rows. A COPY row means content is copied from the source into the
  708. ** output. Column a1 is the number of bytes to copy and a2 is the offset
  709. ** into source from which to begin copying. An INSERT row means to
  710. ** insert text into the output stream. Column a1 is the number of bytes
  711. ** to insert and column is a BLOB that contains the text to be inserted.
  712. **
  713. ** The last row of a well-formed delta will have an op value of 'CHECKSUM'.
  714. ** The a1 column will be the value of the checksum and a2 will be NULL.
  715. **
  716. ** If the input delta is not well-formed, then a row with an op value
  717. ** of 'ERROR' is returned. The a1 value of the ERROR row is the offset
  718. ** into the delta where the error was encountered and a2 is NULL.
  719. */
  720. typedef struct deltaparsevtab_vtab deltaparsevtab_vtab;
  721. typedef struct deltaparsevtab_cursor deltaparsevtab_cursor;
  722. struct deltaparsevtab_vtab {
  723. sqlite3_vtab base; /* Base class - must be first */
  724. /* No additional information needed */
  725. };
  726. struct deltaparsevtab_cursor {
  727. sqlite3_vtab_cursor base; /* Base class - must be first */
  728. char *aDelta; /* The delta being parsed */
  729. int nDelta; /* Number of bytes in the delta */
  730. int iCursor; /* Current cursor location */
  731. int eOp; /* Name of current operator */
  732. unsigned int a1, a2; /* Arguments to current operator */
  733. int iNext; /* Next cursor value */
  734. };
  735. /* Operator names:
  736. */
  737. static const char *azOp[] = {
  738. "SIZE", "COPY", "INSERT", "CHECKSUM", "ERROR", "EOF"
  739. };
  740. #define DELTAPARSE_OP_SIZE 0
  741. #define DELTAPARSE_OP_COPY 1
  742. #define DELTAPARSE_OP_INSERT 2
  743. #define DELTAPARSE_OP_CHECKSUM 3
  744. #define DELTAPARSE_OP_ERROR 4
  745. #define DELTAPARSE_OP_EOF 5
  746. /*
  747. ** The deltaparsevtabConnect() method is invoked to create a new
  748. ** deltaparse virtual table.
  749. **
  750. ** Think of this routine as the constructor for deltaparsevtab_vtab objects.
  751. **
  752. ** All this routine needs to do is:
  753. **
  754. ** (1) Allocate the deltaparsevtab_vtab object and initialize all fields.
  755. **
  756. ** (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the
  757. ** result set of queries against the virtual table will look like.
  758. */
  759. static int deltaparsevtabConnect(
  760. sqlite3 *db,
  761. void *pAux,
  762. int argc, const char *const*argv,
  763. sqlite3_vtab **ppVtab,
  764. char **pzErr
  765. ){
  766. deltaparsevtab_vtab *pNew;
  767. int rc;
  768. rc = sqlite3_declare_vtab(db,
  769. "CREATE TABLE x(op,a1,a2,delta HIDDEN)"
  770. );
  771. /* For convenience, define symbolic names for the index to each column. */
  772. #define DELTAPARSEVTAB_OP 0
  773. #define DELTAPARSEVTAB_A1 1
  774. #define DELTAPARSEVTAB_A2 2
  775. #define DELTAPARSEVTAB_DELTA 3
  776. if( rc==SQLITE_OK ){
  777. pNew = sqlite3_malloc64( sizeof(*pNew) );
  778. *ppVtab = (sqlite3_vtab*)pNew;
  779. if( pNew==0 ) return SQLITE_NOMEM;
  780. memset(pNew, 0, sizeof(*pNew));
  781. sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS);
  782. }
  783. return rc;
  784. }
  785. /*
  786. ** This method is the destructor for deltaparsevtab_vtab objects.
  787. */
  788. static int deltaparsevtabDisconnect(sqlite3_vtab *pVtab){
  789. deltaparsevtab_vtab *p = (deltaparsevtab_vtab*)pVtab;
  790. sqlite3_free(p);
  791. return SQLITE_OK;
  792. }
  793. /*
  794. ** Constructor for a new deltaparsevtab_cursor object.
  795. */
  796. static int deltaparsevtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
  797. deltaparsevtab_cursor *pCur;
  798. pCur = sqlite3_malloc( sizeof(*pCur) );
  799. if( pCur==0 ) return SQLITE_NOMEM;
  800. memset(pCur, 0, sizeof(*pCur));
  801. *ppCursor = &pCur->base;
  802. return SQLITE_OK;
  803. }
  804. /*
  805. ** Destructor for a deltaparsevtab_cursor.
  806. */
  807. static int deltaparsevtabClose(sqlite3_vtab_cursor *cur){
  808. deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
  809. sqlite3_free(pCur->aDelta);
  810. sqlite3_free(pCur);
  811. return SQLITE_OK;
  812. }
  813. /*
  814. ** Advance a deltaparsevtab_cursor to its next row of output.
  815. */
  816. static int deltaparsevtabNext(sqlite3_vtab_cursor *cur){
  817. deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
  818. const char *z;
  819. int i = 0;
  820. pCur->iCursor = pCur->iNext;
  821. z = pCur->aDelta + pCur->iCursor;
  822. pCur->a1 = deltaGetInt(&z, &i);
  823. switch( z[0] ){
  824. case '@': {
  825. z++;
  826. pCur->a2 = deltaGetInt(&z, &i);
  827. pCur->eOp = DELTAPARSE_OP_COPY;
  828. pCur->iNext = (int)(&z[1] - pCur->aDelta);
  829. break;
  830. }
  831. case ':': {
  832. z++;
  833. pCur->a2 = (unsigned int)(z - pCur->aDelta);
  834. pCur->eOp = DELTAPARSE_OP_INSERT;
  835. pCur->iNext = (int)(&z[pCur->a1] - pCur->aDelta);
  836. break;
  837. }
  838. case ';': {
  839. pCur->eOp = DELTAPARSE_OP_CHECKSUM;
  840. pCur->iNext = pCur->nDelta;
  841. break;
  842. }
  843. default: {
  844. if( pCur->iNext==pCur->nDelta ){
  845. pCur->eOp = DELTAPARSE_OP_EOF;
  846. }else{
  847. pCur->eOp = DELTAPARSE_OP_ERROR;
  848. pCur->iNext = pCur->nDelta;
  849. }
  850. break;
  851. }
  852. }
  853. return SQLITE_OK;
  854. }
  855. /*
  856. ** Return values of columns for the row at which the deltaparsevtab_cursor
  857. ** is currently pointing.
  858. */
  859. static int deltaparsevtabColumn(
  860. sqlite3_vtab_cursor *cur, /* The cursor */
  861. sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
  862. int i /* Which column to return */
  863. ){
  864. deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
  865. switch( i ){
  866. case DELTAPARSEVTAB_OP: {
  867. sqlite3_result_text(ctx, azOp[pCur->eOp], -1, SQLITE_STATIC);
  868. break;
  869. }
  870. case DELTAPARSEVTAB_A1: {
  871. sqlite3_result_int(ctx, pCur->a1);
  872. break;
  873. }
  874. case DELTAPARSEVTAB_A2: {
  875. if( pCur->eOp==DELTAPARSE_OP_COPY ){
  876. sqlite3_result_int(ctx, pCur->a2);
  877. }else if( pCur->eOp==DELTAPARSE_OP_INSERT ){
  878. sqlite3_result_blob(ctx, pCur->aDelta+pCur->a2, pCur->a1,
  879. SQLITE_TRANSIENT);
  880. }
  881. break;
  882. }
  883. case DELTAPARSEVTAB_DELTA: {
  884. sqlite3_result_blob(ctx, pCur->aDelta, pCur->nDelta, SQLITE_TRANSIENT);
  885. break;
  886. }
  887. }
  888. return SQLITE_OK;
  889. }
  890. /*
  891. ** Return the rowid for the current row. In this implementation, the
  892. ** rowid is the same as the output value.
  893. */
  894. static int deltaparsevtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
  895. deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
  896. *pRowid = pCur->iCursor;
  897. return SQLITE_OK;
  898. }
  899. /*
  900. ** Return TRUE if the cursor has been moved off of the last
  901. ** row of output.
  902. */
  903. static int deltaparsevtabEof(sqlite3_vtab_cursor *cur){
  904. deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
  905. return pCur->eOp==DELTAPARSE_OP_EOF;
  906. }
  907. /*
  908. ** This method is called to "rewind" the deltaparsevtab_cursor object back
  909. ** to the first row of output. This method is always called at least
  910. ** once prior to any call to deltaparsevtabColumn() or deltaparsevtabRowid() or
  911. ** deltaparsevtabEof().
  912. */
  913. static int deltaparsevtabFilter(
  914. sqlite3_vtab_cursor *pVtabCursor,
  915. int idxNum, const char *idxStr,
  916. int argc, sqlite3_value **argv
  917. ){
  918. deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor *)pVtabCursor;
  919. const char *a;
  920. int i = 0;
  921. pCur->eOp = DELTAPARSE_OP_ERROR;
  922. if( idxNum!=1 ){
  923. return SQLITE_OK;
  924. }
  925. pCur->nDelta = sqlite3_value_bytes(argv[0]);
  926. a = (const char*)sqlite3_value_blob(argv[0]);
  927. if( pCur->nDelta==0 || a==0 ){
  928. return SQLITE_OK;
  929. }
  930. pCur->aDelta = sqlite3_malloc64( pCur->nDelta+1 );
  931. if( pCur->aDelta==0 ){
  932. pCur->nDelta = 0;
  933. return SQLITE_NOMEM;
  934. }
  935. memcpy(pCur->aDelta, a, pCur->nDelta);
  936. pCur->aDelta[pCur->nDelta] = 0;
  937. a = pCur->aDelta;
  938. pCur->eOp = DELTAPARSE_OP_SIZE;
  939. pCur->a1 = deltaGetInt(&a, &i);
  940. if( a[0]!='\n' ){
  941. pCur->eOp = DELTAPARSE_OP_ERROR;
  942. pCur->a1 = pCur->a2 = 0;
  943. pCur->iNext = pCur->nDelta;
  944. return SQLITE_OK;
  945. }
  946. a++;
  947. pCur->iNext = (unsigned int)(a - pCur->aDelta);
  948. return SQLITE_OK;
  949. }
  950. /*
  951. ** SQLite will invoke this method one or more times while planning a query
  952. ** that uses the virtual table. This routine needs to create
  953. ** a query plan for each invocation and compute an estimated cost for that
  954. ** plan.
  955. */
  956. static int deltaparsevtabBestIndex(
  957. sqlite3_vtab *tab,
  958. sqlite3_index_info *pIdxInfo
  959. ){
  960. int i;
  961. for(i=0; i<pIdxInfo->nConstraint; i++){
  962. if( pIdxInfo->aConstraint[i].iColumn != DELTAPARSEVTAB_DELTA ) continue;
  963. if( pIdxInfo->aConstraint[i].usable==0 ) continue;
  964. if( pIdxInfo->aConstraint[i].op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue;
  965. pIdxInfo->aConstraintUsage[i].argvIndex = 1;
  966. pIdxInfo->aConstraintUsage[i].omit = 1;
  967. pIdxInfo->estimatedCost = (double)1;
  968. pIdxInfo->estimatedRows = 10;
  969. pIdxInfo->idxNum = 1;
  970. return SQLITE_OK;
  971. }
  972. pIdxInfo->idxNum = 0;
  973. pIdxInfo->estimatedCost = (double)0x7fffffff;
  974. pIdxInfo->estimatedRows = 0x7fffffff;
  975. return SQLITE_CONSTRAINT;
  976. }
  977. /*
  978. ** This following structure defines all the methods for the
  979. ** virtual table.
  980. */
  981. static sqlite3_module deltaparsevtabModule = {
  982. /* iVersion */ 0,
  983. /* xCreate */ 0,
  984. /* xConnect */ deltaparsevtabConnect,
  985. /* xBestIndex */ deltaparsevtabBestIndex,
  986. /* xDisconnect */ deltaparsevtabDisconnect,
  987. /* xDestroy */ 0,
  988. /* xOpen */ deltaparsevtabOpen,
  989. /* xClose */ deltaparsevtabClose,
  990. /* xFilter */ deltaparsevtabFilter,
  991. /* xNext */ deltaparsevtabNext,
  992. /* xEof */ deltaparsevtabEof,
  993. /* xColumn */ deltaparsevtabColumn,
  994. /* xRowid */ deltaparsevtabRowid,
  995. /* xUpdate */ 0,
  996. /* xBegin */ 0,
  997. /* xSync */ 0,
  998. /* xCommit */ 0,
  999. /* xRollback */ 0,
  1000. /* xFindMethod */ 0,
  1001. /* xRename */ 0,
  1002. /* xSavepoint */ 0,
  1003. /* xRelease */ 0,
  1004. /* xRollbackTo */ 0,
  1005. /* xShadowName */ 0,
  1006. /* xIntegrity */ 0
  1007. };
  1008. #ifdef _WIN32
  1009. __declspec(dllexport)
  1010. #endif
  1011. int sqlite3_fossildelta_init(
  1012. sqlite3 *db,
  1013. char **pzErrMsg,
  1014. const sqlite3_api_routines *pApi
  1015. ){
  1016. static const int enc = SQLITE_UTF8|SQLITE_INNOCUOUS;
  1017. int rc = SQLITE_OK;
  1018. SQLITE_EXTENSION_INIT2(pApi);
  1019. (void)pzErrMsg; /* Unused parameter */
  1020. rc = sqlite3_create_function(db, "delta_create", 2, enc, 0,
  1021. deltaCreateFunc, 0, 0);
  1022. if( rc==SQLITE_OK ){
  1023. rc = sqlite3_create_function(db, "delta_apply", 2, enc, 0,
  1024. deltaApplyFunc, 0, 0);
  1025. }
  1026. if( rc==SQLITE_OK ){
  1027. rc = sqlite3_create_function(db, "delta_output_size", 1, enc, 0,
  1028. deltaOutputSizeFunc, 0, 0);
  1029. }
  1030. if( rc==SQLITE_OK ){
  1031. rc = sqlite3_create_module(db, "delta_parse", &deltaparsevtabModule, 0);
  1032. }
  1033. return rc;
  1034. }