12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094 |
- /*
- ** 2019-02-19
- **
- ** The author disclaims copyright to this source code. In place of
- ** a legal notice, here is a blessing:
- **
- ** May you do good and not evil.
- ** May you find forgiveness for yourself and forgive others.
- ** May you share freely, never taking more than you give.
- **
- ******************************************************************************
- **
- ** This SQLite extension implements the delta functions used by the RBU
- ** extension. Three scalar functions and one table-valued function are
- ** implemented here:
- **
- ** delta_apply(X,D) -- apply delta D to file X and return the result
- ** delta_create(X,Y) -- compute and return a delta that carries X into Y
- ** delta_output_size(D) -- blob size in bytes output from applying delta D
- ** delta_parse(D) -- returns rows describing delta D
- **
- ** The delta format is the Fossil delta format, described in a comment
- ** on the delete_create() function implementation below, and also at
- **
- ** https://www.fossil-scm.org/fossil/doc/trunk/www/delta_format.wiki
- **
- ** This delta format is used by the RBU extension, which is the main
- ** reason that these routines are included in the extension library.
- ** RBU does not use this extension directly. Rather, this extension is
- ** provided as a convenience to developers who want to analyze RBU files
- ** that contain deltas.
- */
- #include <string.h>
- #include <assert.h>
- #include <stdlib.h>
- #include "sqlite3ext.h"
- SQLITE_EXTENSION_INIT1
- #ifndef SQLITE_AMALGAMATION
- /*
- ** The "u32" type must be an unsigned 32-bit integer. Adjust this
- */
- typedef unsigned int u32;
- /*
- ** Must be a 16-bit value
- */
- typedef short int s16;
- typedef unsigned short int u16;
- #endif /* SQLITE_AMALGAMATION */
- /*
- ** The width of a hash window in bytes. The algorithm only works if this
- ** is a power of 2.
- */
- #define NHASH 16
- /*
- ** The current state of the rolling hash.
- **
- ** z[] holds the values that have been hashed. z[] is a circular buffer.
- ** z[i] is the first entry and z[(i+NHASH-1)%NHASH] is the last entry of
- ** the window.
- **
- ** Hash.a is the sum of all elements of hash.z[]. Hash.b is a weighted
- ** sum. Hash.b is z[i]*NHASH + z[i+1]*(NHASH-1) + ... + z[i+NHASH-1]*1.
- ** (Each index for z[] should be module NHASH, of course. The %NHASH operator
- ** is omitted in the prior expression for brevity.)
- */
- typedef struct hash hash;
- struct hash {
- u16 a, b; /* Hash values */
- u16 i; /* Start of the hash window */
- char z[NHASH]; /* The values that have been hashed */
- };
- /*
- ** Initialize the rolling hash using the first NHASH characters of z[]
- */
- static void hash_init(hash *pHash, const char *z){
- u16 a, b, i;
- a = b = z[0];
- for(i=1; i<NHASH; i++){
- a += z[i];
- b += a;
- }
- memcpy(pHash->z, z, NHASH);
- pHash->a = a & 0xffff;
- pHash->b = b & 0xffff;
- pHash->i = 0;
- }
- /*
- ** Advance the rolling hash by a single character "c"
- */
- static void hash_next(hash *pHash, int c){
- u16 old = pHash->z[pHash->i];
- pHash->z[pHash->i] = c;
- pHash->i = (pHash->i+1)&(NHASH-1);
- pHash->a = pHash->a - old + c;
- pHash->b = pHash->b - NHASH*old + pHash->a;
- }
- /*
- ** Return a 32-bit hash value
- */
- static u32 hash_32bit(hash *pHash){
- return (pHash->a & 0xffff) | (((u32)(pHash->b & 0xffff))<<16);
- }
- /*
- ** Compute a hash on NHASH bytes.
- **
- ** This routine is intended to be equivalent to:
- ** hash h;
- ** hash_init(&h, zInput);
- ** return hash_32bit(&h);
- */
- static u32 hash_once(const char *z){
- u16 a, b, i;
- a = b = z[0];
- for(i=1; i<NHASH; i++){
- a += z[i];
- b += a;
- }
- return a | (((u32)b)<<16);
- }
- /*
- ** Write an base-64 integer into the given buffer.
- */
- static void putInt(unsigned int v, char **pz){
- static const char zDigits[] =
- "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~";
- /* 123456789 123456789 123456789 123456789 123456789 123456789 123 */
- int i, j;
- char zBuf[20];
- if( v==0 ){
- *(*pz)++ = '0';
- return;
- }
- for(i=0; v>0; i++, v>>=6){
- zBuf[i] = zDigits[v&0x3f];
- }
- for(j=i-1; j>=0; j--){
- *(*pz)++ = zBuf[j];
- }
- }
- /*
- ** Read bytes from *pz and convert them into a positive integer. When
- ** finished, leave *pz pointing to the first character past the end of
- ** the integer. The *pLen parameter holds the length of the string
- ** in *pz and is decremented once for each character in the integer.
- */
- static unsigned int deltaGetInt(const char **pz, int *pLen){
- static const signed char zValue[] = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
- -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
- 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, 36,
- -1, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
- 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, -1, -1, -1, 63, -1,
- };
- unsigned int v = 0;
- int c;
- unsigned char *z = (unsigned char*)*pz;
- unsigned char *zStart = z;
- while( (c = zValue[0x7f&*(z++)])>=0 ){
- v = (v<<6) + c;
- }
- z--;
- *pLen -= z - zStart;
- *pz = (char*)z;
- return v;
- }
- /*
- ** Return the number digits in the base-64 representation of a positive integer
- */
- static int digit_count(int v){
- unsigned int i, x;
- for(i=1, x=64; v>=x; i++, x <<= 6){}
- return i;
- }
- #ifdef __GNUC__
- # define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__)
- #else
- # define GCC_VERSION 0
- #endif
- /*
- ** Compute a 32-bit big-endian checksum on the N-byte buffer. If the
- ** buffer is not a multiple of 4 bytes length, compute the sum that would
- ** have occurred if the buffer was padded with zeros to the next multiple
- ** of four bytes.
- */
- static unsigned int checksum(const char *zIn, size_t N){
- static const int byteOrderTest = 1;
- const unsigned char *z = (const unsigned char *)zIn;
- const unsigned char *zEnd = (const unsigned char*)&zIn[N&~3];
- unsigned sum = 0;
- assert( (z - (const unsigned char*)0)%4==0 ); /* Four-byte alignment */
- if( 0==*(char*)&byteOrderTest ){
- /* This is a big-endian machine */
- while( z<zEnd ){
- sum += *(unsigned*)z;
- z += 4;
- }
- }else{
- /* A little-endian machine */
- #if GCC_VERSION>=4003000
- while( z<zEnd ){
- sum += __builtin_bswap32(*(unsigned*)z);
- z += 4;
- }
- #elif defined(_MSC_VER) && _MSC_VER>=1300
- while( z<zEnd ){
- sum += _byteswap_ulong(*(unsigned*)z);
- z += 4;
- }
- #else
- unsigned sum0 = 0;
- unsigned sum1 = 0;
- unsigned sum2 = 0;
- while(N >= 16){
- sum0 += ((unsigned)z[0] + z[4] + z[8] + z[12]);
- sum1 += ((unsigned)z[1] + z[5] + z[9] + z[13]);
- sum2 += ((unsigned)z[2] + z[6] + z[10]+ z[14]);
- sum += ((unsigned)z[3] + z[7] + z[11]+ z[15]);
- z += 16;
- N -= 16;
- }
- while(N >= 4){
- sum0 += z[0];
- sum1 += z[1];
- sum2 += z[2];
- sum += z[3];
- z += 4;
- N -= 4;
- }
- sum += (sum2 << 8) + (sum1 << 16) + (sum0 << 24);
- #endif
- }
- switch(N&3){
- case 3: sum += (z[2] << 8);
- case 2: sum += (z[1] << 16);
- case 1: sum += (z[0] << 24);
- default: ;
- }
- return sum;
- }
- /*
- ** Create a new delta.
- **
- ** The delta is written into a preallocated buffer, zDelta, which
- ** should be at least 60 bytes longer than the target file, zOut.
- ** The delta string will be NUL-terminated, but it might also contain
- ** embedded NUL characters if either the zSrc or zOut files are
- ** binary. This function returns the length of the delta string
- ** in bytes, excluding the final NUL terminator character.
- **
- ** Output Format:
- **
- ** The delta begins with a base64 number followed by a newline. This
- ** number is the number of bytes in the TARGET file. Thus, given a
- ** delta file z, a program can compute the size of the output file
- ** simply by reading the first line and decoding the base-64 number
- ** found there. The delta_output_size() routine does exactly this.
- **
- ** After the initial size number, the delta consists of a series of
- ** literal text segments and commands to copy from the SOURCE file.
- ** A copy command looks like this:
- **
- ** NNN@MMM,
- **
- ** where NNN is the number of bytes to be copied and MMM is the offset
- ** into the source file of the first byte (both base-64). If NNN is 0
- ** it means copy the rest of the input file. Literal text is like this:
- **
- ** NNN:TTTTT
- **
- ** where NNN is the number of bytes of text (base-64) and TTTTT is the text.
- **
- ** The last term is of the form
- **
- ** NNN;
- **
- ** In this case, NNN is a 32-bit bigendian checksum of the output file
- ** that can be used to verify that the delta applied correctly. All
- ** numbers are in base-64.
- **
- ** Pure text files generate a pure text delta. Binary files generate a
- ** delta that may contain some binary data.
- **
- ** Algorithm:
- **
- ** The encoder first builds a hash table to help it find matching
- ** patterns in the source file. 16-byte chunks of the source file
- ** sampled at evenly spaced intervals are used to populate the hash
- ** table.
- **
- ** Next we begin scanning the target file using a sliding 16-byte
- ** window. The hash of the 16-byte window in the target is used to
- ** search for a matching section in the source file. When a match
- ** is found, a copy command is added to the delta. An effort is
- ** made to extend the matching section to regions that come before
- ** and after the 16-byte hash window. A copy command is only issued
- ** if the result would use less space that just quoting the text
- ** literally. Literal text is added to the delta for sections that
- ** do not match or which can not be encoded efficiently using copy
- ** commands.
- */
- static int delta_create(
- const char *zSrc, /* The source or pattern file */
- unsigned int lenSrc, /* Length of the source file */
- const char *zOut, /* The target file */
- unsigned int lenOut, /* Length of the target file */
- char *zDelta /* Write the delta into this buffer */
- ){
- int i, base;
- char *zOrigDelta = zDelta;
- hash h;
- int nHash; /* Number of hash table entries */
- int *landmark; /* Primary hash table */
- int *collide; /* Collision chain */
- int lastRead = -1; /* Last byte of zSrc read by a COPY command */
- /* Add the target file size to the beginning of the delta
- */
- putInt(lenOut, &zDelta);
- *(zDelta++) = '\n';
- /* If the source file is very small, it means that we have no
- ** chance of ever doing a copy command. Just output a single
- ** literal segment for the entire target and exit.
- */
- if( lenSrc<=NHASH ){
- putInt(lenOut, &zDelta);
- *(zDelta++) = ':';
- memcpy(zDelta, zOut, lenOut);
- zDelta += lenOut;
- putInt(checksum(zOut, lenOut), &zDelta);
- *(zDelta++) = ';';
- return zDelta - zOrigDelta;
- }
- /* Compute the hash table used to locate matching sections in the
- ** source file.
- */
- nHash = lenSrc/NHASH;
- collide = sqlite3_malloc64( (sqlite3_int64)nHash*2*sizeof(int) );
- memset(collide, -1, nHash*2*sizeof(int));
- landmark = &collide[nHash];
- for(i=0; i<lenSrc-NHASH; i+=NHASH){
- int hv = hash_once(&zSrc[i]) % nHash;
- collide[i/NHASH] = landmark[hv];
- landmark[hv] = i/NHASH;
- }
- /* Begin scanning the target file and generating copy commands and
- ** literal sections of the delta.
- */
- base = 0; /* We have already generated everything before zOut[base] */
- while( base+NHASH<lenOut ){
- int iSrc, iBlock;
- unsigned int bestCnt, bestOfst=0, bestLitsz=0;
- hash_init(&h, &zOut[base]);
- i = 0; /* Trying to match a landmark against zOut[base+i] */
- bestCnt = 0;
- while( 1 ){
- int hv;
- int limit = 250;
- hv = hash_32bit(&h) % nHash;
- iBlock = landmark[hv];
- while( iBlock>=0 && (limit--)>0 ){
- /*
- ** The hash window has identified a potential match against
- ** landmark block iBlock. But we need to investigate further.
- **
- ** Look for a region in zOut that matches zSrc. Anchor the search
- ** at zSrc[iSrc] and zOut[base+i]. Do not include anything prior to
- ** zOut[base] or after zOut[outLen] nor anything after zSrc[srcLen].
- **
- ** Set cnt equal to the length of the match and set ofst so that
- ** zSrc[ofst] is the first element of the match. litsz is the number
- ** of characters between zOut[base] and the beginning of the match.
- ** sz will be the overhead (in bytes) needed to encode the copy
- ** command. Only generate copy command if the overhead of the
- ** copy command is less than the amount of literal text to be copied.
- */
- int cnt, ofst, litsz;
- int j, k, x, y;
- int sz;
- int limitX;
- /* Beginning at iSrc, match forwards as far as we can. j counts
- ** the number of characters that match */
- iSrc = iBlock*NHASH;
- y = base+i;
- limitX = ( lenSrc-iSrc <= lenOut-y ) ? lenSrc : iSrc + lenOut - y;
- for(x=iSrc; x<limitX; x++, y++){
- if( zSrc[x]!=zOut[y] ) break;
- }
- j = x - iSrc - 1;
- /* Beginning at iSrc-1, match backwards as far as we can. k counts
- ** the number of characters that match */
- for(k=1; k<iSrc && k<=i; k++){
- if( zSrc[iSrc-k]!=zOut[base+i-k] ) break;
- }
- k--;
- /* Compute the offset and size of the matching region */
- ofst = iSrc-k;
- cnt = j+k+1;
- litsz = i-k; /* Number of bytes of literal text before the copy */
- /* sz will hold the number of bytes needed to encode the "insert"
- ** command and the copy command, not counting the "insert" text */
- sz = digit_count(i-k)+digit_count(cnt)+digit_count(ofst)+3;
- if( cnt>=sz && cnt>bestCnt ){
- /* Remember this match only if it is the best so far and it
- ** does not increase the file size */
- bestCnt = cnt;
- bestOfst = iSrc-k;
- bestLitsz = litsz;
- }
- /* Check the next matching block */
- iBlock = collide[iBlock];
- }
- /* We have a copy command that does not cause the delta to be larger
- ** than a literal insert. So add the copy command to the delta.
- */
- if( bestCnt>0 ){
- if( bestLitsz>0 ){
- /* Add an insert command before the copy */
- putInt(bestLitsz,&zDelta);
- *(zDelta++) = ':';
- memcpy(zDelta, &zOut[base], bestLitsz);
- zDelta += bestLitsz;
- base += bestLitsz;
- }
- base += bestCnt;
- putInt(bestCnt, &zDelta);
- *(zDelta++) = '@';
- putInt(bestOfst, &zDelta);
- *(zDelta++) = ',';
- if( bestOfst + bestCnt -1 > lastRead ){
- lastRead = bestOfst + bestCnt - 1;
- }
- bestCnt = 0;
- break;
- }
- /* If we reach this point, it means no match is found so far */
- if( base+i+NHASH>=lenOut ){
- /* We have reached the end of the file and have not found any
- ** matches. Do an "insert" for everything that does not match */
- putInt(lenOut-base, &zDelta);
- *(zDelta++) = ':';
- memcpy(zDelta, &zOut[base], lenOut-base);
- zDelta += lenOut-base;
- base = lenOut;
- break;
- }
- /* Advance the hash by one character. Keep looking for a match */
- hash_next(&h, zOut[base+i+NHASH]);
- i++;
- }
- }
- /* Output a final "insert" record to get all the text at the end of
- ** the file that does not match anything in the source file.
- */
- if( base<lenOut ){
- putInt(lenOut-base, &zDelta);
- *(zDelta++) = ':';
- memcpy(zDelta, &zOut[base], lenOut-base);
- zDelta += lenOut-base;
- }
- /* Output the final checksum record. */
- putInt(checksum(zOut, lenOut), &zDelta);
- *(zDelta++) = ';';
- sqlite3_free(collide);
- return zDelta - zOrigDelta;
- }
- /*
- ** Return the size (in bytes) of the output from applying
- ** a delta.
- **
- ** This routine is provided so that an procedure that is able
- ** to call delta_apply() can learn how much space is required
- ** for the output and hence allocate nor more space that is really
- ** needed.
- */
- static int delta_output_size(const char *zDelta, int lenDelta){
- int size;
- size = deltaGetInt(&zDelta, &lenDelta);
- if( *zDelta!='\n' ){
- /* ERROR: size integer not terminated by "\n" */
- return -1;
- }
- return size;
- }
- /*
- ** Apply a delta.
- **
- ** The output buffer should be big enough to hold the whole output
- ** file and a NUL terminator at the end. The delta_output_size()
- ** routine will determine this size for you.
- **
- ** The delta string should be null-terminated. But the delta string
- ** may contain embedded NUL characters (if the input and output are
- ** binary files) so we also have to pass in the length of the delta in
- ** the lenDelta parameter.
- **
- ** This function returns the size of the output file in bytes (excluding
- ** the final NUL terminator character). Except, if the delta string is
- ** malformed or intended for use with a source file other than zSrc,
- ** then this routine returns -1.
- **
- ** Refer to the delta_create() documentation above for a description
- ** of the delta file format.
- */
- static int delta_apply(
- const char *zSrc, /* The source or pattern file */
- int lenSrc, /* Length of the source file */
- const char *zDelta, /* Delta to apply to the pattern */
- int lenDelta, /* Length of the delta */
- char *zOut /* Write the output into this preallocated buffer */
- ){
- unsigned int limit;
- unsigned int total = 0;
- #ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
- char *zOrigOut = zOut;
- #endif
- limit = deltaGetInt(&zDelta, &lenDelta);
- if( *zDelta!='\n' ){
- /* ERROR: size integer not terminated by "\n" */
- return -1;
- }
- zDelta++; lenDelta--;
- while( *zDelta && lenDelta>0 ){
- unsigned int cnt, ofst;
- cnt = deltaGetInt(&zDelta, &lenDelta);
- switch( zDelta[0] ){
- case '@': {
- zDelta++; lenDelta--;
- ofst = deltaGetInt(&zDelta, &lenDelta);
- if( lenDelta>0 && zDelta[0]!=',' ){
- /* ERROR: copy command not terminated by ',' */
- return -1;
- }
- zDelta++; lenDelta--;
- total += cnt;
- if( total>limit ){
- /* ERROR: copy exceeds output file size */
- return -1;
- }
- if( ofst+cnt > lenSrc ){
- /* ERROR: copy extends past end of input */
- return -1;
- }
- memcpy(zOut, &zSrc[ofst], cnt);
- zOut += cnt;
- break;
- }
- case ':': {
- zDelta++; lenDelta--;
- total += cnt;
- if( total>limit ){
- /* ERROR: insert command gives an output larger than predicted */
- return -1;
- }
- if( cnt>lenDelta ){
- /* ERROR: insert count exceeds size of delta */
- return -1;
- }
- memcpy(zOut, zDelta, cnt);
- zOut += cnt;
- zDelta += cnt;
- lenDelta -= cnt;
- break;
- }
- case ';': {
- zDelta++; lenDelta--;
- zOut[0] = 0;
- #ifdef FOSSIL_ENABLE_DELTA_CKSUM_TEST
- if( cnt!=checksum(zOrigOut, total) ){
- /* ERROR: bad checksum */
- return -1;
- }
- #endif
- if( total!=limit ){
- /* ERROR: generated size does not match predicted size */
- return -1;
- }
- return total;
- }
- default: {
- /* ERROR: unknown delta operator */
- return -1;
- }
- }
- }
- /* ERROR: unterminated delta */
- return -1;
- }
- /*
- ** SQL functions: delta_create(X,Y)
- **
- ** Return a delta for carrying X into Y.
- */
- static void deltaCreateFunc(
- sqlite3_context *context,
- int argc,
- sqlite3_value **argv
- ){
- const char *aOrig; int nOrig; /* old blob */
- const char *aNew; int nNew; /* new blob */
- char *aOut; int nOut; /* output delta */
- assert( argc==2 );
- if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
- if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return;
- nOrig = sqlite3_value_bytes(argv[0]);
- aOrig = (const char*)sqlite3_value_blob(argv[0]);
- nNew = sqlite3_value_bytes(argv[1]);
- aNew = (const char*)sqlite3_value_blob(argv[1]);
- aOut = sqlite3_malloc64(nNew+70);
- if( aOut==0 ){
- sqlite3_result_error_nomem(context);
- }else{
- nOut = delta_create(aOrig, nOrig, aNew, nNew, aOut);
- if( nOut<0 ){
- sqlite3_free(aOut);
- sqlite3_result_error(context, "cannot create fossil delta", -1);
- }else{
- sqlite3_result_blob(context, aOut, nOut, sqlite3_free);
- }
- }
- }
- /*
- ** SQL functions: delta_apply(X,D)
- **
- ** Return the result of applying delta D to input X.
- */
- static void deltaApplyFunc(
- sqlite3_context *context,
- int argc,
- sqlite3_value **argv
- ){
- const char *aOrig; int nOrig; /* The X input */
- const char *aDelta; int nDelta; /* The input delta (D) */
- char *aOut; int nOut, nOut2; /* The output */
- assert( argc==2 );
- if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
- if( sqlite3_value_type(argv[1])==SQLITE_NULL ) return;
- nOrig = sqlite3_value_bytes(argv[0]);
- aOrig = (const char*)sqlite3_value_blob(argv[0]);
- nDelta = sqlite3_value_bytes(argv[1]);
- aDelta = (const char*)sqlite3_value_blob(argv[1]);
- /* Figure out the size of the output */
- nOut = delta_output_size(aDelta, nDelta);
- if( nOut<0 ){
- sqlite3_result_error(context, "corrupt fossil delta", -1);
- return;
- }
- aOut = sqlite3_malloc64((sqlite3_int64)nOut+1);
- if( aOut==0 ){
- sqlite3_result_error_nomem(context);
- }else{
- nOut2 = delta_apply(aOrig, nOrig, aDelta, nDelta, aOut);
- if( nOut2!=nOut ){
- sqlite3_free(aOut);
- sqlite3_result_error(context, "corrupt fossil delta", -1);
- }else{
- sqlite3_result_blob(context, aOut, nOut, sqlite3_free);
- }
- }
- }
- /*
- ** SQL functions: delta_output_size(D)
- **
- ** Return the size of the output that results from applying delta D.
- */
- static void deltaOutputSizeFunc(
- sqlite3_context *context,
- int argc,
- sqlite3_value **argv
- ){
- const char *aDelta; int nDelta; /* The input delta (D) */
- int nOut; /* Size of output */
- assert( argc==1 );
- if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return;
- nDelta = sqlite3_value_bytes(argv[0]);
- aDelta = (const char*)sqlite3_value_blob(argv[0]);
- /* Figure out the size of the output */
- nOut = delta_output_size(aDelta, nDelta);
- if( nOut<0 ){
- sqlite3_result_error(context, "corrupt fossil delta", -1);
- return;
- }else{
- sqlite3_result_int(context, nOut);
- }
- }
- /*****************************************************************************
- ** Table-valued SQL function: delta_parse(DELTA)
- **
- ** Schema:
- **
- ** CREATE TABLE delta_parse(
- ** op TEXT,
- ** a1 INT,
- ** a2 ANY,
- ** delta HIDDEN BLOB
- ** );
- **
- ** Given an input DELTA, this function parses the delta and returns
- ** rows for each entry in the delta. The op column has one of the
- ** values SIZE, COPY, INSERT, CHECKSUM, ERROR.
- **
- ** Assuming no errors, the first row has op='SIZE'. a1 is the size of
- ** the output in bytes and a2 is NULL.
- **
- ** After the initial SIZE row, there are zero or more 'COPY' and/or 'INSERT'
- ** rows. A COPY row means content is copied from the source into the
- ** output. Column a1 is the number of bytes to copy and a2 is the offset
- ** into source from which to begin copying. An INSERT row means to
- ** insert text into the output stream. Column a1 is the number of bytes
- ** to insert and column is a BLOB that contains the text to be inserted.
- **
- ** The last row of a well-formed delta will have an op value of 'CHECKSUM'.
- ** The a1 column will be the value of the checksum and a2 will be NULL.
- **
- ** If the input delta is not well-formed, then a row with an op value
- ** of 'ERROR' is returned. The a1 value of the ERROR row is the offset
- ** into the delta where the error was encountered and a2 is NULL.
- */
- typedef struct deltaparsevtab_vtab deltaparsevtab_vtab;
- typedef struct deltaparsevtab_cursor deltaparsevtab_cursor;
- struct deltaparsevtab_vtab {
- sqlite3_vtab base; /* Base class - must be first */
- /* No additional information needed */
- };
- struct deltaparsevtab_cursor {
- sqlite3_vtab_cursor base; /* Base class - must be first */
- char *aDelta; /* The delta being parsed */
- int nDelta; /* Number of bytes in the delta */
- int iCursor; /* Current cursor location */
- int eOp; /* Name of current operator */
- unsigned int a1, a2; /* Arguments to current operator */
- int iNext; /* Next cursor value */
- };
- /* Operator names:
- */
- static const char *azOp[] = {
- "SIZE", "COPY", "INSERT", "CHECKSUM", "ERROR", "EOF"
- };
- #define DELTAPARSE_OP_SIZE 0
- #define DELTAPARSE_OP_COPY 1
- #define DELTAPARSE_OP_INSERT 2
- #define DELTAPARSE_OP_CHECKSUM 3
- #define DELTAPARSE_OP_ERROR 4
- #define DELTAPARSE_OP_EOF 5
- /*
- ** The deltaparsevtabConnect() method is invoked to create a new
- ** deltaparse virtual table.
- **
- ** Think of this routine as the constructor for deltaparsevtab_vtab objects.
- **
- ** All this routine needs to do is:
- **
- ** (1) Allocate the deltaparsevtab_vtab object and initialize all fields.
- **
- ** (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the
- ** result set of queries against the virtual table will look like.
- */
- static int deltaparsevtabConnect(
- sqlite3 *db,
- void *pAux,
- int argc, const char *const*argv,
- sqlite3_vtab **ppVtab,
- char **pzErr
- ){
- deltaparsevtab_vtab *pNew;
- int rc;
- rc = sqlite3_declare_vtab(db,
- "CREATE TABLE x(op,a1,a2,delta HIDDEN)"
- );
- /* For convenience, define symbolic names for the index to each column. */
- #define DELTAPARSEVTAB_OP 0
- #define DELTAPARSEVTAB_A1 1
- #define DELTAPARSEVTAB_A2 2
- #define DELTAPARSEVTAB_DELTA 3
- if( rc==SQLITE_OK ){
- pNew = sqlite3_malloc64( sizeof(*pNew) );
- *ppVtab = (sqlite3_vtab*)pNew;
- if( pNew==0 ) return SQLITE_NOMEM;
- memset(pNew, 0, sizeof(*pNew));
- sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS);
- }
- return rc;
- }
- /*
- ** This method is the destructor for deltaparsevtab_vtab objects.
- */
- static int deltaparsevtabDisconnect(sqlite3_vtab *pVtab){
- deltaparsevtab_vtab *p = (deltaparsevtab_vtab*)pVtab;
- sqlite3_free(p);
- return SQLITE_OK;
- }
- /*
- ** Constructor for a new deltaparsevtab_cursor object.
- */
- static int deltaparsevtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
- deltaparsevtab_cursor *pCur;
- pCur = sqlite3_malloc( sizeof(*pCur) );
- if( pCur==0 ) return SQLITE_NOMEM;
- memset(pCur, 0, sizeof(*pCur));
- *ppCursor = &pCur->base;
- return SQLITE_OK;
- }
- /*
- ** Destructor for a deltaparsevtab_cursor.
- */
- static int deltaparsevtabClose(sqlite3_vtab_cursor *cur){
- deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
- sqlite3_free(pCur->aDelta);
- sqlite3_free(pCur);
- return SQLITE_OK;
- }
- /*
- ** Advance a deltaparsevtab_cursor to its next row of output.
- */
- static int deltaparsevtabNext(sqlite3_vtab_cursor *cur){
- deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
- const char *z;
- int i = 0;
- pCur->iCursor = pCur->iNext;
- z = pCur->aDelta + pCur->iCursor;
- pCur->a1 = deltaGetInt(&z, &i);
- switch( z[0] ){
- case '@': {
- z++;
- pCur->a2 = deltaGetInt(&z, &i);
- pCur->eOp = DELTAPARSE_OP_COPY;
- pCur->iNext = (int)(&z[1] - pCur->aDelta);
- break;
- }
- case ':': {
- z++;
- pCur->a2 = (unsigned int)(z - pCur->aDelta);
- pCur->eOp = DELTAPARSE_OP_INSERT;
- pCur->iNext = (int)(&z[pCur->a1] - pCur->aDelta);
- break;
- }
- case ';': {
- pCur->eOp = DELTAPARSE_OP_CHECKSUM;
- pCur->iNext = pCur->nDelta;
- break;
- }
- default: {
- if( pCur->iNext==pCur->nDelta ){
- pCur->eOp = DELTAPARSE_OP_EOF;
- }else{
- pCur->eOp = DELTAPARSE_OP_ERROR;
- pCur->iNext = pCur->nDelta;
- }
- break;
- }
- }
- return SQLITE_OK;
- }
- /*
- ** Return values of columns for the row at which the deltaparsevtab_cursor
- ** is currently pointing.
- */
- static int deltaparsevtabColumn(
- sqlite3_vtab_cursor *cur, /* The cursor */
- sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
- int i /* Which column to return */
- ){
- deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
- switch( i ){
- case DELTAPARSEVTAB_OP: {
- sqlite3_result_text(ctx, azOp[pCur->eOp], -1, SQLITE_STATIC);
- break;
- }
- case DELTAPARSEVTAB_A1: {
- sqlite3_result_int(ctx, pCur->a1);
- break;
- }
- case DELTAPARSEVTAB_A2: {
- if( pCur->eOp==DELTAPARSE_OP_COPY ){
- sqlite3_result_int(ctx, pCur->a2);
- }else if( pCur->eOp==DELTAPARSE_OP_INSERT ){
- sqlite3_result_blob(ctx, pCur->aDelta+pCur->a2, pCur->a1,
- SQLITE_TRANSIENT);
- }
- break;
- }
- case DELTAPARSEVTAB_DELTA: {
- sqlite3_result_blob(ctx, pCur->aDelta, pCur->nDelta, SQLITE_TRANSIENT);
- break;
- }
- }
- return SQLITE_OK;
- }
- /*
- ** Return the rowid for the current row. In this implementation, the
- ** rowid is the same as the output value.
- */
- static int deltaparsevtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
- deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
- *pRowid = pCur->iCursor;
- return SQLITE_OK;
- }
- /*
- ** Return TRUE if the cursor has been moved off of the last
- ** row of output.
- */
- static int deltaparsevtabEof(sqlite3_vtab_cursor *cur){
- deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor*)cur;
- return pCur->eOp==DELTAPARSE_OP_EOF;
- }
- /*
- ** This method is called to "rewind" the deltaparsevtab_cursor object back
- ** to the first row of output. This method is always called at least
- ** once prior to any call to deltaparsevtabColumn() or deltaparsevtabRowid() or
- ** deltaparsevtabEof().
- */
- static int deltaparsevtabFilter(
- sqlite3_vtab_cursor *pVtabCursor,
- int idxNum, const char *idxStr,
- int argc, sqlite3_value **argv
- ){
- deltaparsevtab_cursor *pCur = (deltaparsevtab_cursor *)pVtabCursor;
- const char *a;
- int i = 0;
- pCur->eOp = DELTAPARSE_OP_ERROR;
- if( idxNum!=1 ){
- return SQLITE_OK;
- }
- pCur->nDelta = sqlite3_value_bytes(argv[0]);
- a = (const char*)sqlite3_value_blob(argv[0]);
- if( pCur->nDelta==0 || a==0 ){
- return SQLITE_OK;
- }
- pCur->aDelta = sqlite3_malloc64( pCur->nDelta+1 );
- if( pCur->aDelta==0 ){
- pCur->nDelta = 0;
- return SQLITE_NOMEM;
- }
- memcpy(pCur->aDelta, a, pCur->nDelta);
- pCur->aDelta[pCur->nDelta] = 0;
- a = pCur->aDelta;
- pCur->eOp = DELTAPARSE_OP_SIZE;
- pCur->a1 = deltaGetInt(&a, &i);
- if( a[0]!='\n' ){
- pCur->eOp = DELTAPARSE_OP_ERROR;
- pCur->a1 = pCur->a2 = 0;
- pCur->iNext = pCur->nDelta;
- return SQLITE_OK;
- }
- a++;
- pCur->iNext = (unsigned int)(a - pCur->aDelta);
- return SQLITE_OK;
- }
- /*
- ** SQLite will invoke this method one or more times while planning a query
- ** that uses the virtual table. This routine needs to create
- ** a query plan for each invocation and compute an estimated cost for that
- ** plan.
- */
- static int deltaparsevtabBestIndex(
- sqlite3_vtab *tab,
- sqlite3_index_info *pIdxInfo
- ){
- int i;
- for(i=0; i<pIdxInfo->nConstraint; i++){
- if( pIdxInfo->aConstraint[i].iColumn != DELTAPARSEVTAB_DELTA ) continue;
- if( pIdxInfo->aConstraint[i].usable==0 ) continue;
- if( pIdxInfo->aConstraint[i].op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue;
- pIdxInfo->aConstraintUsage[i].argvIndex = 1;
- pIdxInfo->aConstraintUsage[i].omit = 1;
- pIdxInfo->estimatedCost = (double)1;
- pIdxInfo->estimatedRows = 10;
- pIdxInfo->idxNum = 1;
- return SQLITE_OK;
- }
- pIdxInfo->idxNum = 0;
- pIdxInfo->estimatedCost = (double)0x7fffffff;
- pIdxInfo->estimatedRows = 0x7fffffff;
- return SQLITE_CONSTRAINT;
- }
- /*
- ** This following structure defines all the methods for the
- ** virtual table.
- */
- static sqlite3_module deltaparsevtabModule = {
- /* iVersion */ 0,
- /* xCreate */ 0,
- /* xConnect */ deltaparsevtabConnect,
- /* xBestIndex */ deltaparsevtabBestIndex,
- /* xDisconnect */ deltaparsevtabDisconnect,
- /* xDestroy */ 0,
- /* xOpen */ deltaparsevtabOpen,
- /* xClose */ deltaparsevtabClose,
- /* xFilter */ deltaparsevtabFilter,
- /* xNext */ deltaparsevtabNext,
- /* xEof */ deltaparsevtabEof,
- /* xColumn */ deltaparsevtabColumn,
- /* xRowid */ deltaparsevtabRowid,
- /* xUpdate */ 0,
- /* xBegin */ 0,
- /* xSync */ 0,
- /* xCommit */ 0,
- /* xRollback */ 0,
- /* xFindMethod */ 0,
- /* xRename */ 0,
- /* xSavepoint */ 0,
- /* xRelease */ 0,
- /* xRollbackTo */ 0,
- /* xShadowName */ 0,
- /* xIntegrity */ 0
- };
- #ifdef _WIN32
- __declspec(dllexport)
- #endif
- int sqlite3_fossildelta_init(
- sqlite3 *db,
- char **pzErrMsg,
- const sqlite3_api_routines *pApi
- ){
- static const int enc = SQLITE_UTF8|SQLITE_INNOCUOUS;
- int rc = SQLITE_OK;
- SQLITE_EXTENSION_INIT2(pApi);
- (void)pzErrMsg; /* Unused parameter */
- rc = sqlite3_create_function(db, "delta_create", 2, enc, 0,
- deltaCreateFunc, 0, 0);
- if( rc==SQLITE_OK ){
- rc = sqlite3_create_function(db, "delta_apply", 2, enc, 0,
- deltaApplyFunc, 0, 0);
- }
- if( rc==SQLITE_OK ){
- rc = sqlite3_create_function(db, "delta_output_size", 1, enc, 0,
- deltaOutputSizeFunc, 0, 0);
- }
- if( rc==SQLITE_OK ){
- rc = sqlite3_create_module(db, "delta_parse", &deltaparsevtabModule, 0);
- }
- return rc;
- }
|