1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240 |
- /*
- ** 2011-09-11
- **
- ** The author disclaims copyright to this source code. In place of
- ** a legal notice, here is a blessing:
- **
- ** May you do good and not evil.
- ** May you find forgiveness for yourself and forgive others.
- ** May you share freely, never taking more than you give.
- **
- *************************************************************************
- **
- ** This file contains code to read and write checkpoints.
- **
- ** A checkpoint represents the database layout at a single point in time.
- ** It includes a log offset. When an existing database is opened, the
- ** current state is determined by reading the newest checkpoint and updating
- ** it with all committed transactions from the log that follow the specified
- ** offset.
- */
- #include "lsmInt.h"
- /*
- ** CHECKPOINT BLOB FORMAT:
- **
- ** A checkpoint blob is a series of unsigned 32-bit integers stored in
- ** big-endian byte order. As follows:
- **
- ** Checkpoint header (see the CKPT_HDR_XXX #defines):
- **
- ** 1. The checkpoint id MSW.
- ** 2. The checkpoint id LSW.
- ** 3. The number of integer values in the entire checkpoint, including
- ** the two checksum values.
- ** 4. The compression scheme id.
- ** 5. The total number of blocks in the database.
- ** 6. The block size.
- ** 7. The number of levels.
- ** 8. The nominal database page size.
- ** 9. The number of pages (in total) written to the database file.
- **
- ** Log pointer:
- **
- ** 1. The log offset MSW.
- ** 2. The log offset LSW.
- ** 3. Log checksum 0.
- ** 4. Log checksum 1.
- **
- ** Note that the "log offset" is not the literal byte offset. Instead,
- ** it is the byte offset multiplied by 2, with least significant bit
- ** toggled each time the log pointer value is changed. This is to make
- ** sure that this field changes each time the log pointer is updated,
- ** even if the log file itself is disabled. See lsmTreeMakeOld().
- **
- ** See ckptExportLog() and ckptImportLog().
- **
- ** Append points:
- **
- ** 8 integers (4 * 64-bit page numbers). See ckptExportAppendlist().
- **
- ** For each level in the database, a level record. Formatted as follows:
- **
- ** 0. Age of the level (least significant 16-bits). And flags mask (most
- ** significant 16-bits).
- ** 1. The number of right-hand segments (nRight, possibly 0),
- ** 2. Segment record for left-hand segment (8 integers defined below),
- ** 3. Segment record for each right-hand segment (8 integers defined below),
- ** 4. If nRight>0, The number of segments involved in the merge
- ** 5. if nRight>0, Current nSkip value (see Merge structure defn.),
- ** 6. For each segment in the merge:
- ** 5a. Page number of next cell to read during merge (this field
- ** is 64-bits - 2 integers)
- ** 5b. Cell number of next cell to read during merge
- ** 7. Page containing current split-key (64-bits - 2 integers).
- ** 8. Cell within page containing current split-key.
- ** 9. Current pointer value (64-bits - 2 integers).
- **
- ** The block redirect array:
- **
- ** 1. Number of redirections (maximum LSM_MAX_BLOCK_REDIRECTS).
- ** 2. For each redirection:
- ** a. "from" block number
- ** b. "to" block number
- **
- ** The in-memory freelist entries. Each entry is either an insert or a
- ** delete. The in-memory freelist is to the free-block-list as the
- ** in-memory tree is to the users database content.
- **
- ** 1. Number of free-list entries stored in checkpoint header.
- ** 2. Number of free blocks (in total).
- ** 3. Total number of blocks freed during database lifetime.
- ** 4. For each entry:
- ** 2a. Block number of free block.
- ** 2b. A 64-bit integer (MSW followed by LSW). -1 for a delete entry,
- ** or the associated checkpoint id for an insert.
- **
- ** The checksum:
- **
- ** 1. Checksum value 1.
- ** 2. Checksum value 2.
- **
- ** In the above, a segment record consists of the following four 64-bit
- ** fields (converted to 2 * u32 by storing the MSW followed by LSW):
- **
- ** 1. First page of array,
- ** 2. Last page of array,
- ** 3. Root page of array (or 0),
- ** 4. Size of array in pages.
- */
- /*
- ** LARGE NUMBERS OF LEVEL RECORDS:
- **
- ** A limit on the number of rhs segments that may be present in the database
- ** file. Defining this limit ensures that all level records fit within
- ** the 4096 byte limit for checkpoint blobs.
- **
- ** The number of right-hand-side segments in a database is counted as
- ** follows:
- **
- ** * For each level in the database not undergoing a merge, add 1.
- **
- ** * For each level in the database that is undergoing a merge, add
- ** the number of segments on the rhs of the level.
- **
- ** A level record not undergoing a merge is 10 integers. A level record
- ** with nRhs rhs segments and (nRhs+1) input segments (i.e. including the
- ** separators from the next level) is (11*nRhs+20) integers. The maximum
- ** per right-hand-side level is therefore 21 integers. So the maximum
- ** size of all level records in a checkpoint is 21*40=820 integers.
- **
- ** TODO: Before pointer values were changed from 32 to 64 bits, the above
- ** used to come to 420 bytes - leaving significant space for a free-list
- ** prefix. No more. To fix this, reduce the size of the level records in
- ** a db snapshot, and improve management of the free-list tail in
- ** lsm_sorted.c.
- */
- #define LSM_MAX_RHS_SEGMENTS 40
- /*
- ** LARGE NUMBERS OF FREELIST ENTRIES:
- **
- ** There is also a limit (LSM_MAX_FREELIST_ENTRIES - defined in lsmInt.h)
- ** on the number of free-list entries stored in a checkpoint. Since each
- ** free-list entry consists of 3 integers, the maximum free-list size is
- ** 3*100=300 integers. Combined with the limit on rhs segments defined
- ** above, this ensures that a checkpoint always fits within a 4096 byte
- ** meta page.
- **
- ** If the database contains more than 100 free blocks, the "overflow" flag
- ** in the checkpoint header is set and the remainder are stored in the
- ** system FREELIST entry in the LSM (along with user data). The value
- ** accompanying the FREELIST key in the LSM is, like a checkpoint, an array
- ** of 32-bit big-endian integers. As follows:
- **
- ** For each entry:
- ** a. Block number of free block.
- ** b. MSW of associated checkpoint id.
- ** c. LSW of associated checkpoint id.
- **
- ** The number of entries is not required - it is implied by the size of the
- ** value blob containing the integer array.
- **
- ** Note that the limit defined by LSM_MAX_FREELIST_ENTRIES is a hard limit.
- ** The actual value used may be configured using LSM_CONFIG_MAX_FREELIST.
- */
- /*
- ** The argument to this macro must be of type u32. On a little-endian
- ** architecture, it returns the u32 value that results from interpreting
- ** the 4 bytes as a big-endian value. On a big-endian architecture, it
- ** returns the value that would be produced by interpreting the 4 bytes
- ** of the input value as a little-endian integer.
- */
- #define BYTESWAP32(x) ( \
- (((x)&0x000000FF)<<24) + (((x)&0x0000FF00)<<8) \
- + (((x)&0x00FF0000)>>8) + (((x)&0xFF000000)>>24) \
- )
- static const int one = 1;
- #define LSM_LITTLE_ENDIAN (*(u8 *)(&one))
- /* Sizes, in integers, of various parts of the checkpoint. */
- #define CKPT_HDR_SIZE 9
- #define CKPT_LOGPTR_SIZE 4
- #define CKPT_APPENDLIST_SIZE (LSM_APPLIST_SZ * 2)
- /* A #define to describe each integer in the checkpoint header. */
- #define CKPT_HDR_ID_MSW 0
- #define CKPT_HDR_ID_LSW 1
- #define CKPT_HDR_NCKPT 2
- #define CKPT_HDR_CMPID 3
- #define CKPT_HDR_NBLOCK 4
- #define CKPT_HDR_BLKSZ 5
- #define CKPT_HDR_NLEVEL 6
- #define CKPT_HDR_PGSZ 7
- #define CKPT_HDR_NWRITE 8
- #define CKPT_HDR_LO_MSW 9
- #define CKPT_HDR_LO_LSW 10
- #define CKPT_HDR_LO_CKSUM1 11
- #define CKPT_HDR_LO_CKSUM2 12
- typedef struct CkptBuffer CkptBuffer;
- /*
- ** Dynamic buffer used to accumulate data for a checkpoint.
- */
- struct CkptBuffer {
- lsm_env *pEnv;
- int nAlloc;
- u32 *aCkpt;
- };
- /*
- ** Calculate the checksum of the checkpoint specified by arguments aCkpt and
- ** nCkpt. Store the checksum in *piCksum1 and *piCksum2 before returning.
- **
- ** The value of the nCkpt parameter includes the two checksum values at
- ** the end of the checkpoint. They are not used as inputs to the checksum
- ** calculation. The checksum is based on the array of (nCkpt-2) integers
- ** at aCkpt[].
- */
- static void ckptChecksum(u32 *aCkpt, u32 nCkpt, u32 *piCksum1, u32 *piCksum2){
- u32 i;
- u32 cksum1 = 1;
- u32 cksum2 = 2;
- if( nCkpt % 2 ){
- cksum1 += aCkpt[nCkpt-3] & 0x0000FFFF;
- cksum2 += aCkpt[nCkpt-3] & 0xFFFF0000;
- }
- for(i=0; (i+3)<nCkpt; i+=2){
- cksum1 += cksum2 + aCkpt[i];
- cksum2 += cksum1 + aCkpt[i+1];
- }
- *piCksum1 = cksum1;
- *piCksum2 = cksum2;
- }
- /*
- ** Set integer iIdx of the checkpoint accumulating in buffer *p to iVal.
- */
- static void ckptSetValue(CkptBuffer *p, int iIdx, u32 iVal, int *pRc){
- if( *pRc ) return;
- if( iIdx>=p->nAlloc ){
- int nNew = LSM_MAX(8, iIdx*2);
- p->aCkpt = (u32 *)lsmReallocOrFree(p->pEnv, p->aCkpt, nNew*sizeof(u32));
- if( !p->aCkpt ){
- *pRc = LSM_NOMEM_BKPT;
- return;
- }
- p->nAlloc = nNew;
- }
- p->aCkpt[iIdx] = iVal;
- }
- /*
- ** Argument aInt points to an array nInt elements in size. Switch the
- ** endian-ness of each element of the array.
- */
- static void ckptChangeEndianness(u32 *aInt, int nInt){
- if( LSM_LITTLE_ENDIAN ){
- int i;
- for(i=0; i<nInt; i++) aInt[i] = BYTESWAP32(aInt[i]);
- }
- }
- /*
- ** Object *p contains a checkpoint in native byte-order. The checkpoint is
- ** nCkpt integers in size, not including any checksum. This function sets
- ** the two checksum elements of the checkpoint accordingly.
- */
- static void ckptAddChecksum(CkptBuffer *p, int nCkpt, int *pRc){
- if( *pRc==LSM_OK ){
- u32 aCksum[2] = {0, 0};
- ckptChecksum(p->aCkpt, nCkpt+2, &aCksum[0], &aCksum[1]);
- ckptSetValue(p, nCkpt, aCksum[0], pRc);
- ckptSetValue(p, nCkpt+1, aCksum[1], pRc);
- }
- }
- static void ckptAppend64(CkptBuffer *p, int *piOut, i64 iVal, int *pRc){
- int iOut = *piOut;
- ckptSetValue(p, iOut++, (iVal >> 32) & 0xFFFFFFFF, pRc);
- ckptSetValue(p, iOut++, (iVal & 0xFFFFFFFF), pRc);
- *piOut = iOut;
- }
- static i64 ckptRead64(u32 *a){
- return (((i64)a[0]) << 32) + (i64)a[1];
- }
- static i64 ckptGobble64(u32 *a, int *piIn){
- int iIn = *piIn;
- *piIn += 2;
- return ckptRead64(&a[iIn]);
- }
- /*
- ** Append a 6-value segment record corresponding to pSeg to the checkpoint
- ** buffer passed as the third argument.
- */
- static void ckptExportSegment(
- Segment *pSeg,
- CkptBuffer *p,
- int *piOut,
- int *pRc
- ){
- ckptAppend64(p, piOut, pSeg->iFirst, pRc);
- ckptAppend64(p, piOut, pSeg->iLastPg, pRc);
- ckptAppend64(p, piOut, pSeg->iRoot, pRc);
- ckptAppend64(p, piOut, pSeg->nSize, pRc);
- }
- static void ckptExportLevel(
- Level *pLevel, /* Level object to serialize */
- CkptBuffer *p, /* Append new level record to this ckpt */
- int *piOut, /* IN/OUT: Size of checkpoint so far */
- int *pRc /* IN/OUT: Error code */
- ){
- int iOut = *piOut;
- Merge *pMerge;
- pMerge = pLevel->pMerge;
- ckptSetValue(p, iOut++, (u32)pLevel->iAge + (u32)(pLevel->flags<<16), pRc);
- ckptSetValue(p, iOut++, pLevel->nRight, pRc);
- ckptExportSegment(&pLevel->lhs, p, &iOut, pRc);
- assert( (pLevel->nRight>0)==(pMerge!=0) );
- if( pMerge ){
- int i;
- for(i=0; i<pLevel->nRight; i++){
- ckptExportSegment(&pLevel->aRhs[i], p, &iOut, pRc);
- }
- assert( pMerge->nInput==pLevel->nRight
- || pMerge->nInput==pLevel->nRight+1
- );
- ckptSetValue(p, iOut++, pMerge->nInput, pRc);
- ckptSetValue(p, iOut++, pMerge->nSkip, pRc);
- for(i=0; i<pMerge->nInput; i++){
- ckptAppend64(p, &iOut, pMerge->aInput[i].iPg, pRc);
- ckptSetValue(p, iOut++, pMerge->aInput[i].iCell, pRc);
- }
- ckptAppend64(p, &iOut, pMerge->splitkey.iPg, pRc);
- ckptSetValue(p, iOut++, pMerge->splitkey.iCell, pRc);
- ckptAppend64(p, &iOut, pMerge->iCurrentPtr, pRc);
- }
- *piOut = iOut;
- }
- /*
- ** Populate the log offset fields of the checkpoint buffer. 4 values.
- */
- static void ckptExportLog(
- lsm_db *pDb,
- int bFlush,
- CkptBuffer *p,
- int *piOut,
- int *pRc
- ){
- int iOut = *piOut;
- assert( iOut==CKPT_HDR_LO_MSW );
- if( bFlush ){
- i64 iOff = pDb->treehdr.iOldLog;
- ckptAppend64(p, &iOut, iOff, pRc);
- ckptSetValue(p, iOut++, pDb->treehdr.oldcksum0, pRc);
- ckptSetValue(p, iOut++, pDb->treehdr.oldcksum1, pRc);
- }else{
- for(; iOut<=CKPT_HDR_LO_CKSUM2; iOut++){
- ckptSetValue(p, iOut, pDb->pShmhdr->aSnap2[iOut], pRc);
- }
- }
- assert( *pRc || iOut==CKPT_HDR_LO_CKSUM2+1 );
- *piOut = iOut;
- }
- static void ckptExportAppendlist(
- lsm_db *db, /* Database connection */
- CkptBuffer *p, /* Checkpoint buffer to write to */
- int *piOut, /* IN/OUT: Offset within checkpoint buffer */
- int *pRc /* IN/OUT: Error code */
- ){
- int i;
- LsmPgno *aiAppend = db->pWorker->aiAppend;
- for(i=0; i<LSM_APPLIST_SZ; i++){
- ckptAppend64(p, piOut, aiAppend[i], pRc);
- }
- };
- static int ckptExportSnapshot(
- lsm_db *pDb, /* Connection handle */
- int bLog, /* True to update log-offset fields */
- i64 iId, /* Checkpoint id */
- int bCksum, /* If true, include checksums */
- void **ppCkpt, /* OUT: Buffer containing checkpoint */
- int *pnCkpt /* OUT: Size of checkpoint in bytes */
- ){
- int rc = LSM_OK; /* Return Code */
- FileSystem *pFS = pDb->pFS; /* File system object */
- Snapshot *pSnap = pDb->pWorker; /* Worker snapshot */
- int nLevel = 0; /* Number of levels in checkpoint */
- int iLevel; /* Used to count out nLevel levels */
- int iOut = 0; /* Current offset in aCkpt[] */
- Level *pLevel; /* Level iterator */
- int i; /* Iterator used while serializing freelist */
- CkptBuffer ckpt;
- /* Initialize the output buffer */
- memset(&ckpt, 0, sizeof(CkptBuffer));
- ckpt.pEnv = pDb->pEnv;
- iOut = CKPT_HDR_SIZE;
- /* Write the log offset into the checkpoint. */
- ckptExportLog(pDb, bLog, &ckpt, &iOut, &rc);
- /* Write the append-point list */
- ckptExportAppendlist(pDb, &ckpt, &iOut, &rc);
- /* Figure out how many levels will be written to the checkpoint. */
- for(pLevel=lsmDbSnapshotLevel(pSnap); pLevel; pLevel=pLevel->pNext) nLevel++;
- /* Serialize nLevel levels. */
- iLevel = 0;
- for(pLevel=lsmDbSnapshotLevel(pSnap); iLevel<nLevel; pLevel=pLevel->pNext){
- ckptExportLevel(pLevel, &ckpt, &iOut, &rc);
- iLevel++;
- }
- /* Write the block-redirect list */
- ckptSetValue(&ckpt, iOut++, pSnap->redirect.n, &rc);
- for(i=0; i<pSnap->redirect.n; i++){
- ckptSetValue(&ckpt, iOut++, pSnap->redirect.a[i].iFrom, &rc);
- ckptSetValue(&ckpt, iOut++, pSnap->redirect.a[i].iTo, &rc);
- }
- /* Write the freelist */
- assert( pSnap->freelist.nEntry<=pDb->nMaxFreelist );
- if( rc==LSM_OK ){
- int nFree = pSnap->freelist.nEntry;
- ckptSetValue(&ckpt, iOut++, nFree, &rc);
- for(i=0; i<nFree; i++){
- FreelistEntry *p = &pSnap->freelist.aEntry[i];
- ckptSetValue(&ckpt, iOut++, p->iBlk, &rc);
- ckptSetValue(&ckpt, iOut++, (p->iId >> 32) & 0xFFFFFFFF, &rc);
- ckptSetValue(&ckpt, iOut++, p->iId & 0xFFFFFFFF, &rc);
- }
- }
- /* Write the checkpoint header */
- assert( iId>=0 );
- assert( pSnap->iCmpId==pDb->compress.iId
- || pSnap->iCmpId==LSM_COMPRESSION_EMPTY
- );
- ckptSetValue(&ckpt, CKPT_HDR_ID_MSW, (u32)(iId>>32), &rc);
- ckptSetValue(&ckpt, CKPT_HDR_ID_LSW, (u32)(iId&0xFFFFFFFF), &rc);
- ckptSetValue(&ckpt, CKPT_HDR_NCKPT, iOut+2, &rc);
- ckptSetValue(&ckpt, CKPT_HDR_CMPID, pDb->compress.iId, &rc);
- ckptSetValue(&ckpt, CKPT_HDR_NBLOCK, pSnap->nBlock, &rc);
- ckptSetValue(&ckpt, CKPT_HDR_BLKSZ, lsmFsBlockSize(pFS), &rc);
- ckptSetValue(&ckpt, CKPT_HDR_NLEVEL, nLevel, &rc);
- ckptSetValue(&ckpt, CKPT_HDR_PGSZ, lsmFsPageSize(pFS), &rc);
- ckptSetValue(&ckpt, CKPT_HDR_NWRITE, pSnap->nWrite, &rc);
- if( bCksum ){
- ckptAddChecksum(&ckpt, iOut, &rc);
- }else{
- ckptSetValue(&ckpt, iOut, 0, &rc);
- ckptSetValue(&ckpt, iOut+1, 0, &rc);
- }
- iOut += 2;
- assert( iOut<=1024 );
- #ifdef LSM_LOG_FREELIST
- lsmLogMessage(pDb, rc,
- "ckptExportSnapshot(): id=%lld freelist: %d", iId, pSnap->freelist.nEntry
- );
- for(i=0; i<pSnap->freelist.nEntry; i++){
- lsmLogMessage(pDb, rc,
- "ckptExportSnapshot(): iBlk=%d id=%lld",
- pSnap->freelist.aEntry[i].iBlk,
- pSnap->freelist.aEntry[i].iId
- );
- }
- #endif
- *ppCkpt = (void *)ckpt.aCkpt;
- if( pnCkpt ) *pnCkpt = sizeof(u32)*iOut;
- return rc;
- }
- /*
- ** Helper function for ckptImport().
- */
- static void ckptNewSegment(
- u32 *aIn,
- int *piIn,
- Segment *pSegment /* Populate this structure */
- ){
- assert( pSegment->iFirst==0 && pSegment->iLastPg==0 );
- assert( pSegment->nSize==0 && pSegment->iRoot==0 );
- pSegment->iFirst = ckptGobble64(aIn, piIn);
- pSegment->iLastPg = ckptGobble64(aIn, piIn);
- pSegment->iRoot = ckptGobble64(aIn, piIn);
- pSegment->nSize = ckptGobble64(aIn, piIn);
- assert( pSegment->iFirst );
- }
- static int ckptSetupMerge(lsm_db *pDb, u32 *aInt, int *piIn, Level *pLevel){
- Merge *pMerge; /* Allocated Merge object */
- int nInput; /* Number of input segments in merge */
- int iIn = *piIn; /* Next value to read from aInt[] */
- int i; /* Iterator variable */
- int nByte; /* Number of bytes to allocate */
- /* Allocate the Merge object. If malloc() fails, return LSM_NOMEM. */
- nInput = (int)aInt[iIn++];
- nByte = sizeof(Merge) + sizeof(MergeInput) * nInput;
- pMerge = (Merge *)lsmMallocZero(pDb->pEnv, nByte);
- if( !pMerge ) return LSM_NOMEM_BKPT;
- pLevel->pMerge = pMerge;
- /* Populate the Merge object. */
- pMerge->aInput = (MergeInput *)&pMerge[1];
- pMerge->nInput = nInput;
- pMerge->iOutputOff = -1;
- pMerge->nSkip = (int)aInt[iIn++];
- for(i=0; i<nInput; i++){
- pMerge->aInput[i].iPg = ckptGobble64(aInt, &iIn);
- pMerge->aInput[i].iCell = (int)aInt[iIn++];
- }
- pMerge->splitkey.iPg = ckptGobble64(aInt, &iIn);
- pMerge->splitkey.iCell = (int)aInt[iIn++];
- pMerge->iCurrentPtr = ckptGobble64(aInt, &iIn);
- /* Set *piIn and return LSM_OK. */
- *piIn = iIn;
- return LSM_OK;
- }
- static int ckptLoadLevels(
- lsm_db *pDb,
- u32 *aIn,
- int *piIn,
- int nLevel,
- Level **ppLevel
- ){
- int i;
- int rc = LSM_OK;
- Level *pRet = 0;
- Level **ppNext;
- int iIn = *piIn;
- ppNext = &pRet;
- for(i=0; rc==LSM_OK && i<nLevel; i++){
- int iRight;
- Level *pLevel;
- /* Allocate space for the Level structure and Level.apRight[] array */
- pLevel = (Level *)lsmMallocZeroRc(pDb->pEnv, sizeof(Level), &rc);
- if( rc==LSM_OK ){
- pLevel->iAge = (u16)(aIn[iIn] & 0x0000FFFF);
- pLevel->flags = (u16)((aIn[iIn]>>16) & 0x0000FFFF);
- iIn++;
- pLevel->nRight = aIn[iIn++];
- if( pLevel->nRight ){
- int nByte = sizeof(Segment) * pLevel->nRight;
- pLevel->aRhs = (Segment *)lsmMallocZeroRc(pDb->pEnv, nByte, &rc);
- }
- if( rc==LSM_OK ){
- *ppNext = pLevel;
- ppNext = &pLevel->pNext;
- /* Allocate the main segment */
- ckptNewSegment(aIn, &iIn, &pLevel->lhs);
- /* Allocate each of the right-hand segments, if any */
- for(iRight=0; iRight<pLevel->nRight; iRight++){
- ckptNewSegment(aIn, &iIn, &pLevel->aRhs[iRight]);
- }
- /* Set up the Merge object, if required */
- if( pLevel->nRight>0 ){
- rc = ckptSetupMerge(pDb, aIn, &iIn, pLevel);
- }
- }
- }
- }
- if( rc!=LSM_OK ){
- /* An OOM must have occurred. Free any level structures allocated and
- ** return the error to the caller. */
- lsmSortedFreeLevel(pDb->pEnv, pRet);
- pRet = 0;
- }
- *ppLevel = pRet;
- *piIn = iIn;
- return rc;
- }
- int lsmCheckpointLoadLevels(lsm_db *pDb, void *pVal, int nVal){
- int rc = LSM_OK;
- if( nVal>0 ){
- u32 *aIn;
- aIn = lsmMallocRc(pDb->pEnv, nVal, &rc);
- if( aIn ){
- Level *pLevel = 0;
- Level *pParent;
- int nIn;
- int nLevel;
- int iIn = 1;
- memcpy(aIn, pVal, nVal);
- nIn = nVal / sizeof(u32);
- ckptChangeEndianness(aIn, nIn);
- nLevel = aIn[0];
- rc = ckptLoadLevels(pDb, aIn, &iIn, nLevel, &pLevel);
- lsmFree(pDb->pEnv, aIn);
- assert( rc==LSM_OK || pLevel==0 );
- if( rc==LSM_OK ){
- pParent = lsmDbSnapshotLevel(pDb->pWorker);
- assert( pParent );
- while( pParent->pNext ) pParent = pParent->pNext;
- pParent->pNext = pLevel;
- }
- }
- }
- return rc;
- }
- /*
- ** Return the data for the LEVELS record.
- **
- ** The size of the checkpoint that can be stored in the database header
- ** must not exceed 1024 32-bit integers. Normally, it does not. However,
- ** if it does, part of the checkpoint must be stored in the LSM. This
- ** routine returns that part.
- */
- int lsmCheckpointLevels(
- lsm_db *pDb, /* Database handle */
- int nLevel, /* Number of levels to write to blob */
- void **paVal, /* OUT: Pointer to LEVELS blob */
- int *pnVal /* OUT: Size of LEVELS blob in bytes */
- ){
- Level *p; /* Used to iterate through levels */
- int nAll= 0;
- int rc;
- int i;
- int iOut;
- CkptBuffer ckpt;
- assert( nLevel>0 );
- for(p=lsmDbSnapshotLevel(pDb->pWorker); p; p=p->pNext) nAll++;
- assert( nAll>nLevel );
- nAll -= nLevel;
- for(p=lsmDbSnapshotLevel(pDb->pWorker); p && nAll>0; p=p->pNext) nAll--;
- memset(&ckpt, 0, sizeof(CkptBuffer));
- ckpt.pEnv = pDb->pEnv;
- ckptSetValue(&ckpt, 0, nLevel, &rc);
- iOut = 1;
- for(i=0; rc==LSM_OK && i<nLevel; i++){
- ckptExportLevel(p, &ckpt, &iOut, &rc);
- p = p->pNext;
- }
- assert( rc!=LSM_OK || p==0 );
- if( rc==LSM_OK ){
- ckptChangeEndianness(ckpt.aCkpt, iOut);
- *paVal = (void *)ckpt.aCkpt;
- *pnVal = iOut * sizeof(u32);
- }else{
- *pnVal = 0;
- *paVal = 0;
- }
- return rc;
- }
- /*
- ** Read the checkpoint id from meta-page pPg.
- */
- static i64 ckptLoadId(MetaPage *pPg){
- i64 ret = 0;
- if( pPg ){
- int nData;
- u8 *aData = lsmFsMetaPageData(pPg, &nData);
- ret = (((i64)lsmGetU32(&aData[CKPT_HDR_ID_MSW*4])) << 32) +
- ((i64)lsmGetU32(&aData[CKPT_HDR_ID_LSW*4]));
- }
- return ret;
- }
- /*
- ** Return true if the buffer passed as an argument contains a valid
- ** checkpoint.
- */
- static int ckptChecksumOk(u32 *aCkpt){
- u32 nCkpt = aCkpt[CKPT_HDR_NCKPT];
- u32 cksum1;
- u32 cksum2;
- if( nCkpt<CKPT_HDR_NCKPT || nCkpt>(LSM_META_RW_PAGE_SIZE)/sizeof(u32) ){
- return 0;
- }
- ckptChecksum(aCkpt, nCkpt, &cksum1, &cksum2);
- return (cksum1==aCkpt[nCkpt-2] && cksum2==aCkpt[nCkpt-1]);
- }
- /*
- ** Attempt to load a checkpoint from meta page iMeta.
- **
- ** This function is a no-op if *pRc is set to any value other than LSM_OK
- ** when it is called. If an error occurs, *pRc is set to an LSM error code
- ** before returning.
- **
- ** If no error occurs and the checkpoint is successfully loaded, copy it to
- ** ShmHeader.aSnap1[] and ShmHeader.aSnap2[], and set ShmHeader.iMetaPage
- ** to indicate its origin. In this case return 1. Or, if the checkpoint
- ** cannot be loaded (because the checksum does not compute), return 0.
- */
- static int ckptTryLoad(lsm_db *pDb, MetaPage *pPg, u32 iMeta, int *pRc){
- int bLoaded = 0; /* Return value */
- if( *pRc==LSM_OK ){
- int rc = LSM_OK; /* Error code */
- u32 *aCkpt = 0; /* Pointer to buffer containing checkpoint */
- u32 nCkpt; /* Number of elements in aCkpt[] */
- int nData; /* Bytes of data in aData[] */
- u8 *aData; /* Meta page data */
-
- aData = lsmFsMetaPageData(pPg, &nData);
- nCkpt = (u32)lsmGetU32(&aData[CKPT_HDR_NCKPT*sizeof(u32)]);
- if( nCkpt<=nData/sizeof(u32) && nCkpt>CKPT_HDR_NCKPT ){
- aCkpt = (u32 *)lsmMallocRc(pDb->pEnv, nCkpt*sizeof(u32), &rc);
- }
- if( aCkpt ){
- memcpy(aCkpt, aData, nCkpt*sizeof(u32));
- ckptChangeEndianness(aCkpt, nCkpt);
- if( ckptChecksumOk(aCkpt) ){
- ShmHeader *pShm = pDb->pShmhdr;
- memcpy(pShm->aSnap1, aCkpt, nCkpt*sizeof(u32));
- memcpy(pShm->aSnap2, aCkpt, nCkpt*sizeof(u32));
- memcpy(pDb->aSnapshot, aCkpt, nCkpt*sizeof(u32));
- pShm->iMetaPage = iMeta;
- bLoaded = 1;
- }
- }
- lsmFree(pDb->pEnv, aCkpt);
- *pRc = rc;
- }
- return bLoaded;
- }
- /*
- ** Initialize the shared-memory header with an empty snapshot. This function
- ** is called when no valid snapshot can be found in the database header.
- */
- static void ckptLoadEmpty(lsm_db *pDb){
- u32 aCkpt[] = {
- 0, /* CKPT_HDR_ID_MSW */
- 10, /* CKPT_HDR_ID_LSW */
- 0, /* CKPT_HDR_NCKPT */
- LSM_COMPRESSION_EMPTY, /* CKPT_HDR_CMPID */
- 0, /* CKPT_HDR_NBLOCK */
- 0, /* CKPT_HDR_BLKSZ */
- 0, /* CKPT_HDR_NLEVEL */
- 0, /* CKPT_HDR_PGSZ */
- 0, /* CKPT_HDR_NWRITE */
- 0, 0, 1234, 5678, /* The log pointer and initial checksum */
- 0,0,0,0, 0,0,0,0, /* The append list */
- 0, /* The redirected block list */
- 0, /* The free block list */
- 0, 0 /* Space for checksum values */
- };
- u32 nCkpt = array_size(aCkpt);
- ShmHeader *pShm = pDb->pShmhdr;
- aCkpt[CKPT_HDR_NCKPT] = nCkpt;
- aCkpt[CKPT_HDR_BLKSZ] = pDb->nDfltBlksz;
- aCkpt[CKPT_HDR_PGSZ] = pDb->nDfltPgsz;
- ckptChecksum(aCkpt, array_size(aCkpt), &aCkpt[nCkpt-2], &aCkpt[nCkpt-1]);
- memcpy(pShm->aSnap1, aCkpt, nCkpt*sizeof(u32));
- memcpy(pShm->aSnap2, aCkpt, nCkpt*sizeof(u32));
- memcpy(pDb->aSnapshot, aCkpt, nCkpt*sizeof(u32));
- }
- /*
- ** This function is called as part of database recovery to initialize the
- ** ShmHeader.aSnap1[] and ShmHeader.aSnap2[] snapshots.
- */
- int lsmCheckpointRecover(lsm_db *pDb){
- int rc = LSM_OK; /* Return Code */
- i64 iId1; /* Id of checkpoint on meta-page 1 */
- i64 iId2; /* Id of checkpoint on meta-page 2 */
- int bLoaded = 0; /* True once checkpoint has been loaded */
- int cmp; /* True if (iId2>iId1) */
- MetaPage *apPg[2] = {0, 0}; /* Meta-pages 1 and 2 */
- rc = lsmFsMetaPageGet(pDb->pFS, 0, 1, &apPg[0]);
- if( rc==LSM_OK ) rc = lsmFsMetaPageGet(pDb->pFS, 0, 2, &apPg[1]);
- iId1 = ckptLoadId(apPg[0]);
- iId2 = ckptLoadId(apPg[1]);
- cmp = (iId2 > iId1);
- bLoaded = ckptTryLoad(pDb, apPg[cmp?1:0], (cmp?2:1), &rc);
- if( bLoaded==0 ){
- bLoaded = ckptTryLoad(pDb, apPg[cmp?0:1], (cmp?1:2), &rc);
- }
- /* The database does not contain a valid checkpoint. Initialize the shared
- ** memory header with an empty checkpoint. */
- if( bLoaded==0 ){
- ckptLoadEmpty(pDb);
- }
- lsmFsMetaPageRelease(apPg[0]);
- lsmFsMetaPageRelease(apPg[1]);
- return rc;
- }
- /*
- ** Store the snapshot in pDb->aSnapshot[] in meta-page iMeta.
- */
- int lsmCheckpointStore(lsm_db *pDb, int iMeta){
- MetaPage *pPg = 0;
- int rc;
- assert( iMeta==1 || iMeta==2 );
- rc = lsmFsMetaPageGet(pDb->pFS, 1, iMeta, &pPg);
- if( rc==LSM_OK ){
- u8 *aData;
- int nData;
- int nCkpt;
- nCkpt = (int)pDb->aSnapshot[CKPT_HDR_NCKPT];
- aData = lsmFsMetaPageData(pPg, &nData);
- memcpy(aData, pDb->aSnapshot, nCkpt*sizeof(u32));
- ckptChangeEndianness((u32 *)aData, nCkpt);
- rc = lsmFsMetaPageRelease(pPg);
- }
-
- return rc;
- }
- /*
- ** Copy the current client snapshot from shared-memory to pDb->aSnapshot[].
- */
- int lsmCheckpointLoad(lsm_db *pDb, int *piRead){
- int nRem = LSM_ATTEMPTS_BEFORE_PROTOCOL;
- ShmHeader *pShm = pDb->pShmhdr;
- while( (nRem--)>0 ){
- int nInt;
- nInt = pShm->aSnap1[CKPT_HDR_NCKPT];
- if( nInt<=(LSM_META_RW_PAGE_SIZE / sizeof(u32)) ){
- memcpy(pDb->aSnapshot, pShm->aSnap1, nInt*sizeof(u32));
- if( ckptChecksumOk(pDb->aSnapshot) ){
- if( piRead ) *piRead = 1;
- return LSM_OK;
- }
- }
- nInt = pShm->aSnap2[CKPT_HDR_NCKPT];
- if( nInt<=(LSM_META_RW_PAGE_SIZE / sizeof(u32)) ){
- memcpy(pDb->aSnapshot, pShm->aSnap2, nInt*sizeof(u32));
- if( ckptChecksumOk(pDb->aSnapshot) ){
- if( piRead ) *piRead = 2;
- return LSM_OK;
- }
- }
- lsmShmBarrier(pDb);
- }
- return LSM_PROTOCOL_BKPT;
- }
- int lsmInfoCompressionId(lsm_db *db, u32 *piCmpId){
- int rc;
- assert( db->pClient==0 && db->pWorker==0 );
- rc = lsmCheckpointLoad(db, 0);
- if( rc==LSM_OK ){
- *piCmpId = db->aSnapshot[CKPT_HDR_CMPID];
- }
- return rc;
- }
- int lsmCheckpointLoadOk(lsm_db *pDb, int iSnap){
- u32 *aShm;
- assert( iSnap==1 || iSnap==2 );
- aShm = (iSnap==1) ? pDb->pShmhdr->aSnap1 : pDb->pShmhdr->aSnap2;
- return (lsmCheckpointId(pDb->aSnapshot, 0)==lsmCheckpointId(aShm, 0) );
- }
- int lsmCheckpointClientCacheOk(lsm_db *pDb){
- return ( pDb->pClient
- && pDb->pClient->iId==lsmCheckpointId(pDb->aSnapshot, 0)
- && pDb->pClient->iId==lsmCheckpointId(pDb->pShmhdr->aSnap1, 0)
- && pDb->pClient->iId==lsmCheckpointId(pDb->pShmhdr->aSnap2, 0)
- );
- }
- int lsmCheckpointLoadWorker(lsm_db *pDb){
- int rc;
- ShmHeader *pShm = pDb->pShmhdr;
- int nInt1;
- int nInt2;
- /* Must be holding the WORKER lock to do this. Or DMS2. */
- assert(
- lsmShmAssertLock(pDb, LSM_LOCK_WORKER, LSM_LOCK_EXCL)
- || lsmShmAssertLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_EXCL)
- );
- /* Check that the two snapshots match. If not, repair them. */
- nInt1 = pShm->aSnap1[CKPT_HDR_NCKPT];
- nInt2 = pShm->aSnap2[CKPT_HDR_NCKPT];
- if( nInt1!=nInt2 || memcmp(pShm->aSnap1, pShm->aSnap2, nInt2*sizeof(u32)) ){
- if( ckptChecksumOk(pShm->aSnap1) ){
- memcpy(pShm->aSnap2, pShm->aSnap1, sizeof(u32)*nInt1);
- }else if( ckptChecksumOk(pShm->aSnap2) ){
- memcpy(pShm->aSnap1, pShm->aSnap2, sizeof(u32)*nInt2);
- }else{
- return LSM_PROTOCOL_BKPT;
- }
- }
- rc = lsmCheckpointDeserialize(pDb, 1, pShm->aSnap1, &pDb->pWorker);
- if( pDb->pWorker ) pDb->pWorker->pDatabase = pDb->pDatabase;
- if( rc==LSM_OK ){
- rc = lsmCheckCompressionId(pDb, pDb->pWorker->iCmpId);
- }
- #if 0
- assert( rc!=LSM_OK || lsmFsIntegrityCheck(pDb) );
- #endif
- return rc;
- }
- int lsmCheckpointDeserialize(
- lsm_db *pDb,
- int bInclFreelist, /* If true, deserialize free-list */
- u32 *aCkpt,
- Snapshot **ppSnap
- ){
- int rc = LSM_OK;
- Snapshot *pNew;
- pNew = (Snapshot *)lsmMallocZeroRc(pDb->pEnv, sizeof(Snapshot), &rc);
- if( rc==LSM_OK ){
- Level *pLvl;
- int nFree;
- int i;
- int nLevel = (int)aCkpt[CKPT_HDR_NLEVEL];
- int iIn = CKPT_HDR_SIZE + CKPT_APPENDLIST_SIZE + CKPT_LOGPTR_SIZE;
- pNew->iId = lsmCheckpointId(aCkpt, 0);
- pNew->nBlock = aCkpt[CKPT_HDR_NBLOCK];
- pNew->nWrite = aCkpt[CKPT_HDR_NWRITE];
- rc = ckptLoadLevels(pDb, aCkpt, &iIn, nLevel, &pNew->pLevel);
- pNew->iLogOff = lsmCheckpointLogOffset(aCkpt);
- pNew->iCmpId = aCkpt[CKPT_HDR_CMPID];
- /* Make a copy of the append-list */
- for(i=0; i<LSM_APPLIST_SZ; i++){
- u32 *a = &aCkpt[CKPT_HDR_SIZE + CKPT_LOGPTR_SIZE + i*2];
- pNew->aiAppend[i] = ckptRead64(a);
- }
- /* Read the block-redirect list */
- pNew->redirect.n = aCkpt[iIn++];
- if( pNew->redirect.n ){
- pNew->redirect.a = lsmMallocZeroRc(pDb->pEnv,
- (sizeof(struct RedirectEntry) * LSM_MAX_BLOCK_REDIRECTS), &rc
- );
- if( rc==LSM_OK ){
- for(i=0; i<pNew->redirect.n; i++){
- pNew->redirect.a[i].iFrom = aCkpt[iIn++];
- pNew->redirect.a[i].iTo = aCkpt[iIn++];
- }
- }
- for(pLvl=pNew->pLevel; pLvl->pNext; pLvl=pLvl->pNext);
- if( pLvl->nRight ){
- pLvl->aRhs[pLvl->nRight-1].pRedirect = &pNew->redirect;
- }else{
- pLvl->lhs.pRedirect = &pNew->redirect;
- }
- }
- /* Copy the free-list */
- if( rc==LSM_OK && bInclFreelist ){
- nFree = aCkpt[iIn++];
- if( nFree ){
- pNew->freelist.aEntry = (FreelistEntry *)lsmMallocZeroRc(
- pDb->pEnv, sizeof(FreelistEntry)*nFree, &rc
- );
- if( rc==LSM_OK ){
- int j;
- for(j=0; j<nFree; j++){
- FreelistEntry *p = &pNew->freelist.aEntry[j];
- p->iBlk = aCkpt[iIn++];
- p->iId = ((i64)(aCkpt[iIn])<<32) + aCkpt[iIn+1];
- iIn += 2;
- }
- pNew->freelist.nEntry = pNew->freelist.nAlloc = nFree;
- }
- }
- }
- }
- if( rc!=LSM_OK ){
- lsmFreeSnapshot(pDb->pEnv, pNew);
- pNew = 0;
- }
- *ppSnap = pNew;
- return rc;
- }
- /*
- ** Connection pDb must be the worker connection in order to call this
- ** function. It returns true if the database already contains the maximum
- ** number of levels or false otherwise.
- **
- ** This is used when flushing the in-memory tree to disk. If the database
- ** is already full, then the caller should invoke lsm_work() or similar
- ** until it is not full before creating a new level by flushing the in-memory
- ** tree to disk. Limiting the number of levels in the database ensures that
- ** the records describing them always fit within the checkpoint blob.
- */
- int lsmDatabaseFull(lsm_db *pDb){
- Level *p;
- int nRhs = 0;
- assert( lsmShmAssertLock(pDb, LSM_LOCK_WORKER, LSM_LOCK_EXCL) );
- assert( pDb->pWorker );
- for(p=pDb->pWorker->pLevel; p; p=p->pNext){
- nRhs += (p->nRight ? p->nRight : 1);
- }
- return (nRhs >= LSM_MAX_RHS_SEGMENTS);
- }
- /*
- ** The connection passed as the only argument is currently the worker
- ** connection. Some work has been performed on the database by the connection,
- ** but no new snapshot has been written into shared memory.
- **
- ** This function updates the shared-memory worker and client snapshots with
- ** the new snapshot produced by the work performed by pDb.
- **
- ** If successful, LSM_OK is returned. Otherwise, if an error occurs, an LSM
- ** error code is returned.
- */
- int lsmCheckpointSaveWorker(lsm_db *pDb, int bFlush){
- Snapshot *pSnap = pDb->pWorker;
- ShmHeader *pShm = pDb->pShmhdr;
- void *p = 0;
- int n = 0;
- int rc;
- pSnap->iId++;
- rc = ckptExportSnapshot(pDb, bFlush, pSnap->iId, 1, &p, &n);
- if( rc!=LSM_OK ) return rc;
- assert( ckptChecksumOk((u32 *)p) );
- assert( n<=LSM_META_RW_PAGE_SIZE );
- memcpy(pShm->aSnap2, p, n);
- lsmShmBarrier(pDb);
- memcpy(pShm->aSnap1, p, n);
- lsmFree(pDb->pEnv, p);
- /* assert( lsmFsIntegrityCheck(pDb) ); */
- return LSM_OK;
- }
- /*
- ** This function is used to determine the snapshot-id of the most recently
- ** checkpointed snapshot. Variable ShmHeader.iMetaPage indicates which of
- ** the two meta-pages said snapshot resides on (if any).
- **
- ** If successful, this function loads the snapshot from the meta-page,
- ** verifies its checksum and sets *piId to the snapshot-id before returning
- ** LSM_OK. Or, if the checksum attempt fails, *piId is set to zero and
- ** LSM_OK returned. If an error occurs, an LSM error code is returned and
- ** the final value of *piId is undefined.
- */
- int lsmCheckpointSynced(lsm_db *pDb, i64 *piId, i64 *piLog, u32 *pnWrite){
- int rc = LSM_OK;
- MetaPage *pPg;
- u32 iMeta;
- iMeta = pDb->pShmhdr->iMetaPage;
- if( iMeta==1 || iMeta==2 ){
- rc = lsmFsMetaPageGet(pDb->pFS, 0, iMeta, &pPg);
- if( rc==LSM_OK ){
- int nCkpt;
- int nData;
- u8 *aData;
- aData = lsmFsMetaPageData(pPg, &nData);
- assert( nData==LSM_META_RW_PAGE_SIZE );
- nCkpt = lsmGetU32(&aData[CKPT_HDR_NCKPT*sizeof(u32)]);
- if( nCkpt<(LSM_META_RW_PAGE_SIZE/sizeof(u32)) ){
- u32 *aCopy = lsmMallocRc(pDb->pEnv, sizeof(u32) * nCkpt, &rc);
- if( aCopy ){
- memcpy(aCopy, aData, nCkpt*sizeof(u32));
- ckptChangeEndianness(aCopy, nCkpt);
- if( ckptChecksumOk(aCopy) ){
- if( piId ) *piId = lsmCheckpointId(aCopy, 0);
- if( piLog ) *piLog = (lsmCheckpointLogOffset(aCopy) >> 1);
- if( pnWrite ) *pnWrite = aCopy[CKPT_HDR_NWRITE];
- }
- lsmFree(pDb->pEnv, aCopy);
- }
- }
- lsmFsMetaPageRelease(pPg);
- }
- }
- if( (iMeta!=1 && iMeta!=2) || rc!=LSM_OK || pDb->pShmhdr->iMetaPage!=iMeta ){
- if( piId ) *piId = 0;
- if( piLog ) *piLog = 0;
- if( pnWrite ) *pnWrite = 0;
- }
- return rc;
- }
- /*
- ** Return the checkpoint-id of the checkpoint array passed as the first
- ** argument to this function. If the second argument is true, then assume
- ** that the checkpoint is made up of 32-bit big-endian integers. If it
- ** is false, assume that the integers are in machine byte order.
- */
- i64 lsmCheckpointId(u32 *aCkpt, int bDisk){
- i64 iId;
- if( bDisk ){
- u8 *aData = (u8 *)aCkpt;
- iId = (((i64)lsmGetU32(&aData[CKPT_HDR_ID_MSW*4])) << 32);
- iId += ((i64)lsmGetU32(&aData[CKPT_HDR_ID_LSW*4]));
- }else{
- iId = ((i64)aCkpt[CKPT_HDR_ID_MSW] << 32) + (i64)aCkpt[CKPT_HDR_ID_LSW];
- }
- return iId;
- }
- u32 lsmCheckpointNBlock(u32 *aCkpt){
- return aCkpt[CKPT_HDR_NBLOCK];
- }
- u32 lsmCheckpointNWrite(u32 *aCkpt, int bDisk){
- if( bDisk ){
- return lsmGetU32((u8 *)&aCkpt[CKPT_HDR_NWRITE]);
- }else{
- return aCkpt[CKPT_HDR_NWRITE];
- }
- }
- i64 lsmCheckpointLogOffset(u32 *aCkpt){
- return ((i64)aCkpt[CKPT_HDR_LO_MSW] << 32) + (i64)aCkpt[CKPT_HDR_LO_LSW];
- }
- int lsmCheckpointPgsz(u32 *aCkpt){ return (int)aCkpt[CKPT_HDR_PGSZ]; }
- int lsmCheckpointBlksz(u32 *aCkpt){ return (int)aCkpt[CKPT_HDR_BLKSZ]; }
- void lsmCheckpointLogoffset(
- u32 *aCkpt,
- DbLog *pLog
- ){
- pLog->aRegion[2].iStart = (lsmCheckpointLogOffset(aCkpt) >> 1);
- pLog->cksum0 = aCkpt[CKPT_HDR_LO_CKSUM1];
- pLog->cksum1 = aCkpt[CKPT_HDR_LO_CKSUM2];
- pLog->iSnapshotId = lsmCheckpointId(aCkpt, 0);
- }
- void lsmCheckpointZeroLogoffset(lsm_db *pDb){
- u32 nCkpt;
- nCkpt = pDb->aSnapshot[CKPT_HDR_NCKPT];
- assert( nCkpt>CKPT_HDR_NCKPT );
- assert( nCkpt==pDb->pShmhdr->aSnap1[CKPT_HDR_NCKPT] );
- assert( 0==memcmp(pDb->aSnapshot, pDb->pShmhdr->aSnap1, nCkpt*sizeof(u32)) );
- assert( 0==memcmp(pDb->aSnapshot, pDb->pShmhdr->aSnap2, nCkpt*sizeof(u32)) );
- pDb->aSnapshot[CKPT_HDR_LO_MSW] = 0;
- pDb->aSnapshot[CKPT_HDR_LO_LSW] = 0;
- ckptChecksum(pDb->aSnapshot, nCkpt,
- &pDb->aSnapshot[nCkpt-2], &pDb->aSnapshot[nCkpt-1]
- );
- memcpy(pDb->pShmhdr->aSnap1, pDb->aSnapshot, nCkpt*sizeof(u32));
- memcpy(pDb->pShmhdr->aSnap2, pDb->aSnapshot, nCkpt*sizeof(u32));
- }
- /*
- ** Set the output variable to the number of KB of data written into the
- ** database file since the most recent checkpoint.
- */
- int lsmCheckpointSize(lsm_db *db, int *pnKB){
- int rc = LSM_OK;
- u32 nSynced;
- /* Set nSynced to the number of pages that had been written when the
- ** database was last checkpointed. */
- rc = lsmCheckpointSynced(db, 0, 0, &nSynced);
- if( rc==LSM_OK ){
- u32 nPgsz = db->pShmhdr->aSnap1[CKPT_HDR_PGSZ];
- u32 nWrite = db->pShmhdr->aSnap1[CKPT_HDR_NWRITE];
- *pnKB = (int)(( ((i64)(nWrite - nSynced) * nPgsz) + 1023) / 1024);
- }
- return rc;
- }
|