btree.c 212 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
7677867796780678167826783678467856786678767886789679067916792679367946795679667976798679968006801680268036804680568066807680868096810681168126813681468156816681768186819682068216822682368246825682668276828682968306831683268336834683568366837683868396840684168426843684468456846684768486849685068516852685368546855685668576858685968606861686268636864686568666867686868696870687168726873687468756876687768786879688068816882688368846885688668876888688968906891689268936894689568966897689868996900690169026903690469056906690769086909
  /*
  ** 2004 April 6
  **
  ** The author disclaims copyright to this source code. In place of
  ** a legal notice, here is a blessing:
  **
  ** May you do good and not evil.
  ** May you find forgiveness for yourself and forgive others.
  ** May you share freely, never taking more than you give.
  **
  *************************************************************************
  ** $Id: btree.c,v 1.433 2007/12/13 21:54:11 drh Exp $
  **
  ** This file implements an external (disk-based) database using BTrees.
  ** See the header comment on "btreeInt.h" for additional information,
  ** including a description of the file format and an overview of operation.
  */
  18. #include "btreeInt.h"
  19. /*
  20. ** The header string that appears at the beginning of every
  21. ** SQLite database.
  22. */
  23. static const char zMagicHeader[] = SQLITE_FILE_HEADER;
  /*
  ** Tracing switch for the TRACE macro.  The variable only exists when
  ** SQLITE_TEST evaluates to non-zero; store 1 in it to turn tracing on.
  ** It starts out as 0 (tracing off).
  */
  #if SQLITE_TEST
  int sqlite3_btree_trace = 0;   /* Non-zero enables tracing */
  #endif
  31. #ifndef SQLITE_OMIT_SHARED_CACHE
  32. /*
  33. ** A flag to indicate whether or not shared cache is enabled. Also,
  34. ** a list of BtShared objects that are eligible for participation
  35. ** in shared cache. The variables have file scope during normal builds,
  36. ** but the test harness needs to access these variables so we make them
  37. ** global for test builds.
  38. */
  39. #ifdef SQLITE_TEST
  40. BtShared *sqlite3SharedCacheList = 0;
  41. int sqlite3SharedCacheEnabled = 0;
  42. #else
  43. static BtShared *sqlite3SharedCacheList = 0;
  44. static int sqlite3SharedCacheEnabled = 0;
  45. #endif
  46. #endif /* SQLITE_OMIT_SHARED_CACHE */
  47. #ifndef SQLITE_OMIT_SHARED_CACHE
  48. /*
  49. ** Enable or disable the shared pager and schema features.
  50. **
  51. ** This routine has no effect on existing database connections.
  52. ** The shared cache setting effects only future calls to
  53. ** sqlite3_open(), sqlite3_open16(), or sqlite3_open_v2().
  54. */
  55. int sqlite3_enable_shared_cache(int enable){
  56. sqlite3SharedCacheEnabled = enable;
  57. return SQLITE_OK;
  58. }
  59. #endif
  60. /*
  61. ** Forward declaration
  62. */
  63. static int checkReadLocks(Btree*,Pgno,BtCursor*);
  #ifdef SQLITE_OMIT_SHARED_CACHE
  /*
  ** queryTableLock(), lockTable() and unlockAllTables() maintain the
  ** BtShared.pLock linked list that holds shared-cache table-level locks.
  ** When the library is built without the shared-cache feature, every
  ** BtShared structure has exactly one user and table locking is not
  ** needed, so the three names are defined here as no-ops instead:
  **
  **   queryTableLock() and lockTable() expand to SQLITE_OK;
  **   unlockAllTables() expands to nothing.
  **
  ** None of these stubs evaluates its arguments.
  */
  #define queryTableLock(pBtree,iTab,eLock) SQLITE_OK
  #define lockTable(pBtree,iTable,eLock) SQLITE_OK
  #define unlockAllTables(pBtree)
  #endif
  77. #ifndef SQLITE_OMIT_SHARED_CACHE
  78. /*
  79. ** Query to see if btree handle p may obtain a lock of type eLock
  80. ** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return
  81. ** SQLITE_OK if the lock may be obtained (by calling lockTable()), or
  82. ** SQLITE_LOCKED if not.
  83. */
  84. static int queryTableLock(Btree *p, Pgno iTab, u8 eLock){
  85. BtShared *pBt = p->pBt;
  86. BtLock *pIter;
  87. assert( sqlite3BtreeHoldsMutex(p) );
  88. /* This is a no-op if the shared-cache is not enabled */
  89. if( !p->sharable ){
  90. return SQLITE_OK;
  91. }
  92. /* This (along with lockTable()) is where the ReadUncommitted flag is
  93. ** dealt with. If the caller is querying for a read-lock and the flag is
  94. ** set, it is unconditionally granted - even if there are write-locks
  95. ** on the table. If a write-lock is requested, the ReadUncommitted flag
  96. ** is not considered.
  97. **
  98. ** In function lockTable(), if a read-lock is demanded and the
  99. ** ReadUncommitted flag is set, no entry is added to the locks list
  100. ** (BtShared.pLock).
  101. **
  102. ** To summarize: If the ReadUncommitted flag is set, then read cursors do
  103. ** not create or respect table locks. The locking procedure for a
  104. ** write-cursor does not change.
  105. */
  106. if(
  107. !p->db ||
  108. 0==(p->db->flags&SQLITE_ReadUncommitted) ||
  109. eLock==WRITE_LOCK ||
  110. iTab==MASTER_ROOT
  111. ){
  112. for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
  113. if( pIter->pBtree!=p && pIter->iTable==iTab &&
  114. (pIter->eLock!=eLock || eLock!=READ_LOCK) ){
  115. return SQLITE_LOCKED;
  116. }
  117. }
  118. }
  119. return SQLITE_OK;
  120. }
  121. #endif /* !SQLITE_OMIT_SHARED_CACHE */
  122. #ifndef SQLITE_OMIT_SHARED_CACHE
  123. /*
  124. ** Add a lock on the table with root-page iTable to the shared-btree used
  125. ** by Btree handle p. Parameter eLock must be either READ_LOCK or
  126. ** WRITE_LOCK.
  127. **
  128. ** SQLITE_OK is returned if the lock is added successfully. SQLITE_BUSY and
  129. ** SQLITE_NOMEM may also be returned.
  130. */
  131. static int lockTable(Btree *p, Pgno iTable, u8 eLock){
  132. BtShared *pBt = p->pBt;
  133. BtLock *pLock = 0;
  134. BtLock *pIter;
  135. assert( sqlite3BtreeHoldsMutex(p) );
  136. /* This is a no-op if the shared-cache is not enabled */
  137. if( !p->sharable ){
  138. return SQLITE_OK;
  139. }
  140. assert( SQLITE_OK==queryTableLock(p, iTable, eLock) );
  141. /* If the read-uncommitted flag is set and a read-lock is requested,
  142. ** return early without adding an entry to the BtShared.pLock list. See
  143. ** comment in function queryTableLock() for more info on handling
  144. ** the ReadUncommitted flag.
  145. */
  146. if(
  147. (p->db) &&
  148. (p->db->flags&SQLITE_ReadUncommitted) &&
  149. (eLock==READ_LOCK) &&
  150. iTable!=MASTER_ROOT
  151. ){
  152. return SQLITE_OK;
  153. }
  154. /* First search the list for an existing lock on this table. */
  155. for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
  156. if( pIter->iTable==iTable && pIter->pBtree==p ){
  157. pLock = pIter;
  158. break;
  159. }
  160. }
  161. /* If the above search did not find a BtLock struct associating Btree p
  162. ** with table iTable, allocate one and link it into the list.
  163. */
  164. if( !pLock ){
  165. pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock));
  166. if( !pLock ){
  167. return SQLITE_NOMEM;
  168. }
  169. pLock->iTable = iTable;
  170. pLock->pBtree = p;
  171. pLock->pNext = pBt->pLock;
  172. pBt->pLock = pLock;
  173. }
  174. /* Set the BtLock.eLock variable to the maximum of the current lock
  175. ** and the requested lock. This means if a write-lock was already held
  176. ** and a read-lock requested, we don't incorrectly downgrade the lock.
  177. */
  178. assert( WRITE_LOCK>READ_LOCK );
  179. if( eLock>pLock->eLock ){
  180. pLock->eLock = eLock;
  181. }
  182. return SQLITE_OK;
  183. }
  184. #endif /* !SQLITE_OMIT_SHARED_CACHE */
  185. #ifndef SQLITE_OMIT_SHARED_CACHE
  186. /*
  187. ** Release all the table locks (locks obtained via calls to the lockTable()
  188. ** procedure) held by Btree handle p.
  189. */
  190. static void unlockAllTables(Btree *p){
  191. BtLock **ppIter = &p->pBt->pLock;
  192. assert( sqlite3BtreeHoldsMutex(p) );
  193. assert( p->sharable || 0==*ppIter );
  194. while( *ppIter ){
  195. BtLock *pLock = *ppIter;
  196. if( pLock->pBtree==p ){
  197. *ppIter = pLock->pNext;
  198. sqlite3_free(pLock);
  199. }else{
  200. ppIter = &pLock->pNext;
  201. }
  202. }
  203. }
  204. #endif /* SQLITE_OMIT_SHARED_CACHE */
  205. static void releasePage(MemPage *pPage); /* Forward reference */
  206. /*
  207. ** Verify that the cursor holds a mutex on the BtShared
  208. */
  209. #ifndef NDEBUG
  210. static int cursorHoldsMutex(BtCursor *p){
  211. return sqlite3_mutex_held(p->pBt->mutex);
  212. }
  213. #endif
  214. #ifndef SQLITE_OMIT_INCRBLOB
  215. /*
  216. ** Invalidate the overflow page-list cache for cursor pCur, if any.
  217. */
  218. static void invalidateOverflowCache(BtCursor *pCur){
  219. assert( cursorHoldsMutex(pCur) );
  220. sqlite3_free(pCur->aOverflow);
  221. pCur->aOverflow = 0;
  222. }
  223. /*
  224. ** Invalidate the overflow page-list cache for all cursors opened
  225. ** on the shared btree structure pBt.
  226. */
  227. static void invalidateAllOverflowCache(BtShared *pBt){
  228. BtCursor *p;
  229. assert( sqlite3_mutex_held(pBt->mutex) );
  230. for(p=pBt->pCursor; p; p=p->pNext){
  231. invalidateOverflowCache(p);
  232. }
  233. }
  234. #else
/* This is the SQLITE_OMIT_INCRBLOB branch: both overflow-cache
** invalidation routines are replaced by macros that expand to nothing. */
#define invalidateOverflowCache(x)
#define invalidateAllOverflowCache(x)
  237. #endif
  238. /*
  239. ** Save the current cursor position in the variables BtCursor.nKey
  240. ** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK.
  241. */
  242. static int saveCursorPosition(BtCursor *pCur){
  243. int rc;
  244. assert( CURSOR_VALID==pCur->eState );
  245. assert( 0==pCur->pKey );
  246. assert( cursorHoldsMutex(pCur) );
  247. rc = sqlite3BtreeKeySize(pCur, &pCur->nKey);
  248. /* If this is an intKey table, then the above call to BtreeKeySize()
  249. ** stores the integer key in pCur->nKey. In this case this value is
  250. ** all that is required. Otherwise, if pCur is not open on an intKey
  251. ** table, then malloc space for and store the pCur->nKey bytes of key
  252. ** data.
  253. */
  254. if( rc==SQLITE_OK && 0==pCur->pPage->intKey){
  255. void *pKey = sqlite3_malloc(pCur->nKey);
  256. if( pKey ){
  257. rc = sqlite3BtreeKey(pCur, 0, pCur->nKey, pKey);
  258. if( rc==SQLITE_OK ){
  259. pCur->pKey = pKey;
  260. }else{
  261. sqlite3_free(pKey);
  262. }
  263. }else{
  264. rc = SQLITE_NOMEM;
  265. }
  266. }
  267. assert( !pCur->pPage->intKey || !pCur->pKey );
  268. if( rc==SQLITE_OK ){
  269. releasePage(pCur->pPage);
  270. pCur->pPage = 0;
  271. pCur->eState = CURSOR_REQUIRESEEK;
  272. }
  273. invalidateOverflowCache(pCur);
  274. return rc;
  275. }
  276. /*
  277. ** Save the positions of all cursors except pExcept open on the table
  278. ** with root-page iRoot. Usually, this is called just before cursor
  279. ** pExcept is used to modify the table (BtreeDelete() or BtreeInsert()).
  280. */
  281. static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){
  282. BtCursor *p;
  283. assert( sqlite3_mutex_held(pBt->mutex) );
  284. assert( pExcept==0 || pExcept->pBt==pBt );
  285. for(p=pBt->pCursor; p; p=p->pNext){
  286. if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) &&
  287. p->eState==CURSOR_VALID ){
  288. int rc = saveCursorPosition(p);
  289. if( SQLITE_OK!=rc ){
  290. return rc;
  291. }
  292. }
  293. }
  294. return SQLITE_OK;
  295. }
  296. /*
  297. ** Clear the current cursor position.
  298. */
  299. static void clearCursorPosition(BtCursor *pCur){
  300. assert( cursorHoldsMutex(pCur) );
  301. sqlite3_free(pCur->pKey);
  302. pCur->pKey = 0;
  303. pCur->eState = CURSOR_INVALID;
  304. }
  305. /*
  306. ** Restore the cursor to the position it was in (or as close to as possible)
  307. ** when saveCursorPosition() was called. Note that this call deletes the
  308. ** saved position info stored by saveCursorPosition(), so there can be
  309. ** at most one effective restoreOrClearCursorPosition() call after each
  310. ** saveCursorPosition().
  311. **
  312. ** If the second argument argument - doSeek - is false, then instead of
  313. ** returning the cursor to its saved position, any saved position is deleted
  314. ** and the cursor state set to CURSOR_INVALID.
  315. */
  316. int sqlite3BtreeRestoreOrClearCursorPosition(BtCursor *pCur){
  317. int rc;
  318. assert( cursorHoldsMutex(pCur) );
  319. assert( pCur->eState>=CURSOR_REQUIRESEEK );
  320. if( pCur->eState==CURSOR_FAULT ){
  321. return pCur->skip;
  322. }
  323. #ifndef SQLITE_OMIT_INCRBLOB
  324. if( pCur->isIncrblobHandle ){
  325. return SQLITE_ABORT;
  326. }
  327. #endif
  328. pCur->eState = CURSOR_INVALID;
  329. rc = sqlite3BtreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &pCur->skip);
  330. if( rc==SQLITE_OK ){
  331. sqlite3_free(pCur->pKey);
  332. pCur->pKey = 0;
  333. assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID );
  334. }
  335. return rc;
  336. }
  337. #define restoreOrClearCursorPosition(p) \
  338. (p->eState>=CURSOR_REQUIRESEEK ? \
  339. sqlite3BtreeRestoreOrClearCursorPosition(p) : \
  340. SQLITE_OK)
  341. #ifndef SQLITE_OMIT_AUTOVACUUM
  342. /*
  343. ** Given a page number of a regular database page, return the page
  344. ** number for the pointer-map page that contains the entry for the
  345. ** input page number.
  346. */
  347. static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){
  348. int nPagesPerMapPage, iPtrMap, ret;
  349. assert( sqlite3_mutex_held(pBt->mutex) );
  350. nPagesPerMapPage = (pBt->usableSize/5)+1;
  351. iPtrMap = (pgno-2)/nPagesPerMapPage;
  352. ret = (iPtrMap*nPagesPerMapPage) + 2;
  353. if( ret==PENDING_BYTE_PAGE(pBt) ){
  354. ret++;
  355. }
  356. return ret;
  357. }
  358. /*
  359. ** Write an entry into the pointer map.
  360. **
  361. ** This routine updates the pointer map entry for page number 'key'
  362. ** so that it maps to type 'eType' and parent page number 'pgno'.
  363. ** An error code is returned if something goes wrong, otherwise SQLITE_OK.
  364. */
  365. static int ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent){
  366. DbPage *pDbPage; /* The pointer map page */
  367. u8 *pPtrmap; /* The pointer map data */
  368. Pgno iPtrmap; /* The pointer map page number */
  369. int offset; /* Offset in pointer map page */
  370. int rc;
  371. assert( sqlite3_mutex_held(pBt->mutex) );
  372. /* The master-journal page number must never be used as a pointer map page */
  373. assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) );
  374. assert( pBt->autoVacuum );
  375. if( key==0 ){
  376. return SQLITE_CORRUPT_BKPT;
  377. }
  378. iPtrmap = PTRMAP_PAGENO(pBt, key);
  379. rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage);
  380. if( rc!=SQLITE_OK ){
  381. return rc;
  382. }
  383. offset = PTRMAP_PTROFFSET(pBt, key);
  384. pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
  385. if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){
  386. TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent));
  387. rc = sqlite3PagerWrite(pDbPage);
  388. if( rc==SQLITE_OK ){
  389. pPtrmap[offset] = eType;
  390. put4byte(&pPtrmap[offset+1], parent);
  391. }
  392. }
  393. sqlite3PagerUnref(pDbPage);
  394. return rc;
  395. }
  396. /*
  397. ** Read an entry from the pointer map.
  398. **
  399. ** This routine retrieves the pointer map entry for page 'key', writing
  400. ** the type and parent page number to *pEType and *pPgno respectively.
  401. ** An error code is returned if something goes wrong, otherwise SQLITE_OK.
  402. */
  403. static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){
  404. DbPage *pDbPage; /* The pointer map page */
  405. int iPtrmap; /* Pointer map page index */
  406. u8 *pPtrmap; /* Pointer map page data */
  407. int offset; /* Offset of entry in pointer map */
  408. int rc;
  409. assert( sqlite3_mutex_held(pBt->mutex) );
  410. iPtrmap = PTRMAP_PAGENO(pBt, key);
  411. rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage);
  412. if( rc!=0 ){
  413. return rc;
  414. }
  415. pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
  416. offset = PTRMAP_PTROFFSET(pBt, key);
  417. assert( pEType!=0 );
  418. *pEType = pPtrmap[offset];
  419. if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]);
  420. sqlite3PagerUnref(pDbPage);
  421. if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_BKPT;
  422. return SQLITE_OK;
  423. }
  424. #endif /* SQLITE_OMIT_AUTOVACUUM */
/*
** Given a btree page and a cell index (0 means the first cell on
** the page, 1 means the second cell, and so forth) return a pointer
** to the cell content.
**
** The pointer is formed by reading, with get2byte(), entry iCell of the
** array of 2-byte values that begins at aData[cellOffset], and adding
** the value read to aData.
**
** This routine works only for pages that do not contain overflow cells.
**
** The pPage argument appears several times in the expansion, so it must
** be an expression without side effects.
*/
#define findCell(pPage, iCell) \
  ((pPage)->aData + get2byte(&(pPage)->aData[(pPage)->cellOffset+2*(iCell)]))
  434. #ifdef SQLITE_TEST
  435. u8 *sqlite3BtreeFindCell(MemPage *pPage, int iCell){
  436. assert( iCell>=0 );
  437. assert( iCell<get2byte(&pPage->aData[pPage->hdrOffset+3]) );
  438. return findCell(pPage, iCell);
  439. }
  440. #endif
  441. /*
  442. ** This a more complex version of sqlite3BtreeFindCell() that works for
  443. ** pages that do contain overflow cells. See insert
  444. */
  445. static u8 *findOverflowCell(MemPage *pPage, int iCell){
  446. int i;
  447. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  448. for(i=pPage->nOverflow-1; i>=0; i--){
  449. int k;
  450. struct _OvflCell *pOvfl;
  451. pOvfl = &pPage->aOvfl[i];
  452. k = pOvfl->idx;
  453. if( k<=iCell ){
  454. if( k==iCell ){
  455. return pOvfl->pCell;
  456. }
  457. iCell--;
  458. }
  459. }
  460. return findCell(pPage, iCell);
  461. }
  462. /*
  463. ** Parse a cell content block and fill in the CellInfo structure. There
  464. ** are two versions of this function. sqlite3BtreeParseCell() takes a
  465. ** cell index as the second argument and sqlite3BtreeParseCellPtr()
  466. ** takes a pointer to the body of the cell as its second argument.
  467. **
  468. ** Within this file, the parseCell() macro can be called instead of
  469. ** sqlite3BtreeParseCellPtr(). Using some compilers, this will be faster.
  470. */
  471. void sqlite3BtreeParseCellPtr(
  472. MemPage *pPage, /* Page containing the cell */
  473. u8 *pCell, /* Pointer to the cell text. */
  474. CellInfo *pInfo /* Fill in this structure */
  475. ){
  476. int n; /* Number bytes in cell content header */
  477. u32 nPayload; /* Number of bytes of cell payload */
  478. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  479. pInfo->pCell = pCell;
  480. assert( pPage->leaf==0 || pPage->leaf==1 );
  481. n = pPage->childPtrSize;
  482. assert( n==4-4*pPage->leaf );
  483. if( pPage->hasData ){
  484. n += getVarint32(&pCell[n], &nPayload);
  485. }else{
  486. nPayload = 0;
  487. }
  488. pInfo->nData = nPayload;
  489. if( pPage->intKey ){
  490. n += getVarint(&pCell[n], (u64 *)&pInfo->nKey);
  491. }else{
  492. u32 x;
  493. n += getVarint32(&pCell[n], &x);
  494. pInfo->nKey = x;
  495. nPayload += x;
  496. }
  497. pInfo->nPayload = nPayload;
  498. pInfo->nHeader = n;
  499. if( nPayload<=pPage->maxLocal ){
  500. /* This is the (easy) common case where the entire payload fits
  501. ** on the local page. No overflow is required.
  502. */
  503. int nSize; /* Total size of cell content in bytes */
  504. pInfo->nLocal = nPayload;
  505. pInfo->iOverflow = 0;
  506. nSize = nPayload + n;
  507. if( nSize<4 ){
  508. nSize = 4; /* Minimum cell size is 4 */
  509. }
  510. pInfo->nSize = nSize;
  511. }else{
  512. /* If the payload will not fit completely on the local page, we have
  513. ** to decide how much to store locally and how much to spill onto
  514. ** overflow pages. The strategy is to minimize the amount of unused
  515. ** space on overflow pages while keeping the amount of local storage
  516. ** in between minLocal and maxLocal.
  517. **
  518. ** Warning: changing the way overflow payload is distributed in any
  519. ** way will result in an incompatible file format.
  520. */
  521. int minLocal; /* Minimum amount of payload held locally */
  522. int maxLocal; /* Maximum amount of payload held locally */
  523. int surplus; /* Overflow payload available for local storage */
  524. minLocal = pPage->minLocal;
  525. maxLocal = pPage->maxLocal;
  526. surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize - 4);
  527. if( surplus <= maxLocal ){
  528. pInfo->nLocal = surplus;
  529. }else{
  530. pInfo->nLocal = minLocal;
  531. }
  532. pInfo->iOverflow = pInfo->nLocal + n;
  533. pInfo->nSize = pInfo->iOverflow + 4;
  534. }
  535. }
/*
** Parse cell number iCell of pPage into *pInfo.  The cell is located
** with findCell() and handed to sqlite3BtreeParseCellPtr().
*/
#define parseCell(pPage, iCell, pInfo) \
  sqlite3BtreeParseCellPtr((pPage), findCell((pPage), (iCell)), (pInfo))
  538. void sqlite3BtreeParseCell(
  539. MemPage *pPage, /* Page containing the cell */
  540. int iCell, /* The cell index. First cell is 0 */
  541. CellInfo *pInfo /* Fill in this structure */
  542. ){
  543. parseCell(pPage, iCell, pInfo);
  544. }
  545. /*
  546. ** Compute the total number of bytes that a Cell needs in the cell
  547. ** data area of the btree-page. The return number includes the cell
  548. ** data header and the local payload, but not any overflow page or
  549. ** the space used by the cell pointer.
  550. */
  551. #ifndef NDEBUG
  552. static int cellSize(MemPage *pPage, int iCell){
  553. CellInfo info;
  554. sqlite3BtreeParseCell(pPage, iCell, &info);
  555. return info.nSize;
  556. }
  557. #endif
  558. static int cellSizePtr(MemPage *pPage, u8 *pCell){
  559. CellInfo info;
  560. sqlite3BtreeParseCellPtr(pPage, pCell, &info);
  561. return info.nSize;
  562. }
  563. #ifndef SQLITE_OMIT_AUTOVACUUM
  564. /*
  565. ** If the cell pCell, part of page pPage contains a pointer
  566. ** to an overflow page, insert an entry into the pointer-map
  567. ** for the overflow page.
  568. */
  569. static int ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell){
  570. if( pCell ){
  571. CellInfo info;
  572. sqlite3BtreeParseCellPtr(pPage, pCell, &info);
  573. assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload );
  574. if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){
  575. Pgno ovfl = get4byte(&pCell[info.iOverflow]);
  576. return ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno);
  577. }
  578. }
  579. return SQLITE_OK;
  580. }
  581. /*
  582. ** If the cell with index iCell on page pPage contains a pointer
  583. ** to an overflow page, insert an entry into the pointer-map
  584. ** for the overflow page.
  585. */
  586. static int ptrmapPutOvfl(MemPage *pPage, int iCell){
  587. u8 *pCell;
  588. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  589. pCell = findOverflowCell(pPage, iCell);
  590. return ptrmapPutOvflPtr(pPage, pCell);
  591. }
  592. #endif
  593. /*
  594. ** Defragment the page given. All Cells are moved to the
  595. ** end of the page and all free space is collected into one
  596. ** big FreeBlk that occurs in between the header and cell
  597. ** pointer array and the cell content area.
  598. */
  599. static int defragmentPage(MemPage *pPage){
  600. int i; /* Loop counter */
  601. int pc; /* Address of a i-th cell */
  602. int addr; /* Offset of first byte after cell pointer array */
  603. int hdr; /* Offset to the page header */
  604. int size; /* Size of a cell */
  605. int usableSize; /* Number of usable bytes on a page */
  606. int cellOffset; /* Offset to the cell pointer array */
  607. int brk; /* Offset to the cell content area */
  608. int nCell; /* Number of cells on the page */
  609. unsigned char *data; /* The page data */
  610. unsigned char *temp; /* Temp area for cell content */
  611. assert( sqlite3PagerIswriteable(pPage->pDbPage) );
  612. assert( pPage->pBt!=0 );
  613. assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE );
  614. assert( pPage->nOverflow==0 );
  615. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  616. temp = sqlite3PagerTempSpace(pPage->pBt->pPager);
  617. data = pPage->aData;
  618. hdr = pPage->hdrOffset;
  619. cellOffset = pPage->cellOffset;
  620. nCell = pPage->nCell;
  621. assert( nCell==get2byte(&data[hdr+3]) );
  622. usableSize = pPage->pBt->usableSize;
  623. brk = get2byte(&data[hdr+5]);
  624. memcpy(&temp[brk], &data[brk], usableSize - brk);
  625. brk = usableSize;
  626. for(i=0; i<nCell; i++){
  627. u8 *pAddr; /* The i-th cell pointer */
  628. pAddr = &data[cellOffset + i*2];
  629. pc = get2byte(pAddr);
  630. assert( pc<pPage->pBt->usableSize );
  631. size = cellSizePtr(pPage, &temp[pc]);
  632. brk -= size;
  633. memcpy(&data[brk], &temp[pc], size);
  634. put2byte(pAddr, brk);
  635. }
  636. assert( brk>=cellOffset+2*nCell );
  637. put2byte(&data[hdr+5], brk);
  638. data[hdr+1] = 0;
  639. data[hdr+2] = 0;
  640. data[hdr+7] = 0;
  641. addr = cellOffset+2*nCell;
  642. memset(&data[addr], 0, brk-addr);
  643. return SQLITE_OK;
  644. }
  645. /*
  646. ** Allocate nByte bytes of space on a page.
  647. **
  648. ** Return the index into pPage->aData[] of the first byte of
  649. ** the new allocation. Or return 0 if there is not enough free
  650. ** space on the page to satisfy the allocation request.
  651. **
  652. ** If the page contains nBytes of free space but does not contain
  653. ** nBytes of contiguous free space, then this routine automatically
  654. ** calls defragementPage() to consolidate all free space before
  655. ** allocating the new chunk.
  656. */
  657. static int allocateSpace(MemPage *pPage, int nByte){
  658. int addr, pc, hdr;
  659. int size;
  660. int nFrag;
  661. int top;
  662. int nCell;
  663. int cellOffset;
  664. unsigned char *data;
  665. data = pPage->aData;
  666. assert( sqlite3PagerIswriteable(pPage->pDbPage) );
  667. assert( pPage->pBt );
  668. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  669. if( nByte<4 ) nByte = 4;
  670. if( pPage->nFree<nByte || pPage->nOverflow>0 ) return 0;
  671. pPage->nFree -= nByte;
  672. hdr = pPage->hdrOffset;
  673. nFrag = data[hdr+7];
  674. if( nFrag<60 ){
  675. /* Search the freelist looking for a slot big enough to satisfy the
  676. ** space request. */
  677. addr = hdr+1;
  678. while( (pc = get2byte(&data[addr]))>0 ){
  679. size = get2byte(&data[pc+2]);
  680. if( size>=nByte ){
  681. if( size<nByte+4 ){
  682. memcpy(&data[addr], &data[pc], 2);
  683. data[hdr+7] = nFrag + size - nByte;
  684. return pc;
  685. }else{
  686. put2byte(&data[pc+2], size-nByte);
  687. return pc + size - nByte;
  688. }
  689. }
  690. addr = pc;
  691. }
  692. }
  693. /* Allocate memory from the gap in between the cell pointer array
  694. ** and the cell content area.
  695. */
  696. top = get2byte(&data[hdr+5]);
  697. nCell = get2byte(&data[hdr+3]);
  698. cellOffset = pPage->cellOffset;
  699. if( nFrag>=60 || cellOffset + 2*nCell > top - nByte ){
  700. if( defragmentPage(pPage) ) return 0;
  701. top = get2byte(&data[hdr+5]);
  702. }
  703. top -= nByte;
  704. assert( cellOffset + 2*nCell <= top );
  705. put2byte(&data[hdr+5], top);
  706. return top;
  707. }
  708. /*
  709. ** Return a section of the pPage->aData to the freelist.
  710. ** The first byte of the new free block is pPage->aDisk[start]
  711. ** and the size of the block is "size" bytes.
  712. **
  713. ** Most of the effort here is involved in coalesing adjacent
  714. ** free blocks into a single big free block.
  715. */
  716. static void freeSpace(MemPage *pPage, int start, int size){
  717. int addr, pbegin, hdr;
  718. unsigned char *data = pPage->aData;
  719. assert( pPage->pBt!=0 );
  720. assert( sqlite3PagerIswriteable(pPage->pDbPage) );
  721. assert( start>=pPage->hdrOffset+6+(pPage->leaf?0:4) );
  722. assert( (start + size)<=pPage->pBt->usableSize );
  723. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  724. if( size<4 ) size = 4;
  725. #ifdef SQLITE_SECURE_DELETE
  726. /* Overwrite deleted information with zeros when the SECURE_DELETE
  727. ** option is enabled at compile-time */
  728. memset(&data[start], 0, size);
  729. #endif
  730. /* Add the space back into the linked list of freeblocks */
  731. hdr = pPage->hdrOffset;
  732. addr = hdr + 1;
  733. while( (pbegin = get2byte(&data[addr]))<start && pbegin>0 ){
  734. assert( pbegin<=pPage->pBt->usableSize-4 );
  735. assert( pbegin>addr );
  736. addr = pbegin;
  737. }
  738. assert( pbegin<=pPage->pBt->usableSize-4 );
  739. assert( pbegin>addr || pbegin==0 );
  740. put2byte(&data[addr], start);
  741. put2byte(&data[start], pbegin);
  742. put2byte(&data[start+2], size);
  743. pPage->nFree += size;
  744. /* Coalesce adjacent free blocks */
  745. addr = pPage->hdrOffset + 1;
  746. while( (pbegin = get2byte(&data[addr]))>0 ){
  747. int pnext, psize;
  748. assert( pbegin>addr );
  749. assert( pbegin<=pPage->pBt->usableSize-4 );
  750. pnext = get2byte(&data[pbegin]);
  751. psize = get2byte(&data[pbegin+2]);
  752. if( pbegin + psize + 3 >= pnext && pnext>0 ){
  753. int frag = pnext - (pbegin+psize);
  754. assert( frag<=data[pPage->hdrOffset+7] );
  755. data[pPage->hdrOffset+7] -= frag;
  756. put2byte(&data[pbegin], get2byte(&data[pnext]));
  757. put2byte(&data[pbegin+2], pnext+get2byte(&data[pnext+2])-pbegin);
  758. }else{
  759. addr = pbegin;
  760. }
  761. }
  762. /* If the cell content area begins with a freeblock, remove it. */
  763. if( data[hdr+1]==data[hdr+5] && data[hdr+2]==data[hdr+6] ){
  764. int top;
  765. pbegin = get2byte(&data[hdr+1]);
  766. memcpy(&data[hdr+1], &data[pbegin], 2);
  767. top = get2byte(&data[hdr+5]);
  768. put2byte(&data[hdr+5], top + get2byte(&data[pbegin+2]));
  769. }
  770. }
  771. /*
  772. ** Decode the flags byte (the first byte of the header) for a page
  773. ** and initialize fields of the MemPage structure accordingly.
  774. */
  775. static void decodeFlags(MemPage *pPage, int flagByte){
  776. BtShared *pBt; /* A copy of pPage->pBt */
  777. assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
  778. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  779. pPage->intKey = (flagByte & (PTF_INTKEY|PTF_LEAFDATA))!=0;
  780. pPage->zeroData = (flagByte & PTF_ZERODATA)!=0;
  781. pPage->leaf = (flagByte & PTF_LEAF)!=0;
  782. pPage->childPtrSize = 4*(pPage->leaf==0);
  783. pBt = pPage->pBt;
  784. if( flagByte & PTF_LEAFDATA ){
  785. pPage->leafData = 1;
  786. pPage->maxLocal = pBt->maxLeaf;
  787. pPage->minLocal = pBt->minLeaf;
  788. }else{
  789. pPage->leafData = 0;
  790. pPage->maxLocal = pBt->maxLocal;
  791. pPage->minLocal = pBt->minLocal;
  792. }
  793. pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
  794. }
  795. /*
  796. ** Initialize the auxiliary information for a disk block.
  797. **
  798. ** The pParent parameter must be a pointer to the MemPage which
  799. ** is the parent of the page being initialized. The root of a
  800. ** BTree has no parent and so for that page, pParent==NULL.
  801. **
  802. ** Return SQLITE_OK on success. If we see that the page does
  803. ** not contain a well-formed database page, then return
  804. ** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not
  805. ** guarantee that the page is well-formed. It only shows that
  806. ** we failed to detect any corruption.
  807. */
  808. int sqlite3BtreeInitPage(
  809. MemPage *pPage, /* The page to be initialized */
  810. MemPage *pParent /* The parent. Might be NULL */
  811. ){
  812. int pc; /* Address of a freeblock within pPage->aData[] */
  813. int hdr; /* Offset to beginning of page header */
  814. u8 *data; /* Equal to pPage->aData */
  815. BtShared *pBt; /* The main btree structure */
  816. int usableSize; /* Amount of usable space on each page */
  817. int cellOffset; /* Offset from start of page to first cell pointer */
  818. int nFree; /* Number of unused bytes on the page */
  819. int top; /* First byte of the cell content area */
  820. pBt = pPage->pBt;
  821. assert( pBt!=0 );
  822. assert( pParent==0 || pParent->pBt==pBt );
  823. assert( sqlite3_mutex_held(pBt->mutex) );
  824. assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
  825. assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
  826. assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
  827. if( pPage->pParent!=pParent && (pPage->pParent!=0 || pPage->isInit) ){
  828. /* The parent page should never change unless the file is corrupt */
  829. return SQLITE_CORRUPT_BKPT;
  830. }
  831. if( pPage->isInit ) return SQLITE_OK;
  832. if( pPage->pParent==0 && pParent!=0 ){
  833. pPage->pParent = pParent;
  834. sqlite3PagerRef(pParent->pDbPage);
  835. }
  836. hdr = pPage->hdrOffset;
  837. data = pPage->aData;
  838. decodeFlags(pPage, data[hdr]);
  839. pPage->nOverflow = 0;
  840. pPage->idxShift = 0;
  841. usableSize = pBt->usableSize;
  842. pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
  843. top = get2byte(&data[hdr+5]);
  844. pPage->nCell = get2byte(&data[hdr+3]);
  845. if( pPage->nCell>MX_CELL(pBt) ){
  846. /* To many cells for a single page. The page must be corrupt */
  847. return SQLITE_CORRUPT_BKPT;
  848. }
  849. if( pPage->nCell==0 && pParent!=0 && pParent->pgno!=1 ){
  850. /* All pages must have at least one cell, except for root pages */
  851. return SQLITE_CORRUPT_BKPT;
  852. }
  853. /* Compute the total free space on the page */
  854. pc = get2byte(&data[hdr+1]);
  855. nFree = data[hdr+7] + top - (cellOffset + 2*pPage->nCell);
  856. while( pc>0 ){
  857. int next, size;
  858. if( pc>usableSize-4 ){
  859. /* Free block is off the page */
  860. return SQLITE_CORRUPT_BKPT;
  861. }
  862. next = get2byte(&data[pc]);
  863. size = get2byte(&data[pc+2]);
  864. if( next>0 && next<=pc+size+3 ){
  865. /* Free blocks must be in accending order */
  866. return SQLITE_CORRUPT_BKPT;
  867. }
  868. nFree += size;
  869. pc = next;
  870. }
  871. pPage->nFree = nFree;
  872. if( nFree>=usableSize ){
  873. /* Free space cannot exceed total page size */
  874. return SQLITE_CORRUPT_BKPT;
  875. }
  876. pPage->isInit = 1;
  877. return SQLITE_OK;
  878. }
  879. /*
  880. ** Set up a raw page so that it looks like a database page holding
  881. ** no entries.
  882. */
  883. static void zeroPage(MemPage *pPage, int flags){
  884. unsigned char *data = pPage->aData;
  885. BtShared *pBt = pPage->pBt;
  886. int hdr = pPage->hdrOffset;
  887. int first;
  888. assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno );
  889. assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
  890. assert( sqlite3PagerGetData(pPage->pDbPage) == data );
  891. assert( sqlite3PagerIswriteable(pPage->pDbPage) );
  892. assert( sqlite3_mutex_held(pBt->mutex) );
  893. memset(&data[hdr], 0, pBt->usableSize - hdr);
  894. data[hdr] = flags;
  895. first = hdr + 8 + 4*((flags&PTF_LEAF)==0);
  896. memset(&data[hdr+1], 0, 4);
  897. data[hdr+7] = 0;
  898. put2byte(&data[hdr+5], pBt->usableSize);
  899. pPage->nFree = pBt->usableSize - first;
  900. decodeFlags(pPage, flags);
  901. pPage->hdrOffset = hdr;
  902. pPage->cellOffset = first;
  903. pPage->nOverflow = 0;
  904. pPage->idxShift = 0;
  905. pPage->nCell = 0;
  906. pPage->isInit = 1;
  907. }
  908. /*
  909. ** Get a page from the pager. Initialize the MemPage.pBt and
  910. ** MemPage.aData elements if needed.
  911. **
  912. ** If the noContent flag is set, it means that we do not care about
  913. ** the content of the page at this time. So do not go to the disk
  914. ** to fetch the content. Just fill in the content with zeros for now.
  915. ** If in the future we call sqlite3PagerWrite() on this page, that
  916. ** means we have started to be concerned about content and the disk
  917. ** read should occur at that point.
  918. */
  919. int sqlite3BtreeGetPage(
  920. BtShared *pBt, /* The btree */
  921. Pgno pgno, /* Number of the page to fetch */
  922. MemPage **ppPage, /* Return the page in this parameter */
  923. int noContent /* Do not load page content if true */
  924. ){
  925. int rc;
  926. MemPage *pPage;
  927. DbPage *pDbPage;
  928. assert( sqlite3_mutex_held(pBt->mutex) );
  929. rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, noContent);
  930. if( rc ) return rc;
  931. pPage = (MemPage *)sqlite3PagerGetExtra(pDbPage);
  932. pPage->aData = sqlite3PagerGetData(pDbPage);
  933. pPage->pDbPage = pDbPage;
  934. pPage->pBt = pBt;
  935. pPage->pgno = pgno;
  936. pPage->hdrOffset = pPage->pgno==1 ? 100 : 0;
  937. *ppPage = pPage;
  938. return SQLITE_OK;
  939. }
  940. /*
  941. ** Get a page from the pager and initialize it. This routine
  942. ** is just a convenience wrapper around separate calls to
  943. ** sqlite3BtreeGetPage() and sqlite3BtreeInitPage().
  944. */
  945. static int getAndInitPage(
  946. BtShared *pBt, /* The database file */
  947. Pgno pgno, /* Number of the page to get */
  948. MemPage **ppPage, /* Write the page pointer here */
  949. MemPage *pParent /* Parent of the page */
  950. ){
  951. int rc;
  952. assert( sqlite3_mutex_held(pBt->mutex) );
  953. if( pgno==0 ){
  954. return SQLITE_CORRUPT_BKPT;
  955. }
  956. rc = sqlite3BtreeGetPage(pBt, pgno, ppPage, 0);
  957. if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){
  958. rc = sqlite3BtreeInitPage(*ppPage, pParent);
  959. }
  960. return rc;
  961. }
  962. /*
  963. ** Release a MemPage. This should be called once for each prior
  964. ** call to sqlite3BtreeGetPage.
  965. */
  966. static void releasePage(MemPage *pPage){
  967. if( pPage ){
  968. assert( pPage->aData );
  969. assert( pPage->pBt );
  970. assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
  971. assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData );
  972. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  973. sqlite3PagerUnref(pPage->pDbPage);
  974. }
  975. }
  976. /*
  977. ** This routine is called when the reference count for a page
  978. ** reaches zero. We need to unref the pParent pointer when that
  979. ** happens.
  980. */
  981. static void pageDestructor(DbPage *pData, int pageSize){
  982. MemPage *pPage;
  983. assert( (pageSize & 7)==0 );
  984. pPage = (MemPage *)sqlite3PagerGetExtra(pData);
  985. assert( pPage->isInit==0 || sqlite3_mutex_held(pPage->pBt->mutex) );
  986. if( pPage->pParent ){
  987. MemPage *pParent = pPage->pParent;
  988. assert( pParent->pBt==pPage->pBt );
  989. pPage->pParent = 0;
  990. releasePage(pParent);
  991. }
  992. pPage->isInit = 0;
  993. }
  994. /*
  995. ** During a rollback, when the pager reloads information into the cache
  996. ** so that the cache is restored to its original state at the start of
  997. ** the transaction, for each page restored this routine is called.
  998. **
  999. ** This routine needs to reset the extra data section at the end of the
  1000. ** page to agree with the restored data.
  1001. */
  1002. static void pageReinit(DbPage *pData, int pageSize){
  1003. MemPage *pPage;
  1004. assert( (pageSize & 7)==0 );
  1005. pPage = (MemPage *)sqlite3PagerGetExtra(pData);
  1006. if( pPage->isInit ){
  1007. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  1008. pPage->isInit = 0;
  1009. sqlite3BtreeInitPage(pPage, pPage->pParent);
  1010. }
  1011. }
  1012. /*
  1013. ** Invoke the busy handler for a btree.
  1014. */
  1015. static int sqlite3BtreeInvokeBusyHandler(void *pArg, int n){
  1016. BtShared *pBt = (BtShared*)pArg;
  1017. assert( pBt->db );
  1018. assert( sqlite3_mutex_held(pBt->db->mutex) );
  1019. return sqlite3InvokeBusyHandler(&pBt->db->busyHandler);
  1020. }
  1021. /*
  1022. ** Open a database file.
  1023. **
  1024. ** zFilename is the name of the database file. If zFilename is NULL
  1025. ** a new database with a random name is created. This randomly named
  1026. ** database file will be deleted when sqlite3BtreeClose() is called.
  1027. ** If zFilename is ":memory:" then an in-memory database is created
  1028. ** that is automatically destroyed when it is closed.
  1029. */
  1030. int sqlite3BtreeOpen(
  1031. const char *zFilename, /* Name of the file containing the BTree database */
  1032. sqlite3 *db, /* Associated database handle */
  1033. Btree **ppBtree, /* Pointer to new Btree object written here */
  1034. int flags, /* Options */
  1035. int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */
  1036. ){
  1037. sqlite3_vfs *pVfs; /* The VFS to use for this btree */
  1038. BtShared *pBt = 0; /* Shared part of btree structure */
  1039. Btree *p; /* Handle to return */
  1040. int rc = SQLITE_OK;
  1041. int nReserve;
  1042. unsigned char zDbHeader[100];
  1043. /* Set the variable isMemdb to true for an in-memory database, or
  1044. ** false for a file-based database. This symbol is only required if
  1045. ** either of the shared-data or autovacuum features are compiled
  1046. ** into the library.
  1047. */
  1048. #if !defined(SQLITE_OMIT_SHARED_CACHE) || !defined(SQLITE_OMIT_AUTOVACUUM)
  1049. #ifdef SQLITE_OMIT_MEMORYDB
  1050. const int isMemdb = 0;
  1051. #else
  1052. const int isMemdb = zFilename && !strcmp(zFilename, ":memory:");
  1053. #endif
  1054. #endif
  1055. assert( db!=0 );
  1056. assert( sqlite3_mutex_held(db->mutex) );
  1057. pVfs = db->pVfs;
  1058. p = sqlite3MallocZero(sizeof(Btree));
  1059. if( !p ){
  1060. return SQLITE_NOMEM;
  1061. }
  1062. p->inTrans = TRANS_NONE;
  1063. p->db = db;
  1064. #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
  1065. /*
  1066. ** If this Btree is a candidate for shared cache, try to find an
  1067. ** existing BtShared object that we can share with
  1068. */
  1069. if( (flags & BTREE_PRIVATE)==0
  1070. && isMemdb==0
  1071. && (db->flags & SQLITE_Vtab)==0
  1072. && zFilename && zFilename[0]
  1073. ){
  1074. if( sqlite3SharedCacheEnabled ){
  1075. int nFullPathname = pVfs->mxPathname+1;
  1076. char *zFullPathname = (char *)sqlite3_malloc(nFullPathname);
  1077. sqlite3_mutex *mutexShared;
  1078. p->sharable = 1;
  1079. if( db ){
  1080. db->flags |= SQLITE_SharedCache;
  1081. }
  1082. if( !zFullPathname ){
  1083. sqlite3_free(p);
  1084. return SQLITE_NOMEM;
  1085. }
  1086. sqlite3OsFullPathname(pVfs, zFilename, nFullPathname, zFullPathname);
  1087. mutexShared = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER);
  1088. sqlite3_mutex_enter(mutexShared);
  1089. for(pBt=sqlite3SharedCacheList; pBt; pBt=pBt->pNext){
  1090. assert( pBt->nRef>0 );
  1091. if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager))
  1092. && sqlite3PagerVfs(pBt->pPager)==pVfs ){
  1093. p->pBt = pBt;
  1094. pBt->nRef++;
  1095. break;
  1096. }
  1097. }
  1098. sqlite3_mutex_leave(mutexShared);
  1099. sqlite3_free(zFullPathname);
  1100. }
  1101. #ifdef SQLITE_DEBUG
  1102. else{
  1103. /* In debug mode, we mark all persistent databases as sharable
  1104. ** even when they are not. This exercises the locking code and
  1105. ** gives more opportunity for asserts(sqlite3_mutex_held())
  1106. ** statements to find locking problems.
  1107. */
  1108. p->sharable = 1;
  1109. }
  1110. #endif
  1111. }
  1112. #endif
  1113. if( pBt==0 ){
  1114. /*
  1115. ** The following asserts make sure that structures used by the btree are
  1116. ** the right size. This is to guard against size changes that result
  1117. ** when compiling on a different architecture.
  1118. */
  1119. assert( sizeof(i64)==8 || sizeof(i64)==4 );
  1120. assert( sizeof(u64)==8 || sizeof(u64)==4 );
  1121. assert( sizeof(u32)==4 );
  1122. assert( sizeof(u16)==2 );
  1123. assert( sizeof(Pgno)==4 );
  1124. pBt = sqlite3MallocZero( sizeof(*pBt) );
  1125. if( pBt==0 ){
  1126. rc = SQLITE_NOMEM;
  1127. goto btree_open_out;
  1128. }
  1129. pBt->busyHdr.xFunc = sqlite3BtreeInvokeBusyHandler;
  1130. pBt->busyHdr.pArg = pBt;
  1131. rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename,
  1132. EXTRA_SIZE, flags, vfsFlags);
  1133. if( rc==SQLITE_OK ){
  1134. rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader);
  1135. }
  1136. if( rc!=SQLITE_OK ){
  1137. goto btree_open_out;
  1138. }
  1139. sqlite3PagerSetBusyhandler(pBt->pPager, &pBt->busyHdr);
  1140. p->pBt = pBt;
  1141. sqlite3PagerSetDestructor(pBt->pPager, pageDestructor);
  1142. sqlite3PagerSetReiniter(pBt->pPager, pageReinit);
  1143. pBt->pCursor = 0;
  1144. pBt->pPage1 = 0;
  1145. pBt->readOnly = sqlite3PagerIsreadonly(pBt->pPager);
  1146. pBt->pageSize = get2byte(&zDbHeader[16]);
  1147. if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE
  1148. || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){
  1149. pBt->pageSize = 0;
  1150. sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize);
  1151. pBt->maxEmbedFrac = 64; /* 25% */
  1152. pBt->minEmbedFrac = 32; /* 12.5% */
  1153. pBt->minLeafFrac = 32; /* 12.5% */
  1154. #ifndef SQLITE_OMIT_AUTOVACUUM
  1155. /* If the magic name ":memory:" will create an in-memory database, then
  1156. ** leave the autoVacuum mode at 0 (do not auto-vacuum), even if
  1157. ** SQLITE_DEFAULT_AUTOVACUUM is true. On the other hand, if
  1158. ** SQLITE_OMIT_MEMORYDB has been defined, then ":memory:" is just a
  1159. ** regular file-name. In this case the auto-vacuum applies as per normal.
  1160. */
  1161. if( zFilename && !isMemdb ){
  1162. pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0);
  1163. pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0);
  1164. }
  1165. #endif
  1166. nReserve = 0;
  1167. }else{
  1168. nReserve = zDbHeader[20];
  1169. pBt->maxEmbedFrac = zDbHeader[21];
  1170. pBt->minEmbedFrac = zDbHeader[22];
  1171. pBt->minLeafFrac = zDbHeader[23];
  1172. pBt->pageSizeFixed = 1;
  1173. #ifndef SQLITE_OMIT_AUTOVACUUM
  1174. pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0);
  1175. pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0);
  1176. #endif
  1177. }
  1178. pBt->usableSize = pBt->pageSize - nReserve;
  1179. assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */
  1180. sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize);
  1181. #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
  1182. /* Add the new BtShared object to the linked list sharable BtShareds.
  1183. */
  1184. if( p->sharable ){
  1185. sqlite3_mutex *mutexShared;
  1186. pBt->nRef = 1;
  1187. mutexShared = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER);
  1188. if( SQLITE_THREADSAFE ){
  1189. pBt->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
  1190. if( pBt->mutex==0 ){
  1191. rc = SQLITE_NOMEM;
  1192. db->mallocFailed = 0;
  1193. goto btree_open_out;
  1194. }
  1195. }
  1196. sqlite3_mutex_enter(mutexShared);
  1197. pBt->pNext = sqlite3SharedCacheList;
  1198. sqlite3SharedCacheList = pBt;
  1199. sqlite3_mutex_leave(mutexShared);
  1200. }
  1201. #endif
  1202. }
  1203. #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
  1204. /* If the new Btree uses a sharable pBtShared, then link the new
  1205. ** Btree into the list of all sharable Btrees for the same connection.
  1206. ** The list is kept in ascending order by pBt address.
  1207. */
  1208. if( p->sharable ){
  1209. int i;
  1210. Btree *pSib;
  1211. for(i=0; i<db->nDb; i++){
  1212. if( (pSib = db->aDb[i].pBt)!=0 && pSib->sharable ){
  1213. while( pSib->pPrev ){ pSib = pSib->pPrev; }
  1214. if( p->pBt<pSib->pBt ){
  1215. p->pNext = pSib;
  1216. p->pPrev = 0;
  1217. pSib->pPrev = p;
  1218. }else{
  1219. while( pSib->pNext && pSib->pNext->pBt<p->pBt ){
  1220. pSib = pSib->pNext;
  1221. }
  1222. p->pNext = pSib->pNext;
  1223. p->pPrev = pSib;
  1224. if( p->pNext ){
  1225. p->pNext->pPrev = p;
  1226. }
  1227. pSib->pNext = p;
  1228. }
  1229. break;
  1230. }
  1231. }
  1232. }
  1233. #endif
  1234. *ppBtree = p;
  1235. btree_open_out:
  1236. if( rc!=SQLITE_OK ){
  1237. if( pBt && pBt->pPager ){
  1238. sqlite3PagerClose(pBt->pPager);
  1239. }
  1240. sqlite3_free(pBt);
  1241. sqlite3_free(p);
  1242. *ppBtree = 0;
  1243. }
  1244. return rc;
  1245. }
  1246. /*
  1247. ** Decrement the BtShared.nRef counter. When it reaches zero,
  1248. ** remove the BtShared structure from the sharing list. Return
  1249. ** true if the BtShared.nRef counter reaches zero and return
  1250. ** false if it is still positive.
  1251. */
  1252. static int removeFromSharingList(BtShared *pBt){
  1253. #ifndef SQLITE_OMIT_SHARED_CACHE
  1254. sqlite3_mutex *pMaster;
  1255. BtShared *pList;
  1256. int removed = 0;
  1257. assert( sqlite3_mutex_notheld(pBt->mutex) );
  1258. pMaster = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER);
  1259. sqlite3_mutex_enter(pMaster);
  1260. pBt->nRef--;
  1261. if( pBt->nRef<=0 ){
  1262. if( sqlite3SharedCacheList==pBt ){
  1263. sqlite3SharedCacheList = pBt->pNext;
  1264. }else{
  1265. pList = sqlite3SharedCacheList;
  1266. while( pList && pList->pNext!=pBt ){
  1267. pList=pList->pNext;
  1268. }
  1269. if( pList ){
  1270. pList->pNext = pBt->pNext;
  1271. }
  1272. }
  1273. if( SQLITE_THREADSAFE ){
  1274. sqlite3_mutex_free(pBt->mutex);
  1275. }
  1276. removed = 1;
  1277. }
  1278. sqlite3_mutex_leave(pMaster);
  1279. return removed;
  1280. #else
  1281. return 1;
  1282. #endif
  1283. }
  1284. /*
  1285. ** Close an open database and invalidate all cursors.
  1286. */
  1287. int sqlite3BtreeClose(Btree *p){
  1288. BtShared *pBt = p->pBt;
  1289. BtCursor *pCur;
  1290. /* Close all cursors opened via this handle. */
  1291. assert( sqlite3_mutex_held(p->db->mutex) );
  1292. sqlite3BtreeEnter(p);
  1293. pBt->db = p->db;
  1294. pCur = pBt->pCursor;
  1295. while( pCur ){
  1296. BtCursor *pTmp = pCur;
  1297. pCur = pCur->pNext;
  1298. if( pTmp->pBtree==p ){
  1299. sqlite3BtreeCloseCursor(pTmp);
  1300. }
  1301. }
  1302. /* Rollback any active transaction and free the handle structure.
  1303. ** The call to sqlite3BtreeRollback() drops any table-locks held by
  1304. ** this handle.
  1305. */
  1306. sqlite3BtreeRollback(p);
  1307. sqlite3BtreeLeave(p);
  1308. /* If there are still other outstanding references to the shared-btree
  1309. ** structure, return now. The remainder of this procedure cleans
  1310. ** up the shared-btree.
  1311. */
  1312. assert( p->wantToLock==0 && p->locked==0 );
  1313. if( !p->sharable || removeFromSharingList(pBt) ){
  1314. /* The pBt is no longer on the sharing list, so we can access
  1315. ** it without having to hold the mutex.
  1316. **
  1317. ** Clean out and delete the BtShared object.
  1318. */
  1319. assert( !pBt->pCursor );
  1320. sqlite3PagerClose(pBt->pPager);
  1321. if( pBt->xFreeSchema && pBt->pSchema ){
  1322. pBt->xFreeSchema(pBt->pSchema);
  1323. }
  1324. sqlite3_free(pBt->pSchema);
  1325. sqlite3_free(pBt);
  1326. }
  1327. #ifndef SQLITE_OMIT_SHARED_CACHE
  1328. assert( p->wantToLock==0 );
  1329. assert( p->locked==0 );
  1330. if( p->pPrev ) p->pPrev->pNext = p->pNext;
  1331. if( p->pNext ) p->pNext->pPrev = p->pPrev;
  1332. #endif
  1333. sqlite3_free(p);
  1334. return SQLITE_OK;
  1335. }
  1336. /*
  1337. ** Change the limit on the number of pages allowed in the cache.
  1338. **
  1339. ** The maximum number of cache pages is set to the absolute
  1340. ** value of mxPage. If mxPage is negative, the pager will
  1341. ** operate asynchronously - it will not stop to do fsync()s
  1342. ** to insure data is written to the disk surface before
  1343. ** continuing. Transactions still work if synchronous is off,
  1344. ** and the database cannot be corrupted if this program
  1345. ** crashes. But if the operating system crashes or there is
  1346. ** an abrupt power failure when synchronous is off, the database
  1347. ** could be left in an inconsistent and unrecoverable state.
  1348. ** Synchronous is on by default so database corruption is not
  1349. ** normally a worry.
  1350. */
  1351. int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){
  1352. BtShared *pBt = p->pBt;
  1353. assert( sqlite3_mutex_held(p->db->mutex) );
  1354. sqlite3BtreeEnter(p);
  1355. sqlite3PagerSetCachesize(pBt->pPager, mxPage);
  1356. sqlite3BtreeLeave(p);
  1357. return SQLITE_OK;
  1358. }
  1359. /*
  1360. ** Change the way data is synced to disk in order to increase or decrease
  1361. ** how well the database resists damage due to OS crashes and power
  1362. ** failures. Level 1 is the same as asynchronous (no syncs() occur and
  1363. ** there is a high probability of damage) Level 2 is the default. There
  1364. ** is a very low but non-zero probability of damage. Level 3 reduces the
  1365. ** probability of damage to near zero but with a write performance reduction.
  1366. */
  1367. #ifndef SQLITE_OMIT_PAGER_PRAGMAS
  1368. int sqlite3BtreeSetSafetyLevel(Btree *p, int level, int fullSync){
  1369. BtShared *pBt = p->pBt;
  1370. assert( sqlite3_mutex_held(p->db->mutex) );
  1371. sqlite3BtreeEnter(p);
  1372. sqlite3PagerSetSafetyLevel(pBt->pPager, level, fullSync);
  1373. sqlite3BtreeLeave(p);
  1374. return SQLITE_OK;
  1375. }
  1376. #endif
  1377. /*
  1378. ** Return TRUE if the given btree is set to safety level 1. In other
  1379. ** words, return TRUE if no sync() occurs on the disk files.
  1380. */
  1381. int sqlite3BtreeSyncDisabled(Btree *p){
  1382. BtShared *pBt = p->pBt;
  1383. int rc;
  1384. assert( sqlite3_mutex_held(p->db->mutex) );
  1385. sqlite3BtreeEnter(p);
  1386. assert( pBt && pBt->pPager );
  1387. rc = sqlite3PagerNosync(pBt->pPager);
  1388. sqlite3BtreeLeave(p);
  1389. return rc;
  1390. }
  1391. #if !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM)
  1392. /*
  1393. ** Change the default pages size and the number of reserved bytes per page.
  1394. **
  1395. ** The page size must be a power of 2 between 512 and 65536. If the page
  1396. ** size supplied does not meet this constraint then the page size is not
  1397. ** changed.
  1398. **
  1399. ** Page sizes are constrained to be a power of two so that the region
  1400. ** of the database file used for locking (beginning at PENDING_BYTE,
  1401. ** the first byte past the 1GB boundary, 0x40000000) needs to occur
  1402. ** at the beginning of a page.
  1403. **
  1404. ** If parameter nReserve is less than zero, then the number of reserved
  1405. ** bytes per page is left unchanged.
  1406. */
  1407. int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve){
  1408. int rc = SQLITE_OK;
  1409. BtShared *pBt = p->pBt;
  1410. sqlite3BtreeEnter(p);
  1411. if( pBt->pageSizeFixed ){
  1412. sqlite3BtreeLeave(p);
  1413. return SQLITE_READONLY;
  1414. }
  1415. if( nReserve<0 ){
  1416. nReserve = pBt->pageSize - pBt->usableSize;
  1417. }
  1418. if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE &&
  1419. ((pageSize-1)&pageSize)==0 ){
  1420. assert( (pageSize & 7)==0 );
  1421. assert( !pBt->pPage1 && !pBt->pCursor );
  1422. pBt->pageSize = pageSize;
  1423. rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize);
  1424. }
  1425. pBt->usableSize = pBt->pageSize - nReserve;
  1426. sqlite3BtreeLeave(p);
  1427. return rc;
  1428. }
  1429. /*
  1430. ** Return the currently defined page size
  1431. */
  1432. int sqlite3BtreeGetPageSize(Btree *p){
  1433. return p->pBt->pageSize;
  1434. }
  1435. int sqlite3BtreeGetReserve(Btree *p){
  1436. int n;
  1437. sqlite3BtreeEnter(p);
  1438. n = p->pBt->pageSize - p->pBt->usableSize;
  1439. sqlite3BtreeLeave(p);
  1440. return n;
  1441. }
  1442. /*
  1443. ** Set the maximum page count for a database if mxPage is positive.
  1444. ** No changes are made if mxPage is 0 or negative.
  1445. ** Regardless of the value of mxPage, return the maximum page count.
  1446. */
  1447. int sqlite3BtreeMaxPageCount(Btree *p, int mxPage){
  1448. int n;
  1449. sqlite3BtreeEnter(p);
  1450. n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage);
  1451. sqlite3BtreeLeave(p);
  1452. return n;
  1453. }
  1454. #endif /* !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM) */
  1455. /*
  1456. ** Change the 'auto-vacuum' property of the database. If the 'autoVacuum'
  1457. ** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it
  1458. ** is disabled. The default value for the auto-vacuum property is
  1459. ** determined by the SQLITE_DEFAULT_AUTOVACUUM macro.
  1460. */
  1461. int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){
  1462. #ifdef SQLITE_OMIT_AUTOVACUUM
  1463. return SQLITE_READONLY;
  1464. #else
  1465. BtShared *pBt = p->pBt;
  1466. int rc = SQLITE_OK;
  1467. int av = (autoVacuum?1:0);
  1468. sqlite3BtreeEnter(p);
  1469. if( pBt->pageSizeFixed && av!=pBt->autoVacuum ){
  1470. rc = SQLITE_READONLY;
  1471. }else{
  1472. pBt->autoVacuum = av;
  1473. }
  1474. sqlite3BtreeLeave(p);
  1475. return rc;
  1476. #endif
  1477. }
  1478. /*
  1479. ** Return the value of the 'auto-vacuum' property. If auto-vacuum is
  1480. ** enabled 1 is returned. Otherwise 0.
  1481. */
  1482. int sqlite3BtreeGetAutoVacuum(Btree *p){
  1483. #ifdef SQLITE_OMIT_AUTOVACUUM
  1484. return BTREE_AUTOVACUUM_NONE;
  1485. #else
  1486. int rc;
  1487. sqlite3BtreeEnter(p);
  1488. rc = (
  1489. (!p->pBt->autoVacuum)?BTREE_AUTOVACUUM_NONE:
  1490. (!p->pBt->incrVacuum)?BTREE_AUTOVACUUM_FULL:
  1491. BTREE_AUTOVACUUM_INCR
  1492. );
  1493. sqlite3BtreeLeave(p);
  1494. return rc;
  1495. #endif
  1496. }
  1497. /*
  1498. ** Get a reference to pPage1 of the database file. This will
  1499. ** also acquire a readlock on that file.
  1500. **
  1501. ** SQLITE_OK is returned on success. If the file is not a
  1502. ** well-formed database file, then SQLITE_CORRUPT is returned.
  1503. ** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM
  1504. ** is returned if we run out of memory.
  1505. */
  1506. static int lockBtree(BtShared *pBt){
  1507. int rc, pageSize;
  1508. MemPage *pPage1;
  1509. assert( sqlite3_mutex_held(pBt->mutex) );
  1510. if( pBt->pPage1 ) return SQLITE_OK;
  1511. rc = sqlite3BtreeGetPage(pBt, 1, &pPage1, 0);
  1512. if( rc!=SQLITE_OK ) return rc;
  1513. /* Do some checking to help insure the file we opened really is
  1514. ** a valid database file.
  1515. */
  1516. rc = SQLITE_NOTADB;
  1517. if( sqlite3PagerPagecount(pBt->pPager)>0 ){
  1518. u8 *page1 = pPage1->aData;
  1519. if( memcmp(page1, zMagicHeader, 16)!=0 ){
  1520. goto page1_init_failed;
  1521. }
  1522. if( page1[18]>1 ){
  1523. pBt->readOnly = 1;
  1524. }
  1525. if( page1[19]>1 ){
  1526. goto page1_init_failed;
  1527. }
  1528. pageSize = get2byte(&page1[16]);
  1529. if( ((pageSize-1)&pageSize)!=0 || pageSize<512 ||
  1530. (SQLITE_MAX_PAGE_SIZE<32768 && pageSize>SQLITE_MAX_PAGE_SIZE)
  1531. ){
  1532. goto page1_init_failed;
  1533. }
  1534. assert( (pageSize & 7)==0 );
  1535. pBt->pageSize = pageSize;
  1536. pBt->usableSize = pageSize - page1[20];
  1537. if( pBt->usableSize<500 ){
  1538. goto page1_init_failed;
  1539. }
  1540. pBt->maxEmbedFrac = page1[21];
  1541. pBt->minEmbedFrac = page1[22];
  1542. pBt->minLeafFrac = page1[23];
  1543. #ifndef SQLITE_OMIT_AUTOVACUUM
  1544. pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0);
  1545. pBt->incrVacuum = (get4byte(&page1[36 + 7*4])?1:0);
  1546. #endif
  1547. }
  1548. /* maxLocal is the maximum amount of payload to store locally for
  1549. ** a cell. Make sure it is small enough so that at least minFanout
  1550. ** cells can will fit on one page. We assume a 10-byte page header.
  1551. ** Besides the payload, the cell must store:
  1552. ** 2-byte pointer to the cell
  1553. ** 4-byte child pointer
  1554. ** 9-byte nKey value
  1555. ** 4-byte nData value
  1556. ** 4-byte overflow page pointer
  1557. ** So a cell consists of a 2-byte poiner, a header which is as much as
  1558. ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow
  1559. ** page pointer.
  1560. */
  1561. pBt->maxLocal = (pBt->usableSize-12)*pBt->maxEmbedFrac/255 - 23;
  1562. pBt->minLocal = (pBt->usableSize-12)*pBt->minEmbedFrac/255 - 23;
  1563. pBt->maxLeaf = pBt->usableSize - 35;
  1564. pBt->minLeaf = (pBt->usableSize-12)*pBt->minLeafFrac/255 - 23;
  1565. if( pBt->minLocal>pBt->maxLocal || pBt->maxLocal<0 ){
  1566. goto page1_init_failed;
  1567. }
  1568. assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) );
  1569. pBt->pPage1 = pPage1;
  1570. return SQLITE_OK;
  1571. page1_init_failed:
  1572. releasePage(pPage1);
  1573. pBt->pPage1 = 0;
  1574. return rc;
  1575. }
  1576. /*
  1577. ** This routine works like lockBtree() except that it also invokes the
  1578. ** busy callback if there is lock contention.
  1579. */
  1580. static int lockBtreeWithRetry(Btree *pRef){
  1581. int rc = SQLITE_OK;
  1582. assert( sqlite3BtreeHoldsMutex(pRef) );
  1583. if( pRef->inTrans==TRANS_NONE ){
  1584. u8 inTransaction = pRef->pBt->inTransaction;
  1585. btreeIntegrity(pRef);
  1586. rc = sqlite3BtreeBeginTrans(pRef, 0);
  1587. pRef->pBt->inTransaction = inTransaction;
  1588. pRef->inTrans = TRANS_NONE;
  1589. if( rc==SQLITE_OK ){
  1590. pRef->pBt->nTransaction--;
  1591. }
  1592. btreeIntegrity(pRef);
  1593. }
  1594. return rc;
  1595. }
  1596. /*
  1597. ** If there are no outstanding cursors and we are not in the middle
  1598. ** of a transaction but there is a read lock on the database, then
  1599. ** this routine unrefs the first page of the database file which
  1600. ** has the effect of releasing the read lock.
  1601. **
  1602. ** If there are any outstanding cursors, this routine is a no-op.
  1603. **
  1604. ** If there is a transaction in progress, this routine is a no-op.
  1605. */
  1606. static void unlockBtreeIfUnused(BtShared *pBt){
  1607. assert( sqlite3_mutex_held(pBt->mutex) );
  1608. if( pBt->inTransaction==TRANS_NONE && pBt->pCursor==0 && pBt->pPage1!=0 ){
  1609. if( sqlite3PagerRefcount(pBt->pPager)>=1 ){
  1610. if( pBt->pPage1->aData==0 ){
  1611. MemPage *pPage = pBt->pPage1;
  1612. pPage->aData = sqlite3PagerGetData(pPage->pDbPage);
  1613. pPage->pBt = pBt;
  1614. pPage->pgno = 1;
  1615. }
  1616. releasePage(pBt->pPage1);
  1617. }
  1618. pBt->pPage1 = 0;
  1619. pBt->inStmt = 0;
  1620. }
  1621. }
  1622. /*
  1623. ** Create a new database by initializing the first page of the
  1624. ** file.
  1625. */
  1626. static int newDatabase(BtShared *pBt){
  1627. MemPage *pP1;
  1628. unsigned char *data;
  1629. int rc;
  1630. assert( sqlite3_mutex_held(pBt->mutex) );
  1631. if( sqlite3PagerPagecount(pBt->pPager)>0 ) return SQLITE_OK;
  1632. pP1 = pBt->pPage1;
  1633. assert( pP1!=0 );
  1634. data = pP1->aData;
  1635. rc = sqlite3PagerWrite(pP1->pDbPage);
  1636. if( rc ) return rc;
  1637. memcpy(data, zMagicHeader, sizeof(zMagicHeader));
  1638. assert( sizeof(zMagicHeader)==16 );
  1639. put2byte(&data[16], pBt->pageSize);
  1640. data[18] = 1;
  1641. data[19] = 1;
  1642. data[20] = pBt->pageSize - pBt->usableSize;
  1643. data[21] = pBt->maxEmbedFrac;
  1644. data[22] = pBt->minEmbedFrac;
  1645. data[23] = pBt->minLeafFrac;
  1646. memset(&data[24], 0, 100-24);
  1647. zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA );
  1648. pBt->pageSizeFixed = 1;
  1649. #ifndef SQLITE_OMIT_AUTOVACUUM
  1650. assert( pBt->autoVacuum==1 || pBt->autoVacuum==0 );
  1651. assert( pBt->incrVacuum==1 || pBt->incrVacuum==0 );
  1652. put4byte(&data[36 + 4*4], pBt->autoVacuum);
  1653. put4byte(&data[36 + 7*4], pBt->incrVacuum);
  1654. #endif
  1655. return SQLITE_OK;
  1656. }
  1657. /*
  1658. ** Attempt to start a new transaction. A write-transaction
  1659. ** is started if the second argument is nonzero, otherwise a read-
  1660. ** transaction. If the second argument is 2 or more and exclusive
  1661. ** transaction is started, meaning that no other process is allowed
  1662. ** to access the database. A preexisting transaction may not be
  1663. ** upgraded to exclusive by calling this routine a second time - the
  1664. ** exclusivity flag only works for a new transaction.
  1665. **
  1666. ** A write-transaction must be started before attempting any
  1667. ** changes to the database. None of the following routines
  1668. ** will work unless a transaction is started first:
  1669. **
  1670. ** sqlite3BtreeCreateTable()
  1671. ** sqlite3BtreeCreateIndex()
  1672. ** sqlite3BtreeClearTable()
  1673. ** sqlite3BtreeDropTable()
  1674. ** sqlite3BtreeInsert()
  1675. ** sqlite3BtreeDelete()
  1676. ** sqlite3BtreeUpdateMeta()
  1677. **
  1678. ** If an initial attempt to acquire the lock fails because of lock contention
  1679. ** and the database was previously unlocked, then invoke the busy handler
  1680. ** if there is one. But if there was previously a read-lock, do not
  1681. ** invoke the busy handler - just return SQLITE_BUSY. SQLITE_BUSY is
  1682. ** returned when there is already a read-lock in order to avoid a deadlock.
  1683. **
  1684. ** Suppose there are two processes A and B. A has a read lock and B has
  1685. ** a reserved lock. B tries to promote to exclusive but is blocked because
  1686. ** of A's read lock. A tries to promote to reserved but is blocked by B.
  1687. ** One or the other of the two processes must give way or there can be
  1688. ** no progress. By returning SQLITE_BUSY and not invoking the busy callback
  1689. ** when A already has a read lock, we encourage A to give up and let B
  1690. ** proceed.
  1691. */
  1692. int sqlite3BtreeBeginTrans(Btree *p, int wrflag){
  1693. BtShared *pBt = p->pBt;
  1694. int rc = SQLITE_OK;
  1695. sqlite3BtreeEnter(p);
  1696. pBt->db = p->db;
  1697. btreeIntegrity(p);
  1698. /* If the btree is already in a write-transaction, or it
  1699. ** is already in a read-transaction and a read-transaction
  1700. ** is requested, this is a no-op.
  1701. */
  1702. if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){
  1703. goto trans_begun;
  1704. }
  1705. /* Write transactions are not possible on a read-only database */
  1706. if( pBt->readOnly && wrflag ){
  1707. rc = SQLITE_READONLY;
  1708. goto trans_begun;
  1709. }
  1710. /* If another database handle has already opened a write transaction
  1711. ** on this shared-btree structure and a second write transaction is
  1712. ** requested, return SQLITE_BUSY.
  1713. */
  1714. if( pBt->inTransaction==TRANS_WRITE && wrflag ){
  1715. rc = SQLITE_BUSY;
  1716. goto trans_begun;
  1717. }
  1718. do {
  1719. if( pBt->pPage1==0 ){
  1720. rc = lockBtree(pBt);
  1721. }
  1722. if( rc==SQLITE_OK && wrflag ){
  1723. if( pBt->readOnly ){
  1724. rc = SQLITE_READONLY;
  1725. }else{
  1726. rc = sqlite3PagerBegin(pBt->pPage1->pDbPage, wrflag>1);
  1727. if( rc==SQLITE_OK ){
  1728. rc = newDatabase(pBt);
  1729. }
  1730. }
  1731. }
  1732. if( rc==SQLITE_OK ){
  1733. if( wrflag ) pBt->inStmt = 0;
  1734. }else{
  1735. unlockBtreeIfUnused(pBt);
  1736. }
  1737. }while( rc==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE &&
  1738. sqlite3BtreeInvokeBusyHandler(pBt, 0) );
  1739. if( rc==SQLITE_OK ){
  1740. if( p->inTrans==TRANS_NONE ){
  1741. pBt->nTransaction++;
  1742. }
  1743. p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ);
  1744. if( p->inTrans>pBt->inTransaction ){
  1745. pBt->inTransaction = p->inTrans;
  1746. }
  1747. }
  1748. trans_begun:
  1749. btreeIntegrity(p);
  1750. sqlite3BtreeLeave(p);
  1751. return rc;
  1752. }
  1753. #ifndef SQLITE_OMIT_AUTOVACUUM
  1754. /*
  1755. ** Set the pointer-map entries for all children of page pPage. Also, if
  1756. ** pPage contains cells that point to overflow pages, set the pointer
  1757. ** map entries for the overflow pages as well.
  1758. */
  1759. static int setChildPtrmaps(MemPage *pPage){
  1760. int i; /* Counter variable */
  1761. int nCell; /* Number of cells in page pPage */
  1762. int rc; /* Return code */
  1763. BtShared *pBt = pPage->pBt;
  1764. int isInitOrig = pPage->isInit;
  1765. Pgno pgno = pPage->pgno;
  1766. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  1767. rc = sqlite3BtreeInitPage(pPage, pPage->pParent);
  1768. if( rc!=SQLITE_OK ){
  1769. goto set_child_ptrmaps_out;
  1770. }
  1771. nCell = pPage->nCell;
  1772. for(i=0; i<nCell; i++){
  1773. u8 *pCell = findCell(pPage, i);
  1774. rc = ptrmapPutOvflPtr(pPage, pCell);
  1775. if( rc!=SQLITE_OK ){
  1776. goto set_child_ptrmaps_out;
  1777. }
  1778. if( !pPage->leaf ){
  1779. Pgno childPgno = get4byte(pCell);
  1780. rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno);
  1781. if( rc!=SQLITE_OK ) goto set_child_ptrmaps_out;
  1782. }
  1783. }
  1784. if( !pPage->leaf ){
  1785. Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
  1786. rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno);
  1787. }
  1788. set_child_ptrmaps_out:
  1789. pPage->isInit = isInitOrig;
  1790. return rc;
  1791. }
  1792. /*
  1793. ** Somewhere on pPage, which is guarenteed to be a btree page, not an overflow
  1794. ** page, is a pointer to page iFrom. Modify this pointer so that it points to
  1795. ** iTo. Parameter eType describes the type of pointer to be modified, as
  1796. ** follows:
  1797. **
  1798. ** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child
  1799. ** page of pPage.
  1800. **
  1801. ** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow
  1802. ** page pointed to by one of the cells on pPage.
  1803. **
  1804. ** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next
  1805. ** overflow page in the list.
  1806. */
  1807. static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){
  1808. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  1809. if( eType==PTRMAP_OVERFLOW2 ){
  1810. /* The pointer is always the first 4 bytes of the page in this case. */
  1811. if( get4byte(pPage->aData)!=iFrom ){
  1812. return SQLITE_CORRUPT_BKPT;
  1813. }
  1814. put4byte(pPage->aData, iTo);
  1815. }else{
  1816. int isInitOrig = pPage->isInit;
  1817. int i;
  1818. int nCell;
  1819. sqlite3BtreeInitPage(pPage, 0);
  1820. nCell = pPage->nCell;
  1821. for(i=0; i<nCell; i++){
  1822. u8 *pCell = findCell(pPage, i);
  1823. if( eType==PTRMAP_OVERFLOW1 ){
  1824. CellInfo info;
  1825. sqlite3BtreeParseCellPtr(pPage, pCell, &info);
  1826. if( info.iOverflow ){
  1827. if( iFrom==get4byte(&pCell[info.iOverflow]) ){
  1828. put4byte(&pCell[info.iOverflow], iTo);
  1829. break;
  1830. }
  1831. }
  1832. }else{
  1833. if( get4byte(pCell)==iFrom ){
  1834. put4byte(pCell, iTo);
  1835. break;
  1836. }
  1837. }
  1838. }
  1839. if( i==nCell ){
  1840. if( eType!=PTRMAP_BTREE ||
  1841. get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){
  1842. return SQLITE_CORRUPT_BKPT;
  1843. }
  1844. put4byte(&pPage->aData[pPage->hdrOffset+8], iTo);
  1845. }
  1846. pPage->isInit = isInitOrig;
  1847. }
  1848. return SQLITE_OK;
  1849. }
  1850. /*
  1851. ** Move the open database page pDbPage to location iFreePage in the
  1852. ** database. The pDbPage reference remains valid.
  1853. */
  1854. static int relocatePage(
  1855. BtShared *pBt, /* Btree */
  1856. MemPage *pDbPage, /* Open page to move */
  1857. u8 eType, /* Pointer map 'type' entry for pDbPage */
  1858. Pgno iPtrPage, /* Pointer map 'page-no' entry for pDbPage */
  1859. Pgno iFreePage /* The location to move pDbPage to */
  1860. ){
  1861. MemPage *pPtrPage; /* The page that contains a pointer to pDbPage */
  1862. Pgno iDbPage = pDbPage->pgno;
  1863. Pager *pPager = pBt->pPager;
  1864. int rc;
  1865. assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 ||
  1866. eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE );
  1867. assert( sqlite3_mutex_held(pBt->mutex) );
  1868. assert( pDbPage->pBt==pBt );
  1869. /* Move page iDbPage from its current location to page number iFreePage */
  1870. TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n",
  1871. iDbPage, iFreePage, iPtrPage, eType));
  1872. rc = sqlite3PagerMovepage(pPager, pDbPage->pDbPage, iFreePage);
  1873. if( rc!=SQLITE_OK ){
  1874. return rc;
  1875. }
  1876. pDbPage->pgno = iFreePage;
  1877. /* If pDbPage was a btree-page, then it may have child pages and/or cells
  1878. ** that point to overflow pages. The pointer map entries for all these
  1879. ** pages need to be changed.
  1880. **
  1881. ** If pDbPage is an overflow page, then the first 4 bytes may store a
  1882. ** pointer to a subsequent overflow page. If this is the case, then
  1883. ** the pointer map needs to be updated for the subsequent overflow page.
  1884. */
  1885. if( eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ){
  1886. rc = setChildPtrmaps(pDbPage);
  1887. if( rc!=SQLITE_OK ){
  1888. return rc;
  1889. }
  1890. }else{
  1891. Pgno nextOvfl = get4byte(pDbPage->aData);
  1892. if( nextOvfl!=0 ){
  1893. rc = ptrmapPut(pBt, nextOvfl, PTRMAP_OVERFLOW2, iFreePage);
  1894. if( rc!=SQLITE_OK ){
  1895. return rc;
  1896. }
  1897. }
  1898. }
  1899. /* Fix the database pointer on page iPtrPage that pointed at iDbPage so
  1900. ** that it points at iFreePage. Also fix the pointer map entry for
  1901. ** iPtrPage.
  1902. */
  1903. if( eType!=PTRMAP_ROOTPAGE ){
  1904. rc = sqlite3BtreeGetPage(pBt, iPtrPage, &pPtrPage, 0);
  1905. if( rc!=SQLITE_OK ){
  1906. return rc;
  1907. }
  1908. rc = sqlite3PagerWrite(pPtrPage->pDbPage);
  1909. if( rc!=SQLITE_OK ){
  1910. releasePage(pPtrPage);
  1911. return rc;
  1912. }
  1913. rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType);
  1914. releasePage(pPtrPage);
  1915. if( rc==SQLITE_OK ){
  1916. rc = ptrmapPut(pBt, iFreePage, eType, iPtrPage);
  1917. }
  1918. }
  1919. return rc;
  1920. }
  1921. /* Forward declaration required by incrVacuumStep(). */
  1922. static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8);
  1923. /*
  1924. ** Perform a single step of an incremental-vacuum. If successful,
  1925. ** return SQLITE_OK. If there is no work to do (and therefore no
  1926. ** point in calling this function again), return SQLITE_DONE.
  1927. **
  1928. ** More specifically, this function attempts to re-organize the
  1929. ** database so that the last page of the file currently in use
  1930. ** is no longer in use.
  1931. **
  1932. ** If the nFin parameter is non-zero, the implementation assumes
  1933. ** that the caller will keep calling incrVacuumStep() until
  1934. ** it returns SQLITE_DONE or an error, and that nFin is the
  1935. ** number of pages the database file will contain after this
  1936. ** process is complete.
  1937. */
  1938. static int incrVacuumStep(BtShared *pBt, Pgno nFin){
  1939. Pgno iLastPg; /* Last page in the database */
  1940. Pgno nFreeList; /* Number of pages still on the free-list */
  1941. assert( sqlite3_mutex_held(pBt->mutex) );
  1942. iLastPg = pBt->nTrunc;
  1943. if( iLastPg==0 ){
  1944. iLastPg = sqlite3PagerPagecount(pBt->pPager);
  1945. }
  1946. if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){
  1947. int rc;
  1948. u8 eType;
  1949. Pgno iPtrPage;
  1950. nFreeList = get4byte(&pBt->pPage1->aData[36]);
  1951. if( nFreeList==0 || nFin==iLastPg ){
  1952. return SQLITE_DONE;
  1953. }
  1954. rc = ptrmapGet(pBt, iLastPg, &eType, &iPtrPage);
  1955. if( rc!=SQLITE_OK ){
  1956. return rc;
  1957. }
  1958. if( eType==PTRMAP_ROOTPAGE ){
  1959. return SQLITE_CORRUPT_BKPT;
  1960. }
  1961. if( eType==PTRMAP_FREEPAGE ){
  1962. if( nFin==0 ){
  1963. /* Remove the page from the files free-list. This is not required
  1964. ** if nFin is non-zero. In that case, the free-list will be
  1965. ** truncated to zero after this function returns, so it doesn't
  1966. ** matter if it still contains some garbage entries.
  1967. */
  1968. Pgno iFreePg;
  1969. MemPage *pFreePg;
  1970. rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, 1);
  1971. if( rc!=SQLITE_OK ){
  1972. return rc;
  1973. }
  1974. assert( iFreePg==iLastPg );
  1975. releasePage(pFreePg);
  1976. }
  1977. } else {
  1978. Pgno iFreePg; /* Index of free page to move pLastPg to */
  1979. MemPage *pLastPg;
  1980. rc = sqlite3BtreeGetPage(pBt, iLastPg, &pLastPg, 0);
  1981. if( rc!=SQLITE_OK ){
  1982. return rc;
  1983. }
  1984. /* If nFin is zero, this loop runs exactly once and page pLastPg
  1985. ** is swapped with the first free page pulled off the free list.
  1986. **
  1987. ** On the other hand, if nFin is greater than zero, then keep
  1988. ** looping until a free-page located within the first nFin pages
  1989. ** of the file is found.
  1990. */
  1991. do {
  1992. MemPage *pFreePg;
  1993. rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, 0, 0);
  1994. if( rc!=SQLITE_OK ){
  1995. releasePage(pLastPg);
  1996. return rc;
  1997. }
  1998. releasePage(pFreePg);
  1999. }while( nFin!=0 && iFreePg>nFin );
  2000. assert( iFreePg<iLastPg );
  2001. rc = sqlite3PagerWrite(pLastPg->pDbPage);
  2002. if( rc==SQLITE_OK ){
  2003. rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg);
  2004. }
  2005. releasePage(pLastPg);
  2006. if( rc!=SQLITE_OK ){
  2007. return rc;
  2008. }
  2009. }
  2010. }
  2011. pBt->nTrunc = iLastPg - 1;
  2012. while( pBt->nTrunc==PENDING_BYTE_PAGE(pBt)||PTRMAP_ISPAGE(pBt, pBt->nTrunc) ){
  2013. pBt->nTrunc--;
  2014. }
  2015. return SQLITE_OK;
  2016. }
  2017. /*
  2018. ** A write-transaction must be opened before calling this function.
  2019. ** It performs a single unit of work towards an incremental vacuum.
  2020. **
  2021. ** If the incremental vacuum is finished after this function has run,
  2022. ** SQLITE_DONE is returned. If it is not finished, but no error occurred,
  2023. ** SQLITE_OK is returned. Otherwise an SQLite error code.
  2024. */
  2025. int sqlite3BtreeIncrVacuum(Btree *p){
  2026. int rc;
  2027. BtShared *pBt = p->pBt;
  2028. sqlite3BtreeEnter(p);
  2029. pBt->db = p->db;
  2030. assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE );
  2031. if( !pBt->autoVacuum ){
  2032. rc = SQLITE_DONE;
  2033. }else{
  2034. invalidateAllOverflowCache(pBt);
  2035. rc = incrVacuumStep(pBt, 0);
  2036. }
  2037. sqlite3BtreeLeave(p);
  2038. return rc;
  2039. }
  2040. /*
  2041. ** This routine is called prior to sqlite3PagerCommit when a transaction
  2042. ** is committed for an auto-vacuum database.
  2043. **
  2044. ** If SQLITE_OK is returned, then *pnTrunc is set to the number of pages
  2045. ** the database file should be truncated to during the commit process.
  2046. ** i.e. the database has been reorganized so that only the first *pnTrunc
  2047. ** pages are in use.
  2048. */
  2049. static int autoVacuumCommit(BtShared *pBt, Pgno *pnTrunc){
  2050. int rc = SQLITE_OK;
  2051. Pager *pPager = pBt->pPager;
  2052. #ifndef NDEBUG
  2053. int nRef = sqlite3PagerRefcount(pPager);
  2054. #endif
  2055. assert( sqlite3_mutex_held(pBt->mutex) );
  2056. invalidateAllOverflowCache(pBt);
  2057. assert(pBt->autoVacuum);
  2058. if( !pBt->incrVacuum ){
  2059. Pgno nFin = 0;
  2060. if( pBt->nTrunc==0 ){
  2061. Pgno nFree;
  2062. Pgno nPtrmap;
  2063. const int pgsz = pBt->pageSize;
  2064. Pgno nOrig = sqlite3PagerPagecount(pBt->pPager);
  2065. if( PTRMAP_ISPAGE(pBt, nOrig) ){
  2066. return SQLITE_CORRUPT_BKPT;
  2067. }
  2068. if( nOrig==PENDING_BYTE_PAGE(pBt) ){
  2069. nOrig--;
  2070. }
  2071. nFree = get4byte(&pBt->pPage1->aData[36]);
  2072. nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+pgsz/5)/(pgsz/5);
  2073. nFin = nOrig - nFree - nPtrmap;
  2074. if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<=PENDING_BYTE_PAGE(pBt) ){
  2075. nFin--;
  2076. }
  2077. while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){
  2078. nFin--;
  2079. }
  2080. }
  2081. while( rc==SQLITE_OK ){
  2082. rc = incrVacuumStep(pBt, nFin);
  2083. }
  2084. if( rc==SQLITE_DONE ){
  2085. assert(nFin==0 || pBt->nTrunc==0 || nFin<=pBt->nTrunc);
  2086. rc = SQLITE_OK;
  2087. if( pBt->nTrunc ){
  2088. rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
  2089. put4byte(&pBt->pPage1->aData[32], 0);
  2090. put4byte(&pBt->pPage1->aData[36], 0);
  2091. pBt->nTrunc = nFin;
  2092. }
  2093. }
  2094. if( rc!=SQLITE_OK ){
  2095. sqlite3PagerRollback(pPager);
  2096. }
  2097. }
  2098. if( rc==SQLITE_OK ){
  2099. *pnTrunc = pBt->nTrunc;
  2100. pBt->nTrunc = 0;
  2101. }
  2102. assert( nRef==sqlite3PagerRefcount(pPager) );
  2103. return rc;
  2104. }
  2105. #endif
  2106. /*
  2107. ** This routine does the first phase of a two-phase commit. This routine
  2108. ** causes a rollback journal to be created (if it does not already exist)
  2109. ** and populated with enough information so that if a power loss occurs
  2110. ** the database can be restored to its original state by playing back
  2111. ** the journal. Then the contents of the journal are flushed out to
  2112. ** the disk. After the journal is safely on oxide, the changes to the
  2113. ** database are written into the database file and flushed to oxide.
  2114. ** At the end of this call, the rollback journal still exists on the
  2115. ** disk and we are still holding all locks, so the transaction has not
  2116. ** committed. See sqlite3BtreeCommit() for the second phase of the
  2117. ** commit process.
  2118. **
  2119. ** This call is a no-op if no write-transaction is currently active on pBt.
  2120. **
  2121. ** Otherwise, sync the database file for the btree pBt. zMaster points to
  2122. ** the name of a master journal file that should be written into the
  2123. ** individual journal file, or is NULL, indicating no master journal file
  2124. ** (single database transaction).
  2125. **
  2126. ** When this is called, the master journal should already have been
  2127. ** created, populated with this journal pointer and synced to disk.
  2128. **
  2129. ** Once this is routine has returned, the only thing required to commit
  2130. ** the write-transaction for this database file is to delete the journal.
  2131. */
  2132. int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){
  2133. int rc = SQLITE_OK;
  2134. if( p->inTrans==TRANS_WRITE ){
  2135. BtShared *pBt = p->pBt;
  2136. Pgno nTrunc = 0;
  2137. sqlite3BtreeEnter(p);
  2138. pBt->db = p->db;
  2139. #ifndef SQLITE_OMIT_AUTOVACUUM
  2140. if( pBt->autoVacuum ){
  2141. rc = autoVacuumCommit(pBt, &nTrunc);
  2142. if( rc!=SQLITE_OK ){
  2143. sqlite3BtreeLeave(p);
  2144. return rc;
  2145. }
  2146. }
  2147. #endif
  2148. rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zMaster, nTrunc);
  2149. sqlite3BtreeLeave(p);
  2150. }
  2151. return rc;
  2152. }
  2153. /*
  2154. ** Commit the transaction currently in progress.
  2155. **
  2156. ** This routine implements the second phase of a 2-phase commit. The
  2157. ** sqlite3BtreeCommitPhaseOne() routine does the first phase and should be
  2158. ** invoked prior to calling this routine. The sqlite3BtreeCommitPhaseOne() routine did
  2159. ** all the work of writing information out to disk and flushing the
  2160. ** contents so that they are written onto the disk platter. All this
  2161. ** routine has to do is delete or truncate the rollback journal
  2162. ** (which causes the transaction to commit) and drop locks.
  2163. **
  2164. ** This will release the write lock on the database file. If there
  2165. ** are no active cursors, it also releases the read lock.
  2166. */
  2167. int sqlite3BtreeCommitPhaseTwo(Btree *p){
  2168. BtShared *pBt = p->pBt;
  2169. sqlite3BtreeEnter(p);
  2170. pBt->db = p->db;
  2171. btreeIntegrity(p);
  2172. /* If the handle has a write-transaction open, commit the shared-btrees
  2173. ** transaction and set the shared state to TRANS_READ.
  2174. */
  2175. if( p->inTrans==TRANS_WRITE ){
  2176. int rc;
  2177. assert( pBt->inTransaction==TRANS_WRITE );
  2178. assert( pBt->nTransaction>0 );
  2179. rc = sqlite3PagerCommitPhaseTwo(pBt->pPager);
  2180. if( rc!=SQLITE_OK ){
  2181. sqlite3BtreeLeave(p);
  2182. return rc;
  2183. }
  2184. pBt->inTransaction = TRANS_READ;
  2185. pBt->inStmt = 0;
  2186. }
  2187. unlockAllTables(p);
  2188. /* If the handle has any kind of transaction open, decrement the transaction
  2189. ** count of the shared btree. If the transaction count reaches 0, set
  2190. ** the shared state to TRANS_NONE. The unlockBtreeIfUnused() call below
  2191. ** will unlock the pager.
  2192. */
  2193. if( p->inTrans!=TRANS_NONE ){
  2194. pBt->nTransaction--;
  2195. if( 0==pBt->nTransaction ){
  2196. pBt->inTransaction = TRANS_NONE;
  2197. }
  2198. }
  2199. /* Set the handles current transaction state to TRANS_NONE and unlock
  2200. ** the pager if this call closed the only read or write transaction.
  2201. */
  2202. p->inTrans = TRANS_NONE;
  2203. unlockBtreeIfUnused(pBt);
  2204. btreeIntegrity(p);
  2205. sqlite3BtreeLeave(p);
  2206. return SQLITE_OK;
  2207. }
  2208. /*
  2209. ** Do both phases of a commit.
  2210. */
  2211. int sqlite3BtreeCommit(Btree *p){
  2212. int rc;
  2213. sqlite3BtreeEnter(p);
  2214. rc = sqlite3BtreeCommitPhaseOne(p, 0);
  2215. if( rc==SQLITE_OK ){
  2216. rc = sqlite3BtreeCommitPhaseTwo(p);
  2217. }
  2218. sqlite3BtreeLeave(p);
  2219. return rc;
  2220. }
  2221. #ifndef NDEBUG
  2222. /*
  2223. ** Return the number of write-cursors open on this handle. This is for use
  2224. ** in assert() expressions, so it is only compiled if NDEBUG is not
  2225. ** defined.
  2226. **
  2227. ** For the purposes of this routine, a write-cursor is any cursor that
  2228. ** is capable of writing to the database. That means the cursor was
  2229. ** originally opened for writing and the cursor has not been disabled
  2230. ** by having its state changed to CURSOR_FAULT.
  2231. */
  2232. static int countWriteCursors(BtShared *pBt){
  2233. BtCursor *pCur;
  2234. int r = 0;
  2235. for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
  2236. if( pCur->wrFlag && pCur->eState!=CURSOR_FAULT ) r++;
  2237. }
  2238. return r;
  2239. }
  2240. #endif
  2241. /*
  2242. ** This routine sets the state to CURSOR_FAULT and the error
  2243. ** code to errCode for every cursor on BtShared that pBtree
  2244. ** references.
  2245. **
  2246. ** Every cursor is tripped, including cursors that belong
  2247. ** to other database connections that happen to be sharing
  2248. ** the cache with pBtree.
  2249. **
  2250. ** This routine gets called when a rollback occurs.
  2251. ** All cursors using the same cache must be tripped
  2252. ** to prevent them from trying to use the btree after
  2253. ** the rollback. The rollback may have deleted tables
  2254. ** or moved root pages, so it is not sufficient to
  2255. ** save the state of the cursor. The cursor must be
  2256. ** invalidated.
  2257. */
  2258. void sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode){
  2259. BtCursor *p;
  2260. sqlite3BtreeEnter(pBtree);
  2261. for(p=pBtree->pBt->pCursor; p; p=p->pNext){
  2262. clearCursorPosition(p);
  2263. p->eState = CURSOR_FAULT;
  2264. p->skip = errCode;
  2265. }
  2266. sqlite3BtreeLeave(pBtree);
  2267. }
  2268. /*
  2269. ** Rollback the transaction in progress. All cursors will be
  2270. ** invalidated by this operation. Any attempt to use a cursor
  2271. ** that was open at the beginning of this operation will result
  2272. ** in an error.
  2273. **
  2274. ** This will release the write lock on the database file. If there
  2275. ** are no active cursors, it also releases the read lock.
  2276. */
  2277. int sqlite3BtreeRollback(Btree *p){
  2278. int rc;
  2279. BtShared *pBt = p->pBt;
  2280. MemPage *pPage1;
  2281. sqlite3BtreeEnter(p);
  2282. pBt->db = p->db;
  2283. rc = saveAllCursors(pBt, 0, 0);
  2284. #ifndef SQLITE_OMIT_SHARED_CACHE
  2285. if( rc!=SQLITE_OK ){
  2286. /* This is a horrible situation. An IO or malloc() error occured whilst
  2287. ** trying to save cursor positions. If this is an automatic rollback (as
  2288. ** the result of a constraint, malloc() failure or IO error) then
  2289. ** the cache may be internally inconsistent (not contain valid trees) so
  2290. ** we cannot simply return the error to the caller. Instead, abort
  2291. ** all queries that may be using any of the cursors that failed to save.
  2292. */
  2293. sqlite3BtreeTripAllCursors(p, rc);
  2294. }
  2295. #endif
  2296. btreeIntegrity(p);
  2297. unlockAllTables(p);
  2298. if( p->inTrans==TRANS_WRITE ){
  2299. int rc2;
  2300. #ifndef SQLITE_OMIT_AUTOVACUUM
  2301. pBt->nTrunc = 0;
  2302. #endif
  2303. assert( TRANS_WRITE==pBt->inTransaction );
  2304. rc2 = sqlite3PagerRollback(pBt->pPager);
  2305. if( rc2!=SQLITE_OK ){
  2306. rc = rc2;
  2307. }
  2308. /* The rollback may have destroyed the pPage1->aData value. So
  2309. ** call sqlite3BtreeGetPage() on page 1 again to make
  2310. ** sure pPage1->aData is set correctly. */
  2311. if( sqlite3BtreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){
  2312. releasePage(pPage1);
  2313. }
  2314. assert( countWriteCursors(pBt)==0 );
  2315. pBt->inTransaction = TRANS_READ;
  2316. }
  2317. if( p->inTrans!=TRANS_NONE ){
  2318. assert( pBt->nTransaction>0 );
  2319. pBt->nTransaction--;
  2320. if( 0==pBt->nTransaction ){
  2321. pBt->inTransaction = TRANS_NONE;
  2322. }
  2323. }
  2324. p->inTrans = TRANS_NONE;
  2325. pBt->inStmt = 0;
  2326. unlockBtreeIfUnused(pBt);
  2327. btreeIntegrity(p);
  2328. sqlite3BtreeLeave(p);
  2329. return rc;
  2330. }
  2331. /*
  2332. ** Start a statement subtransaction. The subtransaction can
  2333. ** be rolled back independently of the main transaction.
  2334. ** You must start a transaction before starting a subtransaction.
  2335. ** The subtransaction is ended automatically if the main transaction
  2336. ** commits or rolls back.
  2337. **
  2338. ** Only one subtransaction may be active at a time. It is an error to try
  2339. ** to start a new subtransaction if another subtransaction is already active.
  2340. **
  2341. ** Statement subtransactions are used around individual SQL statements
  2342. ** that are contained within a BEGIN...COMMIT block. If a constraint
  2343. ** error occurs within the statement, the effect of that one statement
  2344. ** can be rolled back without having to rollback the entire transaction.
  2345. */
  2346. int sqlite3BtreeBeginStmt(Btree *p){
  2347. int rc;
  2348. BtShared *pBt = p->pBt;
  2349. sqlite3BtreeEnter(p);
  2350. pBt->db = p->db;
  2351. if( (p->inTrans!=TRANS_WRITE) || pBt->inStmt ){
  2352. rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
  2353. }else{
  2354. assert( pBt->inTransaction==TRANS_WRITE );
  2355. rc = pBt->readOnly ? SQLITE_OK : sqlite3PagerStmtBegin(pBt->pPager);
  2356. pBt->inStmt = 1;
  2357. }
  2358. sqlite3BtreeLeave(p);
  2359. return rc;
  2360. }
  2361. /*
  2362. ** Commit the statement subtransaction currently in progress. If no
  2363. ** subtransaction is active, this is a no-op.
  2364. */
  2365. int sqlite3BtreeCommitStmt(Btree *p){
  2366. int rc;
  2367. BtShared *pBt = p->pBt;
  2368. sqlite3BtreeEnter(p);
  2369. pBt->db = p->db;
  2370. if( pBt->inStmt && !pBt->readOnly ){
  2371. rc = sqlite3PagerStmtCommit(pBt->pPager);
  2372. }else{
  2373. rc = SQLITE_OK;
  2374. }
  2375. pBt->inStmt = 0;
  2376. sqlite3BtreeLeave(p);
  2377. return rc;
  2378. }
  2379. /*
  2380. ** Rollback the active statement subtransaction. If no subtransaction
  2381. ** is active this routine is a no-op.
  2382. **
  2383. ** All cursors will be invalidated by this operation. Any attempt
  2384. ** to use a cursor that was open at the beginning of this operation
  2385. ** will result in an error.
  2386. */
  2387. int sqlite3BtreeRollbackStmt(Btree *p){
  2388. int rc = SQLITE_OK;
  2389. BtShared *pBt = p->pBt;
  2390. sqlite3BtreeEnter(p);
  2391. pBt->db = p->db;
  2392. if( pBt->inStmt && !pBt->readOnly ){
  2393. rc = sqlite3PagerStmtRollback(pBt->pPager);
  2394. assert( countWriteCursors(pBt)==0 );
  2395. pBt->inStmt = 0;
  2396. }
  2397. sqlite3BtreeLeave(p);
  2398. return rc;
  2399. }
  2400. /*
  2401. ** Default key comparison function to be used if no comparison function
  2402. ** is specified on the sqlite3BtreeCursor() call.
  2403. */
  static int dfltCompare(
    void *NotUsed,             /* User data is not used */
    int n1, const void *p1,    /* First key to compare */
    int n2, const void *p2     /* Second key to compare */
  ){
    int nCommon = n2;          /* Length of the shorter key */
    int c;

    if( n1<n2 ){
      nCommon = n1;
    }
    /* Compare the common prefix; if it is equal the key lengths decide. */
    c = memcmp(p1, p2, nCommon);
    return c ? c : n1 - n2;
  }
  2416. /*
  2417. ** Create a new cursor for the BTree whose root is on the page
  2418. ** iTable. The act of acquiring a cursor gets a read lock on
  2419. ** the database file.
  2420. **
  2421. ** If wrFlag==0, then the cursor can only be used for reading.
  2422. ** If wrFlag==1, then the cursor can be used for reading or for
  2423. ** writing if other conditions for writing are also met. These
  2424. ** are the conditions that must be met in order for writing to
  2425. ** be allowed:
  2426. **
  2427. ** 1: The cursor must have been opened with wrFlag==1
  2428. **
  2429. ** 2: Other database connections that share the same pager cache
  2430. ** but which are not in the READ_UNCOMMITTED state may not have
  2431. ** cursors open with wrFlag==0 on the same table. Otherwise
  2432. ** the changes made by this write cursor would be visible to
  2433. ** the read cursors in the other database connection.
  2434. **
  2435. ** 3: The database must be writable (not on read-only media)
  2436. **
  2437. ** 4: There must be an active transaction.
  2438. **
  2439. ** No checking is done to make sure that page iTable really is the
  2440. ** root page of a b-tree. If it is not, then the cursor acquired
  2441. ** will not work correctly.
  2442. **
  2443. ** The comparison function must be logically the same for every cursor
  2444. ** on a particular table. Changing the comparison function will result
  2445. ** in incorrect operations. If the comparison function is NULL, a
  2446. ** default comparison function is used. The comparison function is
  2447. ** always ignored for INTKEY tables.
  2448. */
  2449. static int btreeCursor(
  2450. Btree *p, /* The btree */
  2451. int iTable, /* Root page of table to open */
  2452. int wrFlag, /* 1 to write. 0 read-only */
  2453. int (*xCmp)(void*,int,const void*,int,const void*), /* Key Comparison func */
  2454. void *pArg, /* First arg to xCompare() */
  2455. BtCursor **ppCur /* Write new cursor here */
  2456. ){
  2457. int rc;
  2458. BtCursor *pCur;
  2459. BtShared *pBt = p->pBt;
  2460. assert( sqlite3BtreeHoldsMutex(p) );
  2461. *ppCur = 0;
  2462. if( wrFlag ){
  2463. if( pBt->readOnly ){
  2464. return SQLITE_READONLY;
  2465. }
  2466. if( checkReadLocks(p, iTable, 0) ){
  2467. return SQLITE_LOCKED;
  2468. }
  2469. }
  2470. if( pBt->pPage1==0 ){
  2471. rc = lockBtreeWithRetry(p);
  2472. if( rc!=SQLITE_OK ){
  2473. return rc;
  2474. }
  2475. if( pBt->readOnly && wrFlag ){
  2476. return SQLITE_READONLY;
  2477. }
  2478. }
  2479. pCur = sqlite3MallocZero( sizeof(*pCur) );
  2480. if( pCur==0 ){
  2481. rc = SQLITE_NOMEM;
  2482. goto create_cursor_exception;
  2483. }
  2484. pCur->pgnoRoot = (Pgno)iTable;
  2485. if( iTable==1 && sqlite3PagerPagecount(pBt->pPager)==0 ){
  2486. rc = SQLITE_EMPTY;
  2487. goto create_cursor_exception;
  2488. }
  2489. rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->pPage, 0);
  2490. if( rc!=SQLITE_OK ){
  2491. goto create_cursor_exception;
  2492. }
  2493. /* Now that no other errors can occur, finish filling in the BtCursor
  2494. ** variables, link the cursor into the BtShared list and set *ppCur (the
  2495. ** output argument to this function).
  2496. */
  2497. pCur->xCompare = xCmp ? xCmp : dfltCompare;
  2498. pCur->pArg = pArg;
  2499. pCur->pBtree = p;
  2500. pCur->pBt = pBt;
  2501. pCur->wrFlag = wrFlag;
  2502. pCur->pNext = pBt->pCursor;
  2503. if( pCur->pNext ){
  2504. pCur->pNext->pPrev = pCur;
  2505. }
  2506. pBt->pCursor = pCur;
  2507. pCur->eState = CURSOR_INVALID;
  2508. *ppCur = pCur;
  2509. return SQLITE_OK;
  2510. create_cursor_exception:
  2511. if( pCur ){
  2512. releasePage(pCur->pPage);
  2513. sqlite3_free(pCur);
  2514. }
  2515. unlockBtreeIfUnused(pBt);
  2516. return rc;
  2517. }
  2518. int sqlite3BtreeCursor(
  2519. Btree *p, /* The btree */
  2520. int iTable, /* Root page of table to open */
  2521. int wrFlag, /* 1 to write. 0 read-only */
  2522. int (*xCmp)(void*,int,const void*,int,const void*), /* Key Comparison func */
  2523. void *pArg, /* First arg to xCompare() */
  2524. BtCursor **ppCur /* Write new cursor here */
  2525. ){
  2526. int rc;
  2527. sqlite3BtreeEnter(p);
  2528. p->pBt->db = p->db;
  2529. rc = btreeCursor(p, iTable, wrFlag, xCmp, pArg, ppCur);
  2530. sqlite3BtreeLeave(p);
  2531. return rc;
  2532. }
  2533. /*
  2534. ** Close a cursor. The read lock on the database file is released
  2535. ** when the last cursor is closed.
  2536. */
  2537. int sqlite3BtreeCloseCursor(BtCursor *pCur){
  2538. BtShared *pBt = pCur->pBt;
  2539. Btree *pBtree = pCur->pBtree;
  2540. sqlite3BtreeEnter(pBtree);
  2541. pBt->db = pBtree->db;
  2542. clearCursorPosition(pCur);
  2543. if( pCur->pPrev ){
  2544. pCur->pPrev->pNext = pCur->pNext;
  2545. }else{
  2546. pBt->pCursor = pCur->pNext;
  2547. }
  2548. if( pCur->pNext ){
  2549. pCur->pNext->pPrev = pCur->pPrev;
  2550. }
  2551. releasePage(pCur->pPage);
  2552. unlockBtreeIfUnused(pBt);
  2553. invalidateOverflowCache(pCur);
  2554. sqlite3_free(pCur);
  2555. sqlite3BtreeLeave(pBtree);
  2556. return SQLITE_OK;
  2557. }
  2558. /*
  2559. ** Make a temporary cursor by filling in the fields of pTempCur.
  2560. ** The temporary cursor is not on the cursor list for the Btree.
  2561. */
  2562. void sqlite3BtreeGetTempCursor(BtCursor *pCur, BtCursor *pTempCur){
  2563. assert( cursorHoldsMutex(pCur) );
  2564. memcpy(pTempCur, pCur, sizeof(*pCur));
  2565. pTempCur->pNext = 0;
  2566. pTempCur->pPrev = 0;
  2567. if( pTempCur->pPage ){
  2568. sqlite3PagerRef(pTempCur->pPage->pDbPage);
  2569. }
  2570. }
  2571. /*
  2572. ** Delete a temporary cursor such as was made by the sqlite3BtreeGetTempCursor()
  2573. ** function above.
  2574. */
  2575. void sqlite3BtreeReleaseTempCursor(BtCursor *pCur){
  2576. assert( cursorHoldsMutex(pCur) );
  2577. if( pCur->pPage ){
  2578. sqlite3PagerUnref(pCur->pPage->pDbPage);
  2579. }
  2580. }
  2581. /*
  2582. ** Make sure the BtCursor* given in the argument has a valid
  2583. ** BtCursor.info structure. If it is not already valid, call
  2584. ** sqlite3BtreeParseCell() to fill it in.
  2585. **
  2586. ** BtCursor.info is a cache of the information in the current cell.
  2587. ** Using this cache reduces the number of calls to sqlite3BtreeParseCell().
  2588. **
  2589. ** 2007-06-25: There is a bug in some versions of MSVC that cause the
  2590. ** compiler to crash when getCellInfo() is implemented as a macro.
  2591. ** But there is a measurable speed advantage to using the macro on gcc
  2592. ** (when fewer compiler optimizations like -Os or -O0 are used and the
  2593. ** compiler is not doing aggressive inlining.) So we use a real function
  2594. ** for MSVC and a macro for everything else. Ticket #2457.
  2595. */
  2596. #ifndef NDEBUG
  2597. static void assertCellInfo(BtCursor *pCur){
  2598. CellInfo info;
  2599. memset(&info, 0, sizeof(info));
  2600. sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &info);
  2601. assert( memcmp(&info, &pCur->info, sizeof(info))==0 );
  2602. }
  2603. #else
  2604. #define assertCellInfo(x)
  2605. #endif
  #ifdef _MSC_VER
    /* Use a real function in MSVC to work around bugs in that compiler. */
    static void getCellInfo(BtCursor *pCur){
      if( pCur->info.nSize==0 ){
        sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &pCur->info);
      }else{
        assertCellInfo(pCur);
      }
    }
  #else /* if not _MSC_VER */
    /* Use a macro in all other compilers so that the function is inlined.
    **
    ** The body is wrapped in do{...}while(0) so that "getCellInfo(x);" is a
    ** single statement wherever a function call statement is allowed, for
    ** example as the unbraced body of an if/else.  The argument is
    ** parenthesized at each use.  It is still evaluated more than once, so
    ** callers must pass an expression without side effects.
    */
  #define getCellInfo(pCur)                                                  \
    do{                                                                      \
      if( (pCur)->info.nSize==0 ){                                           \
        sqlite3BtreeParseCell((pCur)->pPage, (pCur)->idx, &(pCur)->info);    \
      }else{                                                                 \
        assertCellInfo(pCur);                                                \
      }                                                                      \
    }while(0)
  #endif /* _MSC_VER */
  2624. /*
  2625. ** Set *pSize to the size of the buffer needed to hold the value of
  2626. ** the key for the current entry. If the cursor is not pointing
  2627. ** to a valid entry, *pSize is set to 0.
  2628. **
  2629. ** For a table with the INTKEY flag set, this routine returns the key
  2630. ** itself, not the number of bytes in the key.
  2631. */
  2632. int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){
  2633. int rc;
  2634. assert( cursorHoldsMutex(pCur) );
  2635. rc = restoreOrClearCursorPosition(pCur);
  2636. if( rc==SQLITE_OK ){
  2637. assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID );
  2638. if( pCur->eState==CURSOR_INVALID ){
  2639. *pSize = 0;
  2640. }else{
  2641. getCellInfo(pCur);
  2642. *pSize = pCur->info.nKey;
  2643. }
  2644. }
  2645. return rc;
  2646. }
  2647. /*
  2648. ** Set *pSize to the number of bytes of data in the entry the
  2649. ** cursor currently points to. Return SQLITE_OK on success, or the
  2650. ** error code from restoring the cursor position. If the cursor is
  2651. ** not currently pointing to an entry (which can happen, for example, if
  2652. ** the database is empty) then *pSize is set to 0.
  2653. */
  2654. int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){
  2655. int rc;
  2656. assert( cursorHoldsMutex(pCur) );
  2657. rc = restoreOrClearCursorPosition(pCur);
  2658. if( rc==SQLITE_OK ){
  2659. assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID );
  2660. if( pCur->eState==CURSOR_INVALID ){
  2661. /* Not pointing at a valid entry - set *pSize to 0. */
  2662. *pSize = 0;
  2663. }else{
  2664. getCellInfo(pCur);
  2665. *pSize = pCur->info.nData;
  2666. }
  2667. }
  2668. return rc;
  2669. }
  2670. /*
  2671. ** Given the page number of an overflow page in the database (parameter
  2672. ** ovfl), this function finds the page number of the next page in the
  2673. ** linked list of overflow pages. If possible, it uses the auto-vacuum
  2674. ** pointer-map data instead of reading the content of page ovfl to do so.
  2675. **
  2676. ** If an error occurs an SQLite error code is returned. Otherwise:
  2677. **
  2678. ** Unless pPgnoNext is NULL, the page number of the next overflow
  2679. ** page in the linked list is written to *pPgnoNext. If page ovfl
  2680. ** is the last page in its linked list, *pPgnoNext is set to zero.
  2681. **
  2682. ** If ppPage is not NULL, *ppPage is set to the MemPage* handle
  2683. ** for page ovfl. The underlying pager page may have been requested
  2684. ** with the noContent flag set, so the page data accessable via
  2685. ** this handle may not be trusted.
  2686. */
  2687. static int getOverflowPage(
  2688. BtShared *pBt,
  2689. Pgno ovfl, /* Overflow page */
  2690. MemPage **ppPage, /* OUT: MemPage handle */
  2691. Pgno *pPgnoNext /* OUT: Next overflow page number */
  2692. ){
  2693. Pgno next = 0;
  2694. int rc;
  2695. assert( sqlite3_mutex_held(pBt->mutex) );
  2696. /* One of these must not be NULL. Otherwise, why call this function? */
  2697. assert(ppPage || pPgnoNext);
  2698. /* If pPgnoNext is NULL, then this function is being called to obtain
  2699. ** a MemPage* reference only. No page-data is required in this case.
  2700. */
  2701. if( !pPgnoNext ){
  2702. return sqlite3BtreeGetPage(pBt, ovfl, ppPage, 1);
  2703. }
  2704. #ifndef SQLITE_OMIT_AUTOVACUUM
  2705. /* Try to find the next page in the overflow list using the
  2706. ** autovacuum pointer-map pages. Guess that the next page in
  2707. ** the overflow list is page number (ovfl+1). If that guess turns
  2708. ** out to be wrong, fall back to loading the data of page
  2709. ** number ovfl to determine the next page number.
  2710. */
  2711. if( pBt->autoVacuum ){
  2712. Pgno pgno;
  2713. Pgno iGuess = ovfl+1;
  2714. u8 eType;
  2715. while( PTRMAP_ISPAGE(pBt, iGuess) || iGuess==PENDING_BYTE_PAGE(pBt) ){
  2716. iGuess++;
  2717. }
  2718. if( iGuess<=sqlite3PagerPagecount(pBt->pPager) ){
  2719. rc = ptrmapGet(pBt, iGuess, &eType, &pgno);
  2720. if( rc!=SQLITE_OK ){
  2721. return rc;
  2722. }
  2723. if( eType==PTRMAP_OVERFLOW2 && pgno==ovfl ){
  2724. next = iGuess;
  2725. }
  2726. }
  2727. }
  2728. #endif
  2729. if( next==0 || ppPage ){
  2730. MemPage *pPage = 0;
  2731. rc = sqlite3BtreeGetPage(pBt, ovfl, &pPage, next!=0);
  2732. assert(rc==SQLITE_OK || pPage==0);
  2733. if( next==0 && rc==SQLITE_OK ){
  2734. next = get4byte(pPage->aData);
  2735. }
  2736. if( ppPage ){
  2737. *ppPage = pPage;
  2738. }else{
  2739. releasePage(pPage);
  2740. }
  2741. }
  2742. *pPgnoNext = next;
  2743. return rc;
  2744. }
  2745. /*
  2746. ** Copy data from a buffer to a page, or from a page to a buffer.
  2747. **
  2748. ** pPayload is a pointer to data stored on database page pDbPage.
  2749. ** If argument eOp is false, then nByte bytes of data are copied
  2750. ** from pPayload to the buffer pointed at by pBuf. If eOp is true,
  2751. ** then sqlite3PagerWrite() is called on pDbPage and nByte bytes
  2752. ** of data are copied from the buffer pBuf to pPayload.
  2753. **
  2754. ** SQLITE_OK is returned on success, otherwise an error code.
  2755. */
  2756. static int copyPayload(
  2757. void *pPayload, /* Pointer to page data */
  2758. void *pBuf, /* Pointer to buffer */
  2759. int nByte, /* Number of bytes to copy */
  2760. int eOp, /* 0 -> copy from page, 1 -> copy to page */
  2761. DbPage *pDbPage /* Page containing pPayload */
  2762. ){
  2763. if( eOp ){
  2764. /* Copy data from buffer to page (a write operation) */
  2765. int rc = sqlite3PagerWrite(pDbPage);
  2766. if( rc!=SQLITE_OK ){
  2767. return rc;
  2768. }
  2769. memcpy(pPayload, pBuf, nByte);
  2770. }else{
  2771. /* Copy data from page to buffer (a read operation) */
  2772. memcpy(pBuf, pPayload, nByte);
  2773. }
  2774. return SQLITE_OK;
  2775. }
/*
** This function is used to read or overwrite payload information
** for the entry that the pCur cursor is pointing to. If the eOp
** parameter is 0, this is a read operation (data copied into
** buffer pBuf). If it is non-zero, a write (data copied from
** buffer pBuf).
**
** A total of "amt" bytes are read or written beginning at "offset".
** Data is read to or from the buffer pBuf.
**
** This routine does not make a distinction between key and data.
** It just reads or writes bytes from the payload area. Data might
** appear on the main page or be scattered out on multiple overflow
** pages.
**
** If the BtCursor.isIncrblobHandle flag is set, and the current
** cursor entry uses one or more overflow pages, this function
** allocates space for and lazily populates the overflow page-list
** cache array (BtCursor.aOverflow). Subsequent calls use this
** cache to make seeking to the supplied offset more efficient.
**
** Once an overflow page-list cache has been allocated, it may be
** invalidated if some other cursor writes to the same table, or if
** the cursor is moved to a different row. Additionally, in auto-vacuum
** mode, the following events may invalidate an overflow page-list cache.
**
** * An incremental vacuum,
** * A commit in auto_vacuum="full" mode,
** * Creating a table (may require moving an overflow page).
**
** Return values:
**
**   SQLITE_OK            The requested range was transferred in full.
**   SQLITE_ERROR         offset+amt extends past the end of the payload.
**   SQLITE_NOMEM         The overflow page-list cache could not be
**                        allocated.
**   SQLITE_CORRUPT_BKPT  The overflow chain ended before "amt" bytes had
**                        been transferred.
**   (other)              Any error returned by the pager or by
**                        getOverflowPage() is passed through.
*/
static int accessPayload(
  BtCursor *pCur,        /* Cursor pointing to entry to read from */
  int offset,            /* Begin reading this far into payload */
  int amt,               /* Read (or write) this many bytes */
  unsigned char *pBuf,   /* Destination buffer on read, source on write */
  int skipKey,           /* offset begins at data if this is true */
  int eOp                /* zero to read. non-zero to write. */
){
  unsigned char *aPayload;
  int rc = SQLITE_OK;
  u32 nKey;
  int iIdx = 0;                   /* Index of current page in overflow chain */
  MemPage *pPage = pCur->pPage;   /* Btree page of current cursor entry */
  BtShared *pBt;                  /* Btree this cursor belongs to */
  assert( pPage );
  assert( pCur->eState==CURSOR_VALID );
  assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
  assert( offset>=0 );
  assert( cursorHoldsMutex(pCur) );
  getCellInfo(pCur);
  /* The payload starts immediately after the cell header. */
  aPayload = pCur->info.pCell + pCur->info.nHeader;
  /* On intKey pages the key is not counted as part of the payload. */
  nKey = (pPage->intKey ? 0 : pCur->info.nKey);
  if( skipKey ){
    offset += nKey;
  }
  if( offset+amt > nKey+pCur->info.nData ){
    /* Trying to read or write past the end of the data is an error */
    return SQLITE_ERROR;
  }
  /* Check if data must be read/written to/from the btree page itself. */
  if( offset<pCur->info.nLocal ){
    int a = amt;
    if( a+offset>pCur->info.nLocal ){
      /* Only the part that is stored locally is handled here. */
      a = pCur->info.nLocal - offset;
    }
    rc = copyPayload(&aPayload[offset], pBuf, a, eOp, pPage->pDbPage);
    offset = 0;
    pBuf += a;
    amt -= a;
  }else{
    /* From here on, offset is relative to the start of the overflow data. */
    offset -= pCur->info.nLocal;
  }
  pBt = pCur->pBt;
  if( rc==SQLITE_OK && amt>0 ){
    const int ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */
    Pgno nextPage;
    /* The first overflow page number follows the local payload bytes. */
    nextPage = get4byte(&aPayload[pCur->info.nLocal]);
#ifndef SQLITE_OMIT_INCRBLOB
    /* If the isIncrblobHandle flag is set and the BtCursor.aOverflow[]
    ** has not been allocated, allocate it now. The array is sized at
    ** one entry for each overflow page in the overflow chain. The
    ** page number of the first overflow page is stored in aOverflow[0],
    ** etc. A value of 0 in the aOverflow[] array means "not yet known"
    ** (the cache is lazily populated).
    */
    if( pCur->isIncrblobHandle && !pCur->aOverflow ){
      int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize;
      pCur->aOverflow = (Pgno *)sqlite3MallocZero(sizeof(Pgno)*nOvfl);
      if( nOvfl && !pCur->aOverflow ){
        rc = SQLITE_NOMEM;
      }
    }
    /* If the overflow page-list cache has been allocated and the
    ** entry for the first required overflow page is valid, skip
    ** directly to it.
    */
    if( pCur->aOverflow && pCur->aOverflow[offset/ovflSize] ){
      iIdx = (offset/ovflSize);
      nextPage = pCur->aOverflow[iIdx];
      offset = (offset%ovflSize);
    }
#endif
    /* Walk the overflow chain.  The loop ends when the transfer is
    ** complete, an error occurs, or the chain runs out (nextPage==0).
    */
    for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){
#ifndef SQLITE_OMIT_INCRBLOB
      /* If required, populate the overflow page-list cache. */
      if( pCur->aOverflow ){
        assert(!pCur->aOverflow[iIdx] || pCur->aOverflow[iIdx]==nextPage);
        pCur->aOverflow[iIdx] = nextPage;
      }
#endif
      if( offset>=ovflSize ){
        /* The only reason to read this page is to obtain the page
        ** number for the next page in the overflow chain. The page
        ** data is not required. So first try to lookup the overflow
        ** page-list cache, if any, then fall back to the getOverflowPage()
        ** function.
        */
#ifndef SQLITE_OMIT_INCRBLOB
        if( pCur->aOverflow && pCur->aOverflow[iIdx+1] ){
          nextPage = pCur->aOverflow[iIdx+1];
        } else
#endif
          rc = getOverflowPage(pBt, nextPage, 0, &nextPage);
        offset -= ovflSize;
      }else{
        /* Need to read this page properly. It contains some of the
        ** range of data that is being read (eOp==0) or written (eOp!=0).
        */
        DbPage *pDbPage;
        int a = amt;
        rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage);
        if( rc==SQLITE_OK ){
          aPayload = sqlite3PagerGetData(pDbPage);
          /* The first 4 bytes of the page hold the next page number;
          ** the content follows, hence the +4 below.
          */
          nextPage = get4byte(aPayload);
          if( a + offset > ovflSize ){
            a = ovflSize - offset;
          }
          rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage);
          sqlite3PagerUnref(pDbPage);
          offset = 0;
          amt -= a;
          pBuf += a;
        }
      }
    }
  }
  /* The chain ended while bytes were still outstanding. */
  if( rc==SQLITE_OK && amt>0 ){
    return SQLITE_CORRUPT_BKPT;
  }
  return rc;
}
  2927. /*
  2928. ** Read part of the key associated with cursor pCur. Exactly
  2929. ** "amt" bytes will be transfered into pBuf[]. The transfer
  2930. ** begins at "offset".
  2931. **
  2932. ** Return SQLITE_OK on success or an error code if anything goes
  2933. ** wrong. An error is returned if "offset+amt" is larger than
  2934. ** the available payload.
  2935. */
  2936. int sqlite3BtreeKey(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
  2937. int rc;
  2938. assert( cursorHoldsMutex(pCur) );
  2939. rc = restoreOrClearCursorPosition(pCur);
  2940. if( rc==SQLITE_OK ){
  2941. assert( pCur->eState==CURSOR_VALID );
  2942. assert( pCur->pPage!=0 );
  2943. if( pCur->pPage->intKey ){
  2944. return SQLITE_CORRUPT_BKPT;
  2945. }
  2946. assert( pCur->pPage->intKey==0 );
  2947. assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
  2948. rc = accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0, 0);
  2949. }
  2950. return rc;
  2951. }
  2952. /*
  2953. ** Read part of the data associated with cursor pCur. Exactly
  2954. ** "amt" bytes will be transfered into pBuf[]. The transfer
  2955. ** begins at "offset".
  2956. **
  2957. ** Return SQLITE_OK on success or an error code if anything goes
  2958. ** wrong. An error is returned if "offset+amt" is larger than
  2959. ** the available payload.
  2960. */
  2961. int sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
  2962. int rc;
  2963. assert( cursorHoldsMutex(pCur) );
  2964. rc = restoreOrClearCursorPosition(pCur);
  2965. if( rc==SQLITE_OK ){
  2966. assert( pCur->eState==CURSOR_VALID );
  2967. assert( pCur->pPage!=0 );
  2968. assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
  2969. rc = accessPayload(pCur, offset, amt, pBuf, 1, 0);
  2970. }
  2971. return rc;
  2972. }
  2973. /*
  2974. ** Return a pointer to payload information from the entry that the
  2975. ** pCur cursor is pointing to. The pointer is to the beginning of
  2976. ** the key if skipKey==0 and it points to the beginning of data if
  2977. ** skipKey==1. The number of bytes of available key/data is written
  2978. ** into *pAmt. If *pAmt==0, then the value returned will not be
  2979. ** a valid pointer.
  2980. **
  2981. ** This routine is an optimization. It is common for the entire key
  2982. ** and data to fit on the local page and for there to be no overflow
  2983. ** pages. When that is so, this routine can be used to access the
  2984. ** key and data without making a copy. If the key and/or data spills
  2985. ** onto overflow pages, then accessPayload() must be used to reassembly
  2986. ** the key/data and copy it into a preallocated buffer.
  2987. **
  2988. ** The pointer returned by this routine looks directly into the cached
  2989. ** page of the database. The data might change or move the next time
  2990. ** any btree routine is called.
  2991. */
  2992. static const unsigned char *fetchPayload(
  2993. BtCursor *pCur, /* Cursor pointing to entry to read from */
  2994. int *pAmt, /* Write the number of available bytes here */
  2995. int skipKey /* read beginning at data if this is true */
  2996. ){
  2997. unsigned char *aPayload;
  2998. MemPage *pPage;
  2999. u32 nKey;
  3000. int nLocal;
  3001. assert( pCur!=0 && pCur->pPage!=0 );
  3002. assert( pCur->eState==CURSOR_VALID );
  3003. assert( cursorHoldsMutex(pCur) );
  3004. pPage = pCur->pPage;
  3005. assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
  3006. getCellInfo(pCur);
  3007. aPayload = pCur->info.pCell;
  3008. aPayload += pCur->info.nHeader;
  3009. if( pPage->intKey ){
  3010. nKey = 0;
  3011. }else{
  3012. nKey = pCur->info.nKey;
  3013. }
  3014. if( skipKey ){
  3015. aPayload += nKey;
  3016. nLocal = pCur->info.nLocal - nKey;
  3017. }else{
  3018. nLocal = pCur->info.nLocal;
  3019. if( nLocal>nKey ){
  3020. nLocal = nKey;
  3021. }
  3022. }
  3023. *pAmt = nLocal;
  3024. return aPayload;
  3025. }
  3026. /*
  3027. ** For the entry that cursor pCur is point to, return as
  3028. ** many bytes of the key or data as are available on the local
  3029. ** b-tree page. Write the number of available bytes into *pAmt.
  3030. **
  3031. ** The pointer returned is ephemeral. The key/data may move
  3032. ** or be destroyed on the next call to any Btree routine,
  3033. ** including calls from other threads against the same cache.
  3034. ** Hence, a mutex on the BtShared should be held prior to calling
  3035. ** this routine.
  3036. **
  3037. ** These routines is used to get quick access to key and data
  3038. ** in the common case where no overflow pages are used.
  3039. */
  3040. const void *sqlite3BtreeKeyFetch(BtCursor *pCur, int *pAmt){
  3041. assert( cursorHoldsMutex(pCur) );
  3042. if( pCur->eState==CURSOR_VALID ){
  3043. return (const void*)fetchPayload(pCur, pAmt, 0);
  3044. }
  3045. return 0;
  3046. }
  3047. const void *sqlite3BtreeDataFetch(BtCursor *pCur, int *pAmt){
  3048. assert( cursorHoldsMutex(pCur) );
  3049. if( pCur->eState==CURSOR_VALID ){
  3050. return (const void*)fetchPayload(pCur, pAmt, 1);
  3051. }
  3052. return 0;
  3053. }
  3054. /*
  3055. ** Move the cursor down to a new child page. The newPgno argument is the
  3056. ** page number of the child page to move to.
  3057. */
  3058. static int moveToChild(BtCursor *pCur, u32 newPgno){
  3059. int rc;
  3060. MemPage *pNewPage;
  3061. MemPage *pOldPage;
  3062. BtShared *pBt = pCur->pBt;
  3063. assert( cursorHoldsMutex(pCur) );
  3064. assert( pCur->eState==CURSOR_VALID );
  3065. rc = getAndInitPage(pBt, newPgno, &pNewPage, pCur->pPage);
  3066. if( rc ) return rc;
  3067. pNewPage->idxParent = pCur->idx;
  3068. pOldPage = pCur->pPage;
  3069. pOldPage->idxShift = 0;
  3070. releasePage(pOldPage);
  3071. pCur->pPage = pNewPage;
  3072. pCur->idx = 0;
  3073. pCur->info.nSize = 0;
  3074. if( pNewPage->nCell<1 ){
  3075. return SQLITE_CORRUPT_BKPT;
  3076. }
  3077. return SQLITE_OK;
  3078. }
  3079. /*
  3080. ** Return true if the page is the virtual root of its table.
  3081. **
  3082. ** The virtual root page is the root page for most tables. But
  3083. ** for the table rooted on page 1, sometime the real root page
  3084. ** is empty except for the right-pointer. In such cases the
  3085. ** virtual root page is the page that the right-pointer of page
  3086. ** 1 is pointing to.
  3087. */
  3088. int sqlite3BtreeIsRootPage(MemPage *pPage){
  3089. MemPage *pParent;
  3090. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  3091. pParent = pPage->pParent;
  3092. if( pParent==0 ) return 1;
  3093. if( pParent->pgno>1 ) return 0;
  3094. if( get2byte(&pParent->aData[pParent->hdrOffset+3])==0 ) return 1;
  3095. return 0;
  3096. }
  3097. /*
  3098. ** Move the cursor up to the parent page.
  3099. **
  3100. ** pCur->idx is set to the cell index that contains the pointer
  3101. ** to the page we are coming from. If we are coming from the
  3102. ** right-most child page then pCur->idx is set to one more than
  3103. ** the largest cell index.
  3104. */
  3105. void sqlite3BtreeMoveToParent(BtCursor *pCur){
  3106. MemPage *pParent;
  3107. MemPage *pPage;
  3108. int idxParent;
  3109. assert( cursorHoldsMutex(pCur) );
  3110. assert( pCur->eState==CURSOR_VALID );
  3111. pPage = pCur->pPage;
  3112. assert( pPage!=0 );
  3113. assert( !sqlite3BtreeIsRootPage(pPage) );
  3114. pParent = pPage->pParent;
  3115. assert( pParent!=0 );
  3116. idxParent = pPage->idxParent;
  3117. sqlite3PagerRef(pParent->pDbPage);
  3118. releasePage(pPage);
  3119. pCur->pPage = pParent;
  3120. pCur->info.nSize = 0;
  3121. assert( pParent->idxShift==0 );
  3122. pCur->idx = idxParent;
  3123. }
  3124. /*
  3125. ** Move the cursor to the root page
  3126. */
  3127. static int moveToRoot(BtCursor *pCur){
  3128. MemPage *pRoot;
  3129. int rc = SQLITE_OK;
  3130. Btree *p = pCur->pBtree;
  3131. BtShared *pBt = p->pBt;
  3132. assert( cursorHoldsMutex(pCur) );
  3133. assert( CURSOR_INVALID < CURSOR_REQUIRESEEK );
  3134. assert( CURSOR_VALID < CURSOR_REQUIRESEEK );
  3135. assert( CURSOR_FAULT > CURSOR_REQUIRESEEK );
  3136. if( pCur->eState>=CURSOR_REQUIRESEEK ){
  3137. if( pCur->eState==CURSOR_FAULT ){
  3138. return pCur->skip;
  3139. }
  3140. clearCursorPosition(pCur);
  3141. }
  3142. pRoot = pCur->pPage;
  3143. if( pRoot && pRoot->pgno==pCur->pgnoRoot ){
  3144. assert( pRoot->isInit );
  3145. }else{
  3146. if(
  3147. SQLITE_OK!=(rc = getAndInitPage(pBt, pCur->pgnoRoot, &pRoot, 0))
  3148. ){
  3149. pCur->eState = CURSOR_INVALID;
  3150. return rc;
  3151. }
  3152. releasePage(pCur->pPage);
  3153. pCur->pPage = pRoot;
  3154. }
  3155. pCur->idx = 0;
  3156. pCur->info.nSize = 0;
  3157. if( pRoot->nCell==0 && !pRoot->leaf ){
  3158. Pgno subpage;
  3159. assert( pRoot->pgno==1 );
  3160. subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]);
  3161. assert( subpage>0 );
  3162. pCur->eState = CURSOR_VALID;
  3163. rc = moveToChild(pCur, subpage);
  3164. }
  3165. pCur->eState = ((pCur->pPage->nCell>0)?CURSOR_VALID:CURSOR_INVALID);
  3166. return rc;
  3167. }
  3168. /*
  3169. ** Move the cursor down to the left-most leaf entry beneath the
  3170. ** entry to which it is currently pointing.
  3171. **
  3172. ** The left-most leaf is the one with the smallest key - the first
  3173. ** in ascending order.
  3174. */
  3175. static int moveToLeftmost(BtCursor *pCur){
  3176. Pgno pgno;
  3177. int rc = SQLITE_OK;
  3178. MemPage *pPage;
  3179. assert( cursorHoldsMutex(pCur) );
  3180. assert( pCur->eState==CURSOR_VALID );
  3181. while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){
  3182. assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
  3183. pgno = get4byte(findCell(pPage, pCur->idx));
  3184. rc = moveToChild(pCur, pgno);
  3185. }
  3186. return rc;
  3187. }
  3188. /*
  3189. ** Move the cursor down to the right-most leaf entry beneath the
  3190. ** page to which it is currently pointing. Notice the difference
  3191. ** between moveToLeftmost() and moveToRightmost(). moveToLeftmost()
  3192. ** finds the left-most entry beneath the *entry* whereas moveToRightmost()
  3193. ** finds the right-most entry beneath the *page*.
  3194. **
  3195. ** The right-most entry is the one with the largest key - the last
  3196. ** key in ascending order.
  3197. */
  3198. static int moveToRightmost(BtCursor *pCur){
  3199. Pgno pgno;
  3200. int rc = SQLITE_OK;
  3201. MemPage *pPage;
  3202. assert( cursorHoldsMutex(pCur) );
  3203. assert( pCur->eState==CURSOR_VALID );
  3204. while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){
  3205. pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
  3206. pCur->idx = pPage->nCell;
  3207. rc = moveToChild(pCur, pgno);
  3208. }
  3209. if( rc==SQLITE_OK ){
  3210. pCur->idx = pPage->nCell - 1;
  3211. pCur->info.nSize = 0;
  3212. }
  3213. return SQLITE_OK;
  3214. }
  3215. /* Move the cursor to the first entry in the table. Return SQLITE_OK
  3216. ** on success. Set *pRes to 0 if the cursor actually points to something
  3217. ** or set *pRes to 1 if the table is empty.
  3218. */
  3219. int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
  3220. int rc;
  3221. assert( cursorHoldsMutex(pCur) );
  3222. assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
  3223. rc = moveToRoot(pCur);
  3224. if( rc==SQLITE_OK ){
  3225. if( pCur->eState==CURSOR_INVALID ){
  3226. assert( pCur->pPage->nCell==0 );
  3227. *pRes = 1;
  3228. rc = SQLITE_OK;
  3229. }else{
  3230. assert( pCur->pPage->nCell>0 );
  3231. *pRes = 0;
  3232. rc = moveToLeftmost(pCur);
  3233. }
  3234. }
  3235. return rc;
  3236. }
  3237. /* Move the cursor to the last entry in the table. Return SQLITE_OK
  3238. ** on success. Set *pRes to 0 if the cursor actually points to something
  3239. ** or set *pRes to 1 if the table is empty.
  3240. */
  3241. int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
  3242. int rc;
  3243. assert( cursorHoldsMutex(pCur) );
  3244. assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
  3245. rc = moveToRoot(pCur);
  3246. if( rc==SQLITE_OK ){
  3247. if( CURSOR_INVALID==pCur->eState ){
  3248. assert( pCur->pPage->nCell==0 );
  3249. *pRes = 1;
  3250. }else{
  3251. assert( pCur->eState==CURSOR_VALID );
  3252. *pRes = 0;
  3253. rc = moveToRightmost(pCur);
  3254. }
  3255. }
  3256. return rc;
  3257. }
  3258. /* Move the cursor so that it points to an entry near pKey/nKey.
  3259. ** Return a success code.
  3260. **
  3261. ** For INTKEY tables, only the nKey parameter is used. pKey is
  3262. ** ignored. For other tables, nKey is the number of bytes of data
  3263. ** in pKey. The comparison function specified when the cursor was
  3264. ** created is used to compare keys.
  3265. **
  3266. ** If an exact match is not found, then the cursor is always
  3267. ** left pointing at a leaf page which would hold the entry if it
  3268. ** were present. The cursor might point to an entry that comes
  3269. ** before or after the key.
  3270. **
  3271. ** The result of comparing the key with the entry to which the
  3272. ** cursor is written to *pRes if pRes!=NULL. The meaning of
  3273. ** this value is as follows:
  3274. **
  3275. ** *pRes<0 The cursor is left pointing at an entry that
  3276. ** is smaller than pKey or if the table is empty
  3277. ** and the cursor is therefore left point to nothing.
  3278. **
  3279. ** *pRes==0 The cursor is left pointing at an entry that
  3280. ** exactly matches pKey.
  3281. **
  3282. ** *pRes>0 The cursor is left pointing at an entry that
  3283. ** is larger than pKey.
  3284. **
  3285. */
  3286. int sqlite3BtreeMoveto(
  3287. BtCursor *pCur, /* The cursor to be moved */
  3288. const void *pKey, /* The key content for indices. Not used by tables */
  3289. i64 nKey, /* Size of pKey. Or the key for tables */
  3290. int biasRight, /* If true, bias the search to the high end */
  3291. int *pRes /* Search result flag */
  3292. ){
  3293. int rc;
  3294. assert( cursorHoldsMutex(pCur) );
  3295. assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
  3296. rc = moveToRoot(pCur);
  3297. if( rc ){
  3298. return rc;
  3299. }
  3300. assert( pCur->pPage );
  3301. assert( pCur->pPage->isInit );
  3302. if( pCur->eState==CURSOR_INVALID ){
  3303. *pRes = -1;
  3304. assert( pCur->pPage->nCell==0 );
  3305. return SQLITE_OK;
  3306. }
  3307. for(;;){
  3308. int lwr, upr;
  3309. Pgno chldPg;
  3310. MemPage *pPage = pCur->pPage;
  3311. int c = -1; /* pRes return if table is empty must be -1 */
  3312. lwr = 0;
  3313. upr = pPage->nCell-1;
  3314. if( !pPage->intKey && pKey==0 ){
  3315. return SQLITE_CORRUPT_BKPT;
  3316. }
  3317. if( biasRight ){
  3318. pCur->idx = upr;
  3319. }else{
  3320. pCur->idx = (upr+lwr)/2;
  3321. }
  3322. if( lwr<=upr ) for(;;){
  3323. void *pCellKey;
  3324. i64 nCellKey;
  3325. pCur->info.nSize = 0;
  3326. if( pPage->intKey ){
  3327. u8 *pCell;
  3328. pCell = findCell(pPage, pCur->idx) + pPage->childPtrSize;
  3329. if( pPage->hasData ){
  3330. u32 dummy;
  3331. pCell += getVarint32(pCell, &dummy);
  3332. }
  3333. getVarint(pCell, (u64 *)&nCellKey);
  3334. if( nCellKey<nKey ){
  3335. c = -1;
  3336. }else if( nCellKey>nKey ){
  3337. c = +1;
  3338. }else{
  3339. c = 0;
  3340. }
  3341. }else{
  3342. int available;
  3343. pCellKey = (void *)fetchPayload(pCur, &available, 0);
  3344. nCellKey = pCur->info.nKey;
  3345. if( available>=nCellKey ){
  3346. c = pCur->xCompare(pCur->pArg, nCellKey, pCellKey, nKey, pKey);
  3347. }else{
  3348. pCellKey = sqlite3_malloc( nCellKey );
  3349. if( pCellKey==0 ) return SQLITE_NOMEM;
  3350. rc = sqlite3BtreeKey(pCur, 0, nCellKey, (void *)pCellKey);
  3351. c = pCur->xCompare(pCur->pArg, nCellKey, pCellKey, nKey, pKey);
  3352. sqlite3_free(pCellKey);
  3353. if( rc ){
  3354. return rc;
  3355. }
  3356. }
  3357. }
  3358. if( c==0 ){
  3359. if( pPage->leafData && !pPage->leaf ){
  3360. lwr = pCur->idx;
  3361. upr = lwr - 1;
  3362. break;
  3363. }else{
  3364. if( pRes ) *pRes = 0;
  3365. return SQLITE_OK;
  3366. }
  3367. }
  3368. if( c<0 ){
  3369. lwr = pCur->idx+1;
  3370. }else{
  3371. upr = pCur->idx-1;
  3372. }
  3373. if( lwr>upr ){
  3374. break;
  3375. }
  3376. pCur->idx = (lwr+upr)/2;
  3377. }
  3378. assert( lwr==upr+1 );
  3379. assert( pPage->isInit );
  3380. if( pPage->leaf ){
  3381. chldPg = 0;
  3382. }else if( lwr>=pPage->nCell ){
  3383. chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
  3384. }else{
  3385. chldPg = get4byte(findCell(pPage, lwr));
  3386. }
  3387. if( chldPg==0 ){
  3388. assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
  3389. if( pRes ) *pRes = c;
  3390. return SQLITE_OK;
  3391. }
  3392. pCur->idx = lwr;
  3393. pCur->info.nSize = 0;
  3394. rc = moveToChild(pCur, chldPg);
  3395. if( rc ){
  3396. return rc;
  3397. }
  3398. }
  3399. /* NOT REACHED */
  3400. }
  3401. /*
  3402. ** Return TRUE if the cursor is not pointing at an entry of the table.
  3403. **
  3404. ** TRUE will be returned after a call to sqlite3BtreeNext() moves
  3405. ** past the last entry in the table or sqlite3BtreePrev() moves past
  3406. ** the first entry. TRUE is also returned if the table is empty.
  3407. */
  3408. int sqlite3BtreeEof(BtCursor *pCur){
  3409. /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries
  3410. ** have been deleted? This API will need to change to return an error code
  3411. ** as well as the boolean result value.
  3412. */
  3413. return (CURSOR_VALID!=pCur->eState);
  3414. }
  3415. /*
  3416. ** Return the database connection handle for a cursor.
  3417. */
  3418. sqlite3 *sqlite3BtreeCursorDb(const BtCursor *pCur){
  3419. assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
  3420. return pCur->pBtree->db;
  3421. }
  3422. /*
  3423. ** Advance the cursor to the next entry in the database. If
  3424. ** successful then set *pRes=0. If the cursor
  3425. ** was already pointing to the last entry in the database before
  3426. ** this routine was called, then set *pRes=1.
  3427. */
  3428. static int btreeNext(BtCursor *pCur, int *pRes){
  3429. int rc;
  3430. MemPage *pPage;
  3431. assert( cursorHoldsMutex(pCur) );
  3432. rc = restoreOrClearCursorPosition(pCur);
  3433. if( rc!=SQLITE_OK ){
  3434. return rc;
  3435. }
  3436. assert( pRes!=0 );
  3437. pPage = pCur->pPage;
  3438. if( CURSOR_INVALID==pCur->eState ){
  3439. *pRes = 1;
  3440. return SQLITE_OK;
  3441. }
  3442. if( pCur->skip>0 ){
  3443. pCur->skip = 0;
  3444. *pRes = 0;
  3445. return SQLITE_OK;
  3446. }
  3447. pCur->skip = 0;
  3448. assert( pPage->isInit );
  3449. assert( pCur->idx<pPage->nCell );
  3450. pCur->idx++;
  3451. pCur->info.nSize = 0;
  3452. if( pCur->idx>=pPage->nCell ){
  3453. if( !pPage->leaf ){
  3454. rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
  3455. if( rc ) return rc;
  3456. rc = moveToLeftmost(pCur);
  3457. *pRes = 0;
  3458. return rc;
  3459. }
  3460. do{
  3461. if( sqlite3BtreeIsRootPage(pPage) ){
  3462. *pRes = 1;
  3463. pCur->eState = CURSOR_INVALID;
  3464. return SQLITE_OK;
  3465. }
  3466. sqlite3BtreeMoveToParent(pCur);
  3467. pPage = pCur->pPage;
  3468. }while( pCur->idx>=pPage->nCell );
  3469. *pRes = 0;
  3470. if( pPage->leafData ){
  3471. rc = sqlite3BtreeNext(pCur, pRes);
  3472. }else{
  3473. rc = SQLITE_OK;
  3474. }
  3475. return rc;
  3476. }
  3477. *pRes = 0;
  3478. if( pPage->leaf ){
  3479. return SQLITE_OK;
  3480. }
  3481. rc = moveToLeftmost(pCur);
  3482. return rc;
  3483. }
  3484. int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
  3485. int rc;
  3486. assert( cursorHoldsMutex(pCur) );
  3487. rc = btreeNext(pCur, pRes);
  3488. return rc;
  3489. }
  3490. /*
  3491. ** Step the cursor to the back to the previous entry in the database. If
  3492. ** successful then set *pRes=0. If the cursor
  3493. ** was already pointing to the first entry in the database before
  3494. ** this routine was called, then set *pRes=1.
  3495. */
  3496. static int btreePrevious(BtCursor *pCur, int *pRes){
  3497. int rc;
  3498. Pgno pgno;
  3499. MemPage *pPage;
  3500. assert( cursorHoldsMutex(pCur) );
  3501. rc = restoreOrClearCursorPosition(pCur);
  3502. if( rc!=SQLITE_OK ){
  3503. return rc;
  3504. }
  3505. if( CURSOR_INVALID==pCur->eState ){
  3506. *pRes = 1;
  3507. return SQLITE_OK;
  3508. }
  3509. if( pCur->skip<0 ){
  3510. pCur->skip = 0;
  3511. *pRes = 0;
  3512. return SQLITE_OK;
  3513. }
  3514. pCur->skip = 0;
  3515. pPage = pCur->pPage;
  3516. assert( pPage->isInit );
  3517. assert( pCur->idx>=0 );
  3518. if( !pPage->leaf ){
  3519. pgno = get4byte( findCell(pPage, pCur->idx) );
  3520. rc = moveToChild(pCur, pgno);
  3521. if( rc ){
  3522. return rc;
  3523. }
  3524. rc = moveToRightmost(pCur);
  3525. }else{
  3526. while( pCur->idx==0 ){
  3527. if( sqlite3BtreeIsRootPage(pPage) ){
  3528. pCur->eState = CURSOR_INVALID;
  3529. *pRes = 1;
  3530. return SQLITE_OK;
  3531. }
  3532. sqlite3BtreeMoveToParent(pCur);
  3533. pPage = pCur->pPage;
  3534. }
  3535. pCur->idx--;
  3536. pCur->info.nSize = 0;
  3537. if( pPage->leafData && !pPage->leaf ){
  3538. rc = sqlite3BtreePrevious(pCur, pRes);
  3539. }else{
  3540. rc = SQLITE_OK;
  3541. }
  3542. }
  3543. *pRes = 0;
  3544. return rc;
  3545. }
  3546. int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){
  3547. int rc;
  3548. assert( cursorHoldsMutex(pCur) );
  3549. rc = btreePrevious(pCur, pRes);
  3550. return rc;
  3551. }
  3552. /*
  3553. ** Allocate a new page from the database file.
  3554. **
  3555. ** The new page is marked as dirty. (In other words, sqlite3PagerWrite()
  3556. ** has already been called on the new page.) The new page has also
  3557. ** been referenced and the calling routine is responsible for calling
  3558. ** sqlite3PagerUnref() on the new page when it is done.
  3559. **
  3560. ** SQLITE_OK is returned on success. Any other return value indicates
  3561. ** an error. *ppPage and *pPgno are undefined in the event of an error.
  3562. ** Do not invoke sqlite3PagerUnref() on *ppPage if an error is returned.
  3563. **
  3564. ** If the "nearby" parameter is not 0, then a (feeble) effort is made to
  3565. ** locate a page close to the page number "nearby". This can be used in an
  3566. ** attempt to keep related pages close to each other in the database file,
  3567. ** which in turn can make database access faster.
  3568. **
  3569. ** If the "exact" parameter is not 0, and the page-number nearby exists
  3570. ** anywhere on the free-list, then it is guarenteed to be returned. This
  3571. ** is only used by auto-vacuum databases when allocating a new table.
  3572. */
  3573. static int allocateBtreePage(
  3574. BtShared *pBt,
  3575. MemPage **ppPage,
  3576. Pgno *pPgno,
  3577. Pgno nearby,
  3578. u8 exact
  3579. ){
  3580. MemPage *pPage1;
  3581. int rc;
  3582. int n; /* Number of pages on the freelist */
  3583. int k; /* Number of leaves on the trunk of the freelist */
  3584. MemPage *pTrunk = 0;
  3585. MemPage *pPrevTrunk = 0;
  3586. assert( sqlite3_mutex_held(pBt->mutex) );
  3587. pPage1 = pBt->pPage1;
  3588. n = get4byte(&pPage1->aData[36]);
  3589. if( n>0 ){
  3590. /* There are pages on the freelist. Reuse one of those pages. */
  3591. Pgno iTrunk;
  3592. u8 searchList = 0; /* If the free-list must be searched for 'nearby' */
  3593. /* If the 'exact' parameter was true and a query of the pointer-map
  3594. ** shows that the page 'nearby' is somewhere on the free-list, then
  3595. ** the entire-list will be searched for that page.
  3596. */
  3597. #ifndef SQLITE_OMIT_AUTOVACUUM
  3598. if( exact && nearby<=sqlite3PagerPagecount(pBt->pPager) ){
  3599. u8 eType;
  3600. assert( nearby>0 );
  3601. assert( pBt->autoVacuum );
  3602. rc = ptrmapGet(pBt, nearby, &eType, 0);
  3603. if( rc ) return rc;
  3604. if( eType==PTRMAP_FREEPAGE ){
  3605. searchList = 1;
  3606. }
  3607. *pPgno = nearby;
  3608. }
  3609. #endif
  3610. /* Decrement the free-list count by 1. Set iTrunk to the index of the
  3611. ** first free-list trunk page. iPrevTrunk is initially 1.
  3612. */
  3613. rc = sqlite3PagerWrite(pPage1->pDbPage);
  3614. if( rc ) return rc;
  3615. put4byte(&pPage1->aData[36], n-1);
  3616. /* The code within this loop is run only once if the 'searchList' variable
  3617. ** is not true. Otherwise, it runs once for each trunk-page on the
  3618. ** free-list until the page 'nearby' is located.
  3619. */
  3620. do {
  3621. pPrevTrunk = pTrunk;
  3622. if( pPrevTrunk ){
  3623. iTrunk = get4byte(&pPrevTrunk->aData[0]);
  3624. }else{
  3625. iTrunk = get4byte(&pPage1->aData[32]);
  3626. }
  3627. rc = sqlite3BtreeGetPage(pBt, iTrunk, &pTrunk, 0);
  3628. if( rc ){
  3629. pTrunk = 0;
  3630. goto end_allocate_page;
  3631. }
  3632. k = get4byte(&pTrunk->aData[4]);
  3633. if( k==0 && !searchList ){
  3634. /* The trunk has no leaves and the list is not being searched.
  3635. ** So extract the trunk page itself and use it as the newly
  3636. ** allocated page */
  3637. assert( pPrevTrunk==0 );
  3638. rc = sqlite3PagerWrite(pTrunk->pDbPage);
  3639. if( rc ){
  3640. goto end_allocate_page;
  3641. }
  3642. *pPgno = iTrunk;
  3643. memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
  3644. *ppPage = pTrunk;
  3645. pTrunk = 0;
  3646. TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
  3647. }else if( k>pBt->usableSize/4 - 8 ){
  3648. /* Value of k is out of range. Database corruption */
  3649. rc = SQLITE_CORRUPT_BKPT;
  3650. goto end_allocate_page;
  3651. #ifndef SQLITE_OMIT_AUTOVACUUM
  3652. }else if( searchList && nearby==iTrunk ){
  3653. /* The list is being searched and this trunk page is the page
  3654. ** to allocate, regardless of whether it has leaves.
  3655. */
  3656. assert( *pPgno==iTrunk );
  3657. *ppPage = pTrunk;
  3658. searchList = 0;
  3659. rc = sqlite3PagerWrite(pTrunk->pDbPage);
  3660. if( rc ){
  3661. goto end_allocate_page;
  3662. }
  3663. if( k==0 ){
  3664. if( !pPrevTrunk ){
  3665. memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
  3666. }else{
  3667. memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4);
  3668. }
  3669. }else{
  3670. /* The trunk page is required by the caller but it contains
  3671. ** pointers to free-list leaves. The first leaf becomes a trunk
  3672. ** page in this case.
  3673. */
  3674. MemPage *pNewTrunk;
  3675. Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
  3676. rc = sqlite3BtreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0);
  3677. if( rc!=SQLITE_OK ){
  3678. goto end_allocate_page;
  3679. }
  3680. rc = sqlite3PagerWrite(pNewTrunk->pDbPage);
  3681. if( rc!=SQLITE_OK ){
  3682. releasePage(pNewTrunk);
  3683. goto end_allocate_page;
  3684. }
  3685. memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4);
  3686. put4byte(&pNewTrunk->aData[4], k-1);
  3687. memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4);
  3688. releasePage(pNewTrunk);
  3689. if( !pPrevTrunk ){
  3690. put4byte(&pPage1->aData[32], iNewTrunk);
  3691. }else{
  3692. rc = sqlite3PagerWrite(pPrevTrunk->pDbPage);
  3693. if( rc ){
  3694. goto end_allocate_page;
  3695. }
  3696. put4byte(&pPrevTrunk->aData[0], iNewTrunk);
  3697. }
  3698. }
  3699. pTrunk = 0;
  3700. TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
  3701. #endif
  3702. }else{
  3703. /* Extract a leaf from the trunk */
  3704. int closest;
  3705. Pgno iPage;
  3706. unsigned char *aData = pTrunk->aData;
  3707. rc = sqlite3PagerWrite(pTrunk->pDbPage);
  3708. if( rc ){
  3709. goto end_allocate_page;
  3710. }
  3711. if( nearby>0 ){
  3712. int i, dist;
  3713. closest = 0;
  3714. dist = get4byte(&aData[8]) - nearby;
  3715. if( dist<0 ) dist = -dist;
  3716. for(i=1; i<k; i++){
  3717. int d2 = get4byte(&aData[8+i*4]) - nearby;
  3718. if( d2<0 ) d2 = -d2;
  3719. if( d2<dist ){
  3720. closest = i;
  3721. dist = d2;
  3722. }
  3723. }
  3724. }else{
  3725. closest = 0;
  3726. }
  3727. iPage = get4byte(&aData[8+closest*4]);
  3728. if( !searchList || iPage==nearby ){
  3729. *pPgno = iPage;
  3730. if( *pPgno>sqlite3PagerPagecount(pBt->pPager) ){
  3731. /* Free page off the end of the file */
  3732. return SQLITE_CORRUPT_BKPT;
  3733. }
  3734. TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d"
  3735. ": %d more free pages\n",
  3736. *pPgno, closest+1, k, pTrunk->pgno, n-1));
  3737. if( closest<k-1 ){
  3738. memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
  3739. }
  3740. put4byte(&aData[4], k-1);
  3741. rc = sqlite3BtreeGetPage(pBt, *pPgno, ppPage, 1);
  3742. if( rc==SQLITE_OK ){
  3743. sqlite3PagerDontRollback((*ppPage)->pDbPage);
  3744. rc = sqlite3PagerWrite((*ppPage)->pDbPage);
  3745. if( rc!=SQLITE_OK ){
  3746. releasePage(*ppPage);
  3747. }
  3748. }
  3749. searchList = 0;
  3750. }
  3751. }
  3752. releasePage(pPrevTrunk);
  3753. pPrevTrunk = 0;
  3754. }while( searchList );
  3755. }else{
  3756. /* There are no pages on the freelist, so create a new page at the
  3757. ** end of the file */
  3758. *pPgno = sqlite3PagerPagecount(pBt->pPager) + 1;
  3759. #ifndef SQLITE_OMIT_AUTOVACUUM
  3760. if( pBt->nTrunc ){
  3761. /* An incr-vacuum has already run within this transaction. So the
  3762. ** page to allocate is not from the physical end of the file, but
  3763. ** at pBt->nTrunc.
  3764. */
  3765. *pPgno = pBt->nTrunc+1;
  3766. if( *pPgno==PENDING_BYTE_PAGE(pBt) ){
  3767. (*pPgno)++;
  3768. }
  3769. }
  3770. if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, *pPgno) ){
  3771. /* If *pPgno refers to a pointer-map page, allocate two new pages
  3772. ** at the end of the file instead of one. The first allocated page
  3773. ** becomes a new pointer-map page, the second is used by the caller.
  3774. */
  3775. TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", *pPgno));
  3776. assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
  3777. (*pPgno)++;
  3778. }
  3779. if( pBt->nTrunc ){
  3780. pBt->nTrunc = *pPgno;
  3781. }
  3782. #endif
  3783. assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
  3784. rc = sqlite3BtreeGetPage(pBt, *pPgno, ppPage, 0);
  3785. if( rc ) return rc;
  3786. rc = sqlite3PagerWrite((*ppPage)->pDbPage);
  3787. if( rc!=SQLITE_OK ){
  3788. releasePage(*ppPage);
  3789. }
  3790. TRACE(("ALLOCATE: %d from end of file\n", *pPgno));
  3791. }
  3792. assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
  3793. end_allocate_page:
  3794. releasePage(pTrunk);
  3795. releasePage(pPrevTrunk);
  3796. return rc;
  3797. }
  3798. /*
  3799. ** Add a page of the database file to the freelist.
  3800. **
  3801. ** sqlite3PagerUnref() is NOT called for pPage.
  3802. */
  3803. static int freePage(MemPage *pPage){
  3804. BtShared *pBt = pPage->pBt;
  3805. MemPage *pPage1 = pBt->pPage1;
  3806. int rc, n, k;
  3807. /* Prepare the page for freeing */
  3808. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  3809. assert( pPage->pgno>1 );
  3810. pPage->isInit = 0;
  3811. releasePage(pPage->pParent);
  3812. pPage->pParent = 0;
  3813. /* Increment the free page count on pPage1 */
  3814. rc = sqlite3PagerWrite(pPage1->pDbPage);
  3815. if( rc ) return rc;
  3816. n = get4byte(&pPage1->aData[36]);
  3817. put4byte(&pPage1->aData[36], n+1);
  3818. #ifdef SQLITE_SECURE_DELETE
  3819. /* If the SQLITE_SECURE_DELETE compile-time option is enabled, then
  3820. ** always fully overwrite deleted information with zeros.
  3821. */
  3822. rc = sqlite3PagerWrite(pPage->pDbPage);
  3823. if( rc ) return rc;
  3824. memset(pPage->aData, 0, pPage->pBt->pageSize);
  3825. #endif
  3826. #ifndef SQLITE_OMIT_AUTOVACUUM
  3827. /* If the database supports auto-vacuum, write an entry in the pointer-map
  3828. ** to indicate that the page is free.
  3829. */
  3830. if( pBt->autoVacuum ){
  3831. rc = ptrmapPut(pBt, pPage->pgno, PTRMAP_FREEPAGE, 0);
  3832. if( rc ) return rc;
  3833. }
  3834. #endif
  3835. if( n==0 ){
  3836. /* This is the first free page */
  3837. rc = sqlite3PagerWrite(pPage->pDbPage);
  3838. if( rc ) return rc;
  3839. memset(pPage->aData, 0, 8);
  3840. put4byte(&pPage1->aData[32], pPage->pgno);
  3841. TRACE(("FREE-PAGE: %d first\n", pPage->pgno));
  3842. }else{
  3843. /* Other free pages already exist. Retrive the first trunk page
  3844. ** of the freelist and find out how many leaves it has. */
  3845. MemPage *pTrunk;
  3846. rc = sqlite3BtreeGetPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk, 0);
  3847. if( rc ) return rc;
  3848. k = get4byte(&pTrunk->aData[4]);
  3849. if( k>=pBt->usableSize/4 - 8 ){
  3850. /* The trunk is full. Turn the page being freed into a new
  3851. ** trunk page with no leaves. */
  3852. rc = sqlite3PagerWrite(pPage->pDbPage);
  3853. if( rc==SQLITE_OK ){
  3854. put4byte(pPage->aData, pTrunk->pgno);
  3855. put4byte(&pPage->aData[4], 0);
  3856. put4byte(&pPage1->aData[32], pPage->pgno);
  3857. TRACE(("FREE-PAGE: %d new trunk page replacing %d\n",
  3858. pPage->pgno, pTrunk->pgno));
  3859. }
  3860. }else if( k<0 ){
  3861. rc = SQLITE_CORRUPT;
  3862. }else{
  3863. /* Add the newly freed page as a leaf on the current trunk */
  3864. rc = sqlite3PagerWrite(pTrunk->pDbPage);
  3865. if( rc==SQLITE_OK ){
  3866. put4byte(&pTrunk->aData[4], k+1);
  3867. put4byte(&pTrunk->aData[8+k*4], pPage->pgno);
  3868. #ifndef SQLITE_SECURE_DELETE
  3869. sqlite3PagerDontWrite(pPage->pDbPage);
  3870. #endif
  3871. }
  3872. TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno));
  3873. }
  3874. releasePage(pTrunk);
  3875. }
  3876. return rc;
  3877. }
  3878. /*
  3879. ** Free any overflow pages associated with the given Cell.
  3880. */
  3881. static int clearCell(MemPage *pPage, unsigned char *pCell){
  3882. BtShared *pBt = pPage->pBt;
  3883. CellInfo info;
  3884. Pgno ovflPgno;
  3885. int rc;
  3886. int nOvfl;
  3887. int ovflPageSize;
  3888. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  3889. sqlite3BtreeParseCellPtr(pPage, pCell, &info);
  3890. if( info.iOverflow==0 ){
  3891. return SQLITE_OK; /* No overflow pages. Return without doing anything */
  3892. }
  3893. ovflPgno = get4byte(&pCell[info.iOverflow]);
  3894. ovflPageSize = pBt->usableSize - 4;
  3895. nOvfl = (info.nPayload - info.nLocal + ovflPageSize - 1)/ovflPageSize;
  3896. assert( ovflPgno==0 || nOvfl>0 );
  3897. while( nOvfl-- ){
  3898. MemPage *pOvfl;
  3899. if( ovflPgno==0 || ovflPgno>sqlite3PagerPagecount(pBt->pPager) ){
  3900. return SQLITE_CORRUPT_BKPT;
  3901. }
  3902. rc = getOverflowPage(pBt, ovflPgno, &pOvfl, (nOvfl==0)?0:&ovflPgno);
  3903. if( rc ) return rc;
  3904. rc = freePage(pOvfl);
  3905. sqlite3PagerUnref(pOvfl->pDbPage);
  3906. if( rc ) return rc;
  3907. }
  3908. return SQLITE_OK;
  3909. }
  3910. /*
  3911. ** Create the byte sequence used to represent a cell on page pPage
  3912. ** and write that byte sequence into pCell[]. Overflow pages are
  3913. ** allocated and filled in as necessary. The calling procedure
  3914. ** is responsible for making sure sufficient space has been allocated
  3915. ** for pCell[].
  3916. **
  3917. ** Note that pCell does not necessary need to point to the pPage->aData
  3918. ** area. pCell might point to some temporary storage. The cell will
  3919. ** be constructed in this temporary area then copied into pPage->aData
  3920. ** later.
  3921. */
  3922. static int fillInCell(
  3923. MemPage *pPage, /* The page that contains the cell */
  3924. unsigned char *pCell, /* Complete text of the cell */
  3925. const void *pKey, i64 nKey, /* The key */
  3926. const void *pData,int nData, /* The data */
  3927. int nZero, /* Extra zero bytes to append to pData */
  3928. int *pnSize /* Write cell size here */
  3929. ){
  3930. int nPayload;
  3931. const u8 *pSrc;
  3932. int nSrc, n, rc;
  3933. int spaceLeft;
  3934. MemPage *pOvfl = 0;
  3935. MemPage *pToRelease = 0;
  3936. unsigned char *pPrior;
  3937. unsigned char *pPayload;
  3938. BtShared *pBt = pPage->pBt;
  3939. Pgno pgnoOvfl = 0;
  3940. int nHeader;
  3941. CellInfo info;
  3942. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  3943. /* Fill in the header. */
  3944. nHeader = 0;
  3945. if( !pPage->leaf ){
  3946. nHeader += 4;
  3947. }
  3948. if( pPage->hasData ){
  3949. nHeader += putVarint(&pCell[nHeader], nData+nZero);
  3950. }else{
  3951. nData = nZero = 0;
  3952. }
  3953. nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey);
  3954. sqlite3BtreeParseCellPtr(pPage, pCell, &info);
  3955. assert( info.nHeader==nHeader );
  3956. assert( info.nKey==nKey );
  3957. assert( info.nData==nData+nZero );
  3958. /* Fill in the payload */
  3959. nPayload = nData + nZero;
  3960. if( pPage->intKey ){
  3961. pSrc = pData;
  3962. nSrc = nData;
  3963. nData = 0;
  3964. }else{
  3965. nPayload += nKey;
  3966. pSrc = pKey;
  3967. nSrc = nKey;
  3968. }
  3969. *pnSize = info.nSize;
  3970. spaceLeft = info.nLocal;
  3971. pPayload = &pCell[nHeader];
  3972. pPrior = &pCell[info.iOverflow];
  3973. while( nPayload>0 ){
  3974. if( spaceLeft==0 ){
  3975. int isExact = 0;
  3976. #ifndef SQLITE_OMIT_AUTOVACUUM
  3977. Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */
  3978. if( pBt->autoVacuum ){
  3979. do{
  3980. pgnoOvfl++;
  3981. } while(
  3982. PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt)
  3983. );
  3984. if( pgnoOvfl>1 ){
  3985. /* isExact = 1; */
  3986. }
  3987. }
  3988. #endif
  3989. rc = allocateBtreePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, isExact);
  3990. #ifndef SQLITE_OMIT_AUTOVACUUM
  3991. /* If the database supports auto-vacuum, and the second or subsequent
  3992. ** overflow page is being allocated, add an entry to the pointer-map
  3993. ** for that page now.
  3994. **
  3995. ** If this is the first overflow page, then write a partial entry
  3996. ** to the pointer-map. If we write nothing to this pointer-map slot,
  3997. ** then the optimistic overflow chain processing in clearCell()
  3998. ** may misinterpret the uninitialised values and delete the
  3999. ** wrong pages from the database.
  4000. */
  4001. if( pBt->autoVacuum && rc==SQLITE_OK ){
  4002. u8 eType = (pgnoPtrmap?PTRMAP_OVERFLOW2:PTRMAP_OVERFLOW1);
  4003. rc = ptrmapPut(pBt, pgnoOvfl, eType, pgnoPtrmap);
  4004. if( rc ){
  4005. releasePage(pOvfl);
  4006. }
  4007. }
  4008. #endif
  4009. if( rc ){
  4010. releasePage(pToRelease);
  4011. return rc;
  4012. }
  4013. put4byte(pPrior, pgnoOvfl);
  4014. releasePage(pToRelease);
  4015. pToRelease = pOvfl;
  4016. pPrior = pOvfl->aData;
  4017. put4byte(pPrior, 0);
  4018. pPayload = &pOvfl->aData[4];
  4019. spaceLeft = pBt->usableSize - 4;
  4020. }
  4021. n = nPayload;
  4022. if( n>spaceLeft ) n = spaceLeft;
  4023. if( nSrc>0 ){
  4024. if( n>nSrc ) n = nSrc;
  4025. assert( pSrc );
  4026. memcpy(pPayload, pSrc, n);
  4027. }else{
  4028. memset(pPayload, 0, n);
  4029. }
  4030. nPayload -= n;
  4031. pPayload += n;
  4032. pSrc += n;
  4033. nSrc -= n;
  4034. spaceLeft -= n;
  4035. if( nSrc==0 ){
  4036. nSrc = nData;
  4037. pSrc = pData;
  4038. }
  4039. }
  4040. releasePage(pToRelease);
  4041. return SQLITE_OK;
  4042. }
  4043. /*
  4044. ** Change the MemPage.pParent pointer on the page whose number is
  4045. ** given in the second argument so that MemPage.pParent holds the
  4046. ** pointer in the third argument.
  4047. */
  4048. static int reparentPage(BtShared *pBt, Pgno pgno, MemPage *pNewParent, int idx){
  4049. MemPage *pThis;
  4050. DbPage *pDbPage;
  4051. assert( sqlite3_mutex_held(pBt->mutex) );
  4052. assert( pNewParent!=0 );
  4053. if( pgno==0 ) return SQLITE_OK;
  4054. assert( pBt->pPager!=0 );
  4055. pDbPage = sqlite3PagerLookup(pBt->pPager, pgno);
  4056. if( pDbPage ){
  4057. pThis = (MemPage *)sqlite3PagerGetExtra(pDbPage);
  4058. if( pThis->isInit ){
  4059. assert( pThis->aData==sqlite3PagerGetData(pDbPage) );
  4060. if( pThis->pParent!=pNewParent ){
  4061. if( pThis->pParent ) sqlite3PagerUnref(pThis->pParent->pDbPage);
  4062. pThis->pParent = pNewParent;
  4063. sqlite3PagerRef(pNewParent->pDbPage);
  4064. }
  4065. pThis->idxParent = idx;
  4066. }
  4067. sqlite3PagerUnref(pDbPage);
  4068. }
  4069. #ifndef SQLITE_OMIT_AUTOVACUUM
  4070. if( pBt->autoVacuum ){
  4071. return ptrmapPut(pBt, pgno, PTRMAP_BTREE, pNewParent->pgno);
  4072. }
  4073. #endif
  4074. return SQLITE_OK;
  4075. }
  4076. /*
  4077. ** Change the pParent pointer of all children of pPage to point back
  4078. ** to pPage.
  4079. **
  4080. ** In other words, for every child of pPage, invoke reparentPage()
  4081. ** to make sure that each child knows that pPage is its parent.
  4082. **
  4083. ** This routine gets called after you memcpy() one page into
  4084. ** another.
  4085. */
  4086. static int reparentChildPages(MemPage *pPage){
  4087. int i;
  4088. BtShared *pBt = pPage->pBt;
  4089. int rc = SQLITE_OK;
  4090. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  4091. if( pPage->leaf ) return SQLITE_OK;
  4092. for(i=0; i<pPage->nCell; i++){
  4093. u8 *pCell = findCell(pPage, i);
  4094. if( !pPage->leaf ){
  4095. rc = reparentPage(pBt, get4byte(pCell), pPage, i);
  4096. if( rc!=SQLITE_OK ) return rc;
  4097. }
  4098. }
  4099. if( !pPage->leaf ){
  4100. rc = reparentPage(pBt, get4byte(&pPage->aData[pPage->hdrOffset+8]),
  4101. pPage, i);
  4102. pPage->idxShift = 0;
  4103. }
  4104. return rc;
  4105. }
  4106. /*
  4107. ** Remove the i-th cell from pPage. This routine effects pPage only.
  4108. ** The cell content is not freed or deallocated. It is assumed that
  4109. ** the cell content has been copied someplace else. This routine just
  4110. ** removes the reference to the cell from pPage.
  4111. **
  4112. ** "sz" must be the number of bytes in the cell.
  4113. */
  4114. static void dropCell(MemPage *pPage, int idx, int sz){
  4115. int i; /* Loop counter */
  4116. int pc; /* Offset to cell content of cell being deleted */
  4117. u8 *data; /* pPage->aData */
  4118. u8 *ptr; /* Used to move bytes around within data[] */
  4119. assert( idx>=0 && idx<pPage->nCell );
  4120. assert( sz==cellSize(pPage, idx) );
  4121. assert( sqlite3PagerIswriteable(pPage->pDbPage) );
  4122. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  4123. data = pPage->aData;
  4124. ptr = &data[pPage->cellOffset + 2*idx];
  4125. pc = get2byte(ptr);
  4126. assert( pc>10 && pc+sz<=pPage->pBt->usableSize );
  4127. freeSpace(pPage, pc, sz);
  4128. for(i=idx+1; i<pPage->nCell; i++, ptr+=2){
  4129. ptr[0] = ptr[2];
  4130. ptr[1] = ptr[3];
  4131. }
  4132. pPage->nCell--;
  4133. put2byte(&data[pPage->hdrOffset+3], pPage->nCell);
  4134. pPage->nFree += 2;
  4135. pPage->idxShift = 1;
  4136. }
  4137. /*
  4138. ** Insert a new cell on pPage at cell index "i". pCell points to the
  4139. ** content of the cell.
  4140. **
  4141. ** If the cell content will fit on the page, then put it there. If it
  4142. ** will not fit, then make a copy of the cell content into pTemp if
  4143. ** pTemp is not null. Regardless of pTemp, allocate a new entry
  4144. ** in pPage->aOvfl[] and make it point to the cell content (either
  4145. ** in pTemp or the original pCell) and also record its index.
  4146. ** Allocating a new entry in pPage->aCell[] implies that
  4147. ** pPage->nOverflow is incremented.
  4148. **
  4149. ** If nSkip is non-zero, then do not copy the first nSkip bytes of the
  4150. ** cell. The caller will overwrite them after this function returns. If
  4151. ** nSkip is non-zero, then pCell may not point to an invalid memory location
  4152. ** (but pCell+nSkip is always valid).
  4153. */
  4154. static int insertCell(
  4155. MemPage *pPage, /* Page into which we are copying */
  4156. int i, /* New cell becomes the i-th cell of the page */
  4157. u8 *pCell, /* Content of the new cell */
  4158. int sz, /* Bytes of content in pCell */
  4159. u8 *pTemp, /* Temp storage space for pCell, if needed */
  4160. u8 nSkip /* Do not write the first nSkip bytes of the cell */
  4161. ){
  4162. int idx; /* Where to write new cell content in data[] */
  4163. int j; /* Loop counter */
  4164. int top; /* First byte of content for any cell in data[] */
  4165. int end; /* First byte past the last cell pointer in data[] */
  4166. int ins; /* Index in data[] where new cell pointer is inserted */
  4167. int hdr; /* Offset into data[] of the page header */
  4168. int cellOffset; /* Address of first cell pointer in data[] */
  4169. u8 *data; /* The content of the whole page */
  4170. u8 *ptr; /* Used for moving information around in data[] */
  4171. assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
  4172. assert( sz==cellSizePtr(pPage, pCell) );
  4173. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  4174. if( pPage->nOverflow || sz+2>pPage->nFree ){
  4175. if( pTemp ){
  4176. memcpy(pTemp+nSkip, pCell+nSkip, sz-nSkip);
  4177. pCell = pTemp;
  4178. }
  4179. j = pPage->nOverflow++;
  4180. assert( j<sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0]) );
  4181. pPage->aOvfl[j].pCell = pCell;
  4182. pPage->aOvfl[j].idx = i;
  4183. pPage->nFree = 0;
  4184. }else{
  4185. int rc = sqlite3PagerWrite(pPage->pDbPage);
  4186. if( rc!=SQLITE_OK ){
  4187. return rc;
  4188. }
  4189. assert( sqlite3PagerIswriteable(pPage->pDbPage) );
  4190. data = pPage->aData;
  4191. hdr = pPage->hdrOffset;
  4192. top = get2byte(&data[hdr+5]);
  4193. cellOffset = pPage->cellOffset;
  4194. end = cellOffset + 2*pPage->nCell + 2;
  4195. ins = cellOffset + 2*i;
  4196. if( end > top - sz ){
  4197. rc = defragmentPage(pPage);
  4198. if( rc!=SQLITE_OK ) return rc;
  4199. top = get2byte(&data[hdr+5]);
  4200. assert( end + sz <= top );
  4201. }
  4202. idx = allocateSpace(pPage, sz);
  4203. assert( idx>0 );
  4204. assert( end <= get2byte(&data[hdr+5]) );
  4205. pPage->nCell++;
  4206. pPage->nFree -= 2;
  4207. memcpy(&data[idx+nSkip], pCell+nSkip, sz-nSkip);
  4208. for(j=end-2, ptr=&data[j]; j>ins; j-=2, ptr-=2){
  4209. ptr[0] = ptr[-2];
  4210. ptr[1] = ptr[-1];
  4211. }
  4212. put2byte(&data[ins], idx);
  4213. put2byte(&data[hdr+3], pPage->nCell);
  4214. pPage->idxShift = 1;
  4215. #ifndef SQLITE_OMIT_AUTOVACUUM
  4216. if( pPage->pBt->autoVacuum ){
  4217. /* The cell may contain a pointer to an overflow page. If so, write
  4218. ** the entry for the overflow page into the pointer map.
  4219. */
  4220. CellInfo info;
  4221. sqlite3BtreeParseCellPtr(pPage, pCell, &info);
  4222. assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload );
  4223. if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){
  4224. Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
  4225. rc = ptrmapPut(pPage->pBt, pgnoOvfl, PTRMAP_OVERFLOW1, pPage->pgno);
  4226. if( rc!=SQLITE_OK ) return rc;
  4227. }
  4228. }
  4229. #endif
  4230. }
  4231. return SQLITE_OK;
  4232. }
  4233. /*
  4234. ** Add a list of cells to a page. The page should be initially empty.
  4235. ** The cells are guaranteed to fit on the page.
  4236. */
  4237. static void assemblePage(
  4238. MemPage *pPage, /* The page to be assemblied */
  4239. int nCell, /* The number of cells to add to this page */
  4240. u8 **apCell, /* Pointers to cell bodies */
  4241. int *aSize /* Sizes of the cells */
  4242. ){
  4243. int i; /* Loop counter */
  4244. int totalSize; /* Total size of all cells */
  4245. int hdr; /* Index of page header */
  4246. int cellptr; /* Address of next cell pointer */
  4247. int cellbody; /* Address of next cell body */
  4248. u8 *data; /* Data for the page */
  4249. assert( pPage->nOverflow==0 );
  4250. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  4251. totalSize = 0;
  4252. for(i=0; i<nCell; i++){
  4253. totalSize += aSize[i];
  4254. }
  4255. assert( totalSize+2*nCell<=pPage->nFree );
  4256. assert( pPage->nCell==0 );
  4257. cellptr = pPage->cellOffset;
  4258. data = pPage->aData;
  4259. hdr = pPage->hdrOffset;
  4260. put2byte(&data[hdr+3], nCell);
  4261. if( nCell ){
  4262. cellbody = allocateSpace(pPage, totalSize);
  4263. assert( cellbody>0 );
  4264. assert( pPage->nFree >= 2*nCell );
  4265. pPage->nFree -= 2*nCell;
  4266. for(i=0; i<nCell; i++){
  4267. put2byte(&data[cellptr], cellbody);
  4268. memcpy(&data[cellbody], apCell[i], aSize[i]);
  4269. cellptr += 2;
  4270. cellbody += aSize[i];
  4271. }
  4272. assert( cellbody==pPage->pBt->usableSize );
  4273. }
  4274. pPage->nCell = nCell;
  4275. }
  /*
  ** Tuning parameters for the balancing operation.
  **
  ** NN is the number of neighbors on either side of a page that take
  ** part in a balancing operation.  NB is the total number of pages that
  ** take part: the target page plus NN neighbors on each side.
  **
  ** The minimum value of NN is 1 (of course).  Increasing NN above 1
  ** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance
  ** in exchange for a larger degradation in INSERT and UPDATE performance.
  ** The current value, NN==1, appears to give the best results overall.
  */
  #define NN 1             /* Number of neighbors on either side of pPage */
  #define NB ((NN)*2+1)    /* Total pages involved in the balance */
  4290. /* Forward reference */
  4291. static int balance(MemPage*, int);
  4292. #ifndef SQLITE_OMIT_QUICKBALANCE
  4293. /*
  4294. ** This version of balance() handles the common special case where
  4295. ** a new entry is being inserted on the extreme right-end of the
  4296. ** tree, in other words, when the new entry will become the largest
  4297. ** entry in the tree.
  4298. **
  4299. ** Instead of trying balance the 3 right-most leaf pages, just add
  4300. ** a new page to the right-hand side and put the one new entry in
  4301. ** that page. This leaves the right side of the tree somewhat
  4302. ** unbalanced. But odds are that we will be inserting new entries
  4303. ** at the end soon afterwards so the nearly empty page will quickly
  4304. ** fill up. On average.
  4305. **
  4306. ** pPage is the leaf page which is the right-most page in the tree.
  4307. ** pParent is its parent. pPage must have a single overflow entry
  4308. ** which is also the right-most entry on the page.
  4309. */
  4310. static int balance_quick(MemPage *pPage, MemPage *pParent){
  4311. int rc;
  4312. MemPage *pNew;
  4313. Pgno pgnoNew;
  4314. u8 *pCell;
  4315. int szCell;
  4316. CellInfo info;
  4317. BtShared *pBt = pPage->pBt;
  4318. int parentIdx = pParent->nCell; /* pParent new divider cell index */
  4319. int parentSize; /* Size of new divider cell */
  4320. u8 parentCell[64]; /* Space for the new divider cell */
  4321. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  4322. /* Allocate a new page. Insert the overflow cell from pPage
  4323. ** into it. Then remove the overflow cell from pPage.
  4324. */
  4325. rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0);
  4326. if( rc!=SQLITE_OK ){
  4327. return rc;
  4328. }
  4329. pCell = pPage->aOvfl[0].pCell;
  4330. szCell = cellSizePtr(pPage, pCell);
  4331. zeroPage(pNew, pPage->aData[0]);
  4332. assemblePage(pNew, 1, &pCell, &szCell);
  4333. pPage->nOverflow = 0;
  4334. /* Set the parent of the newly allocated page to pParent. */
  4335. pNew->pParent = pParent;
  4336. sqlite3PagerRef(pParent->pDbPage);
  4337. /* pPage is currently the right-child of pParent. Change this
  4338. ** so that the right-child is the new page allocated above and
  4339. ** pPage is the next-to-right child.
  4340. */
  4341. assert( pPage->nCell>0 );
  4342. pCell = findCell(pPage, pPage->nCell-1);
  4343. sqlite3BtreeParseCellPtr(pPage, pCell, &info);
  4344. rc = fillInCell(pParent, parentCell, 0, info.nKey, 0, 0, 0, &parentSize);
  4345. if( rc!=SQLITE_OK ){
  4346. return rc;
  4347. }
  4348. assert( parentSize<64 );
  4349. rc = insertCell(pParent, parentIdx, parentCell, parentSize, 0, 4);
  4350. if( rc!=SQLITE_OK ){
  4351. return rc;
  4352. }
  4353. put4byte(findOverflowCell(pParent,parentIdx), pPage->pgno);
  4354. put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew);
  4355. #ifndef SQLITE_OMIT_AUTOVACUUM
  4356. /* If this is an auto-vacuum database, update the pointer map
  4357. ** with entries for the new page, and any pointer from the
  4358. ** cell on the page to an overflow page.
  4359. */
  4360. if( pBt->autoVacuum ){
  4361. rc = ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno);
  4362. if( rc==SQLITE_OK ){
  4363. rc = ptrmapPutOvfl(pNew, 0);
  4364. }
  4365. if( rc!=SQLITE_OK ){
  4366. releasePage(pNew);
  4367. return rc;
  4368. }
  4369. }
  4370. #endif
  4371. /* Release the reference to the new page and balance the parent page,
  4372. ** in case the divider cell inserted caused it to become overfull.
  4373. */
  4374. releasePage(pNew);
  4375. return balance(pParent, 0);
  4376. }
  4377. #endif /* SQLITE_OMIT_QUICKBALANCE */
  4378. /*
  4379. ** This routine redistributes Cells on pPage and up to NN*2 siblings
  4380. ** of pPage so that all pages have about the same amount of free space.
  4381. ** Usually NN siblings on either side of pPage is used in the balancing,
  4382. ** though more siblings might come from one side if pPage is the first
  4383. ** or last child of its parent. If pPage has fewer than 2*NN siblings
  4384. ** (something which can only happen if pPage is the root page or a
  4385. ** child of root) then all available siblings participate in the balancing.
  4386. **
  4387. ** The number of siblings of pPage might be increased or decreased by one or
  4388. ** two in an effort to keep pages nearly full but not over full. The root page
  4389. ** is special and is allowed to be nearly empty. If pPage is
  4390. ** the root page, then the depth of the tree might be increased
  4391. ** or decreased by one, as necessary, to keep the root page from being
  4392. ** overfull or completely empty.
  4393. **
  4394. ** Note that when this routine is called, some of the Cells on pPage
  4395. ** might not actually be stored in pPage->aData[]. This can happen
  4396. ** if the page is overfull. Part of the job of this routine is to
  4397. ** make sure all Cells for pPage once again fit in pPage->aData[].
  4398. **
  4399. ** In the course of balancing the siblings of pPage, the parent of pPage
  4400. ** might become overfull or underfull. If that happens, then this routine
  4401. ** is called recursively on the parent.
  4402. **
  4403. ** If this routine fails for any reason, it might leave the database
  4404. ** in a corrupted state. So if this routine fails, the database should
  4405. ** be rolled back.
  4406. */
  4407. static int balance_nonroot(MemPage *pPage){
  4408. MemPage *pParent; /* The parent of pPage */
  4409. BtShared *pBt; /* The whole database */
  4410. int nCell = 0; /* Number of cells in apCell[] */
  4411. int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */
  4412. int nOld; /* Number of pages in apOld[] */
  4413. int nNew; /* Number of pages in apNew[] */
  4414. int nDiv; /* Number of cells in apDiv[] */
  4415. int i, j, k; /* Loop counters */
  4416. int idx; /* Index of pPage in pParent->aCell[] */
  4417. int nxDiv; /* Next divider slot in pParent->aCell[] */
  4418. int rc; /* The return code */
  4419. int leafCorrection; /* 4 if pPage is a leaf. 0 if not */
  4420. int leafData; /* True if pPage is a leaf of a LEAFDATA tree */
  4421. int usableSpace; /* Bytes in pPage beyond the header */
  4422. int pageFlags; /* Value of pPage->aData[0] */
  4423. int subtotal; /* Subtotal of bytes in cells on one page */
  4424. int iSpace = 0; /* First unused byte of aSpace[] */
  4425. MemPage *apOld[NB]; /* pPage and up to two siblings */
  4426. Pgno pgnoOld[NB]; /* Page numbers for each page in apOld[] */
  4427. MemPage *apCopy[NB]; /* Private copies of apOld[] pages */
  4428. MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */
  4429. Pgno pgnoNew[NB+2]; /* Page numbers for each page in apNew[] */
  4430. u8 *apDiv[NB]; /* Divider cells in pParent */
  4431. int cntNew[NB+2]; /* Index in aCell[] of cell after i-th page */
  4432. int szNew[NB+2]; /* Combined size of cells place on i-th page */
  4433. u8 **apCell = 0; /* All cells begin balanced */
  4434. int *szCell; /* Local size of all cells in apCell[] */
  4435. u8 *aCopy[NB]; /* Space for holding data of apCopy[] */
  4436. u8 *aSpace; /* Space to hold copies of dividers cells */
  4437. #ifndef SQLITE_OMIT_AUTOVACUUM
  4438. u8 *aFrom = 0;
  4439. #endif
  4440. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  4441. /*
  4442. ** Find the parent page.
  4443. */
  4444. assert( pPage->isInit );
  4445. assert( sqlite3PagerIswriteable(pPage->pDbPage) || pPage->nOverflow==1 );
  4446. pBt = pPage->pBt;
  4447. pParent = pPage->pParent;
  4448. assert( pParent );
  4449. if( SQLITE_OK!=(rc = sqlite3PagerWrite(pParent->pDbPage)) ){
  4450. return rc;
  4451. }
  4452. TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));
  4453. #ifndef SQLITE_OMIT_QUICKBALANCE
  4454. /*
  4455. ** A special case: If a new entry has just been inserted into a
  4456. ** table (that is, a btree with integer keys and all data at the leaves)
  4457. ** and the new entry is the right-most entry in the tree (it has the
  4458. ** largest key) then use the special balance_quick() routine for
  4459. ** balancing. balance_quick() is much faster and results in a tighter
  4460. ** packing of data in the common case.
  4461. */
  4462. if( pPage->leaf &&
  4463. pPage->intKey &&
  4464. pPage->leafData &&
  4465. pPage->nOverflow==1 &&
  4466. pPage->aOvfl[0].idx==pPage->nCell &&
  4467. pPage->pParent->pgno!=1 &&
  4468. get4byte(&pParent->aData[pParent->hdrOffset+8])==pPage->pgno
  4469. ){
  4470. /*
  4471. ** TODO: Check the siblings to the left of pPage. It may be that
  4472. ** they are not full and no new page is required.
  4473. */
  4474. return balance_quick(pPage, pParent);
  4475. }
  4476. #endif
  4477. if( SQLITE_OK!=(rc = sqlite3PagerWrite(pPage->pDbPage)) ){
  4478. return rc;
  4479. }
  4480. /*
  4481. ** Find the cell in the parent page whose left child points back
  4482. ** to pPage. The "idx" variable is the index of that cell. If pPage
  4483. ** is the rightmost child of pParent then set idx to pParent->nCell
  4484. */
  4485. if( pParent->idxShift ){
  4486. Pgno pgno;
  4487. pgno = pPage->pgno;
  4488. assert( pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
  4489. for(idx=0; idx<pParent->nCell; idx++){
  4490. if( get4byte(findCell(pParent, idx))==pgno ){
  4491. break;
  4492. }
  4493. }
  4494. assert( idx<pParent->nCell
  4495. || get4byte(&pParent->aData[pParent->hdrOffset+8])==pgno );
  4496. }else{
  4497. idx = pPage->idxParent;
  4498. }
  4499. /*
  4500. ** Initialize variables so that it will be safe to jump
  4501. ** directly to balance_cleanup at any moment.
  4502. */
  4503. nOld = nNew = 0;
  4504. sqlite3PagerRef(pParent->pDbPage);
  4505. /*
  4506. ** Find sibling pages to pPage and the cells in pParent that divide
  4507. ** the siblings. An attempt is made to find NN siblings on either
  4508. ** side of pPage. More siblings are taken from one side, however, if
  4509. ** pPage there are fewer than NN siblings on the other side. If pParent
  4510. ** has NB or fewer children then all children of pParent are taken.
  4511. */
  4512. nxDiv = idx - NN;
  4513. if( nxDiv + NB > pParent->nCell ){
  4514. nxDiv = pParent->nCell - NB + 1;
  4515. }
  4516. if( nxDiv<0 ){
  4517. nxDiv = 0;
  4518. }
  4519. nDiv = 0;
  4520. for(i=0, k=nxDiv; i<NB; i++, k++){
  4521. if( k<pParent->nCell ){
  4522. apDiv[i] = findCell(pParent, k);
  4523. nDiv++;
  4524. assert( !pParent->leaf );
  4525. pgnoOld[i] = get4byte(apDiv[i]);
  4526. }else if( k==pParent->nCell ){
  4527. pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+8]);
  4528. }else{
  4529. break;
  4530. }
  4531. rc = getAndInitPage(pBt, pgnoOld[i], &apOld[i], pParent);
  4532. if( rc ) goto balance_cleanup;
  4533. apOld[i]->idxParent = k;
  4534. apCopy[i] = 0;
  4535. assert( i==nOld );
  4536. nOld++;
  4537. nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow;
  4538. }
  4539. /* Make nMaxCells a multiple of 2 in order to preserve 8-byte
  4540. ** alignment */
  4541. nMaxCells = (nMaxCells + 1)&~1;
  4542. /*
  4543. ** Allocate space for memory structures
  4544. */
  4545. apCell = sqlite3_malloc(
  4546. nMaxCells*sizeof(u8*) /* apCell */
  4547. + nMaxCells*sizeof(int) /* szCell */
  4548. + ROUND8(sizeof(MemPage))*NB /* aCopy */
  4549. + pBt->pageSize*(5+NB) /* aSpace */
  4550. + (ISAUTOVACUUM ? nMaxCells : 0) /* aFrom */
  4551. );
  4552. if( apCell==0 ){
  4553. rc = SQLITE_NOMEM;
  4554. goto balance_cleanup;
  4555. }
  4556. szCell = (int*)&apCell[nMaxCells];
  4557. aCopy[0] = (u8*)&szCell[nMaxCells];
  4558. assert( ((aCopy[0] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
  4559. for(i=1; i<NB; i++){
  4560. aCopy[i] = &aCopy[i-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
  4561. assert( ((aCopy[i] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
  4562. }
  4563. aSpace = &aCopy[NB-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
  4564. assert( ((aSpace - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
  4565. #ifndef SQLITE_OMIT_AUTOVACUUM
  4566. if( pBt->autoVacuum ){
  4567. aFrom = &aSpace[5*pBt->pageSize];
  4568. }
  4569. #endif
  4570. /*
  4571. ** Make copies of the content of pPage and its siblings into aOld[].
  4572. ** The rest of this function will use data from the copies rather
  4573. ** that the original pages since the original pages will be in the
  4574. ** process of being overwritten.
  4575. */
  4576. for(i=0; i<nOld; i++){
  4577. MemPage *p = apCopy[i] = (MemPage*)aCopy[i];
  4578. memcpy(p, apOld[i], sizeof(MemPage));
  4579. p->aData = (void*)&p[1];
  4580. memcpy(p->aData, apOld[i]->aData, pBt->pageSize);
  4581. }
  4582. /*
  4583. ** Load pointers to all cells on sibling pages and the divider cells
  4584. ** into the local apCell[] array. Make copies of the divider cells
  4585. ** into space obtained form aSpace[] and remove the the divider Cells
  4586. ** from pParent.
  4587. **
  4588. ** If the siblings are on leaf pages, then the child pointers of the
  4589. ** divider cells are stripped from the cells before they are copied
  4590. ** into aSpace[]. In this way, all cells in apCell[] are without
  4591. ** child pointers. If siblings are not leaves, then all cell in
  4592. ** apCell[] include child pointers. Either way, all cells in apCell[]
  4593. ** are alike.
  4594. **
  4595. ** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf.
  4596. ** leafData: 1 if pPage holds key+data and pParent holds only keys.
  4597. */
  4598. nCell = 0;
  4599. leafCorrection = pPage->leaf*4;
  4600. leafData = pPage->leafData && pPage->leaf;
  4601. for(i=0; i<nOld; i++){
  4602. MemPage *pOld = apCopy[i];
  4603. int limit = pOld->nCell+pOld->nOverflow;
  4604. for(j=0; j<limit; j++){
  4605. assert( nCell<nMaxCells );
  4606. apCell[nCell] = findOverflowCell(pOld, j);
  4607. szCell[nCell] = cellSizePtr(pOld, apCell[nCell]);
  4608. #ifndef SQLITE_OMIT_AUTOVACUUM
  4609. if( pBt->autoVacuum ){
  4610. int a;
  4611. aFrom[nCell] = i;
  4612. for(a=0; a<pOld->nOverflow; a++){
  4613. if( pOld->aOvfl[a].pCell==apCell[nCell] ){
  4614. aFrom[nCell] = 0xFF;
  4615. break;
  4616. }
  4617. }
  4618. }
  4619. #endif
  4620. nCell++;
  4621. }
  4622. if( i<nOld-1 ){
  4623. int sz = cellSizePtr(pParent, apDiv[i]);
  4624. if( leafData ){
  4625. /* With the LEAFDATA flag, pParent cells hold only INTKEYs that
  4626. ** are duplicates of keys on the child pages. We need to remove
  4627. ** the divider cells from pParent, but the dividers cells are not
  4628. ** added to apCell[] because they are duplicates of child cells.
  4629. */
  4630. dropCell(pParent, nxDiv, sz);
  4631. }else{
  4632. u8 *pTemp;
  4633. assert( nCell<nMaxCells );
  4634. szCell[nCell] = sz;
  4635. pTemp = &aSpace[iSpace];
  4636. iSpace += sz;
  4637. assert( iSpace<=pBt->pageSize*5 );
  4638. memcpy(pTemp, apDiv[i], sz);
  4639. apCell[nCell] = pTemp+leafCorrection;
  4640. #ifndef SQLITE_OMIT_AUTOVACUUM
  4641. if( pBt->autoVacuum ){
  4642. aFrom[nCell] = 0xFF;
  4643. }
  4644. #endif
  4645. dropCell(pParent, nxDiv, sz);
  4646. szCell[nCell] -= leafCorrection;
  4647. assert( get4byte(pTemp)==pgnoOld[i] );
  4648. if( !pOld->leaf ){
  4649. assert( leafCorrection==0 );
  4650. /* The right pointer of the child page pOld becomes the left
  4651. ** pointer of the divider cell */
  4652. memcpy(apCell[nCell], &pOld->aData[pOld->hdrOffset+8], 4);
  4653. }else{
  4654. assert( leafCorrection==4 );
  4655. if( szCell[nCell]<4 ){
  4656. /* Do not allow any cells smaller than 4 bytes. */
  4657. szCell[nCell] = 4;
  4658. }
  4659. }
  4660. nCell++;
  4661. }
  4662. }
  4663. }
  4664. /*
  4665. ** Figure out the number of pages needed to hold all nCell cells.
  4666. ** Store this number in "k". Also compute szNew[] which is the total
  4667. ** size of all cells on the i-th page and cntNew[] which is the index
  4668. ** in apCell[] of the cell that divides page i from page i+1.
  4669. ** cntNew[k] should equal nCell.
  4670. **
  4671. ** Values computed by this block:
  4672. **
  4673. ** k: The total number of sibling pages
  4674. ** szNew[i]: Spaced used on the i-th sibling page.
  4675. ** cntNew[i]: Index in apCell[] and szCell[] for the first cell to
  4676. ** the right of the i-th sibling page.
  4677. ** usableSpace: Number of bytes of space available on each sibling.
  4678. **
  4679. */
  4680. usableSpace = pBt->usableSize - 12 + leafCorrection;
  4681. for(subtotal=k=i=0; i<nCell; i++){
  4682. assert( i<nMaxCells );
  4683. subtotal += szCell[i] + 2;
  4684. if( subtotal > usableSpace ){
  4685. szNew[k] = subtotal - szCell[i];
  4686. cntNew[k] = i;
  4687. if( leafData ){ i--; }
  4688. subtotal = 0;
  4689. k++;
  4690. }
  4691. }
  4692. szNew[k] = subtotal;
  4693. cntNew[k] = nCell;
  4694. k++;
  4695. /*
  4696. ** The packing computed by the previous block is biased toward the siblings
  4697. ** on the left side. The left siblings are always nearly full, while the
  4698. ** right-most sibling might be nearly empty. This block of code attempts
  4699. ** to adjust the packing of siblings to get a better balance.
  4700. **
  4701. ** This adjustment is more than an optimization. The packing above might
  4702. ** be so out of balance as to be illegal. For example, the right-most
  4703. ** sibling might be completely empty. This adjustment is not optional.
  4704. */
  4705. for(i=k-1; i>0; i--){
  4706. int szRight = szNew[i]; /* Size of sibling on the right */
  4707. int szLeft = szNew[i-1]; /* Size of sibling on the left */
  4708. int r; /* Index of right-most cell in left sibling */
  4709. int d; /* Index of first cell to the left of right sibling */
  4710. r = cntNew[i-1] - 1;
  4711. d = r + 1 - leafData;
  4712. assert( d<nMaxCells );
  4713. assert( r<nMaxCells );
  4714. while( szRight==0 || szRight+szCell[d]+2<=szLeft-(szCell[r]+2) ){
  4715. szRight += szCell[d] + 2;
  4716. szLeft -= szCell[r] + 2;
  4717. cntNew[i-1]--;
  4718. r = cntNew[i-1] - 1;
  4719. d = r + 1 - leafData;
  4720. }
  4721. szNew[i] = szRight;
  4722. szNew[i-1] = szLeft;
  4723. }
  4724. /* Either we found one or more cells (cntnew[0])>0) or we are the
  4725. ** a virtual root page. A virtual root page is when the real root
  4726. ** page is page 1 and we are the only child of that page.
  4727. */
  4728. assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) );
  4729. /*
  4730. ** Allocate k new pages. Reuse old pages where possible.
  4731. */
  4732. assert( pPage->pgno>1 );
  4733. pageFlags = pPage->aData[0];
  4734. for(i=0; i<k; i++){
  4735. MemPage *pNew;
  4736. if( i<nOld ){
  4737. pNew = apNew[i] = apOld[i];
  4738. pgnoNew[i] = pgnoOld[i];
  4739. apOld[i] = 0;
  4740. rc = sqlite3PagerWrite(pNew->pDbPage);
  4741. nNew++;
  4742. if( rc ) goto balance_cleanup;
  4743. }else{
  4744. assert( i>0 );
  4745. rc = allocateBtreePage(pBt, &pNew, &pgnoNew[i], pgnoNew[i-1], 0);
  4746. if( rc ) goto balance_cleanup;
  4747. apNew[i] = pNew;
  4748. nNew++;
  4749. }
  4750. zeroPage(pNew, pageFlags);
  4751. }
  4752. /* Free any old pages that were not reused as new pages.
  4753. */
  4754. while( i<nOld ){
  4755. rc = freePage(apOld[i]);
  4756. if( rc ) goto balance_cleanup;
  4757. releasePage(apOld[i]);
  4758. apOld[i] = 0;
  4759. i++;
  4760. }
  4761. /*
  4762. ** Put the new pages in accending order. This helps to
  4763. ** keep entries in the disk file in order so that a scan
  4764. ** of the table is a linear scan through the file. That
  4765. ** in turn helps the operating system to deliver pages
  4766. ** from the disk more rapidly.
  4767. **
  4768. ** An O(n^2) insertion sort algorithm is used, but since
  4769. ** n is never more than NB (a small constant), that should
  4770. ** not be a problem.
  4771. **
  4772. ** When NB==3, this one optimization makes the database
  4773. ** about 25% faster for large insertions and deletions.
  4774. */
  4775. for(i=0; i<k-1; i++){
  4776. int minV = pgnoNew[i];
  4777. int minI = i;
  4778. for(j=i+1; j<k; j++){
  4779. if( pgnoNew[j]<(unsigned)minV ){
  4780. minI = j;
  4781. minV = pgnoNew[j];
  4782. }
  4783. }
  4784. if( minI>i ){
  4785. int t;
  4786. MemPage *pT;
  4787. t = pgnoNew[i];
  4788. pT = apNew[i];
  4789. pgnoNew[i] = pgnoNew[minI];
  4790. apNew[i] = apNew[minI];
  4791. pgnoNew[minI] = t;
  4792. apNew[minI] = pT;
  4793. }
  4794. }
  4795. TRACE(("BALANCE: old: %d %d %d new: %d(%d) %d(%d) %d(%d) %d(%d) %d(%d)\n",
  4796. pgnoOld[0],
  4797. nOld>=2 ? pgnoOld[1] : 0,
  4798. nOld>=3 ? pgnoOld[2] : 0,
  4799. pgnoNew[0], szNew[0],
  4800. nNew>=2 ? pgnoNew[1] : 0, nNew>=2 ? szNew[1] : 0,
  4801. nNew>=3 ? pgnoNew[2] : 0, nNew>=3 ? szNew[2] : 0,
  4802. nNew>=4 ? pgnoNew[3] : 0, nNew>=4 ? szNew[3] : 0,
  4803. nNew>=5 ? pgnoNew[4] : 0, nNew>=5 ? szNew[4] : 0));
  4804. /*
  4805. ** Evenly distribute the data in apCell[] across the new pages.
  4806. ** Insert divider cells into pParent as necessary.
  4807. */
  4808. j = 0;
  4809. for(i=0; i<nNew; i++){
  4810. /* Assemble the new sibling page. */
  4811. MemPage *pNew = apNew[i];
  4812. assert( j<nMaxCells );
  4813. assert( pNew->pgno==pgnoNew[i] );
  4814. assemblePage(pNew, cntNew[i]-j, &apCell[j], &szCell[j]);
  4815. assert( pNew->nCell>0 || (nNew==1 && cntNew[0]==0) );
  4816. assert( pNew->nOverflow==0 );
  4817. #ifndef SQLITE_OMIT_AUTOVACUUM
  4818. /* If this is an auto-vacuum database, update the pointer map entries
  4819. ** that point to the siblings that were rearranged. These can be: left
  4820. ** children of cells, the right-child of the page, or overflow pages
  4821. ** pointed to by cells.
  4822. */
  4823. if( pBt->autoVacuum ){
  4824. for(k=j; k<cntNew[i]; k++){
  4825. assert( k<nMaxCells );
  4826. if( aFrom[k]==0xFF || apCopy[aFrom[k]]->pgno!=pNew->pgno ){
  4827. rc = ptrmapPutOvfl(pNew, k-j);
  4828. if( rc!=SQLITE_OK ){
  4829. goto balance_cleanup;
  4830. }
  4831. }
  4832. }
  4833. }
  4834. #endif
  4835. j = cntNew[i];
  4836. /* If the sibling page assembled above was not the right-most sibling,
  4837. ** insert a divider cell into the parent page.
  4838. */
  4839. if( i<nNew-1 && j<nCell ){
  4840. u8 *pCell;
  4841. u8 *pTemp;
  4842. int sz;
  4843. assert( j<nMaxCells );
  4844. pCell = apCell[j];
  4845. sz = szCell[j] + leafCorrection;
  4846. if( !pNew->leaf ){
  4847. memcpy(&pNew->aData[8], pCell, 4);
  4848. pTemp = 0;
  4849. }else if( leafData ){
  4850. /* If the tree is a leaf-data tree, and the siblings are leaves,
  4851. ** then there is no divider cell in apCell[]. Instead, the divider
  4852. ** cell consists of the integer key for the right-most cell of
  4853. ** the sibling-page assembled above only.
  4854. */
  4855. CellInfo info;
  4856. j--;
  4857. sqlite3BtreeParseCellPtr(pNew, apCell[j], &info);
  4858. pCell = &aSpace[iSpace];
  4859. fillInCell(pParent, pCell, 0, info.nKey, 0, 0, 0, &sz);
  4860. iSpace += sz;
  4861. assert( iSpace<=pBt->pageSize*5 );
  4862. pTemp = 0;
  4863. }else{
  4864. pCell -= 4;
  4865. pTemp = &aSpace[iSpace];
  4866. iSpace += sz;
  4867. assert( iSpace<=pBt->pageSize*5 );
  4868. /* Obscure case for non-leaf-data trees: If the cell at pCell was
  4869. ** previously stored on a leaf node, and its reported size was 4
  4870. ** bytes, then it may actually be smaller than this
  4871. ** (see sqlite3BtreeParseCellPtr(), 4 bytes is the minimum size of
  4872. ** any cell). But it is important to pass the correct size to
  4873. ** insertCell(), so reparse the cell now.
  4874. **
  4875. ** Note that this can never happen in an SQLite data file, as all
  4876. ** cells are at least 4 bytes. It only happens in b-trees used
  4877. ** to evaluate "IN (SELECT ...)" and similar clauses.
  4878. */
  4879. if( szCell[j]==4 ){
  4880. assert(leafCorrection==4);
  4881. sz = cellSizePtr(pParent, pCell);
  4882. }
  4883. }
  4884. rc = insertCell(pParent, nxDiv, pCell, sz, pTemp, 4);
  4885. if( rc!=SQLITE_OK ) goto balance_cleanup;
  4886. put4byte(findOverflowCell(pParent,nxDiv), pNew->pgno);
  4887. #ifndef SQLITE_OMIT_AUTOVACUUM
  4888. /* If this is an auto-vacuum database, and not a leaf-data tree,
  4889. ** then update the pointer map with an entry for the overflow page
  4890. ** that the cell just inserted points to (if any).
  4891. */
  4892. if( pBt->autoVacuum && !leafData ){
  4893. rc = ptrmapPutOvfl(pParent, nxDiv);
  4894. if( rc!=SQLITE_OK ){
  4895. goto balance_cleanup;
  4896. }
  4897. }
  4898. #endif
  4899. j++;
  4900. nxDiv++;
  4901. }
  4902. }
  4903. assert( j==nCell );
  4904. assert( nOld>0 );
  4905. assert( nNew>0 );
  4906. if( (pageFlags & PTF_LEAF)==0 ){
  4907. memcpy(&apNew[nNew-1]->aData[8], &apCopy[nOld-1]->aData[8], 4);
  4908. }
  4909. if( nxDiv==pParent->nCell+pParent->nOverflow ){
  4910. /* Right-most sibling is the right-most child of pParent */
  4911. put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew[nNew-1]);
  4912. }else{
  4913. /* Right-most sibling is the left child of the first entry in pParent
  4914. ** past the right-most divider entry */
  4915. put4byte(findOverflowCell(pParent, nxDiv), pgnoNew[nNew-1]);
  4916. }
  4917. /*
  4918. ** Reparent children of all cells.
  4919. */
  4920. for(i=0; i<nNew; i++){
  4921. rc = reparentChildPages(apNew[i]);
  4922. if( rc!=SQLITE_OK ) goto balance_cleanup;
  4923. }
  4924. rc = reparentChildPages(pParent);
  4925. if( rc!=SQLITE_OK ) goto balance_cleanup;
  4926. /*
  4927. ** Balance the parent page. Note that the current page (pPage) might
  4928. ** have been added to the freelist so it might no longer be initialized.
  4929. ** But the parent page will always be initialized.
  4930. */
  4931. assert( pParent->isInit );
  4932. rc = balance(pParent, 0);
  4933. /*
  4934. ** Cleanup before returning.
  4935. */
  4936. balance_cleanup:
  4937. sqlite3_free(apCell);
  4938. for(i=0; i<nOld; i++){
  4939. releasePage(apOld[i]);
  4940. }
  4941. for(i=0; i<nNew; i++){
  4942. releasePage(apNew[i]);
  4943. }
  4944. releasePage(pParent);
  4945. TRACE(("BALANCE: finished with %d: old=%d new=%d cells=%d\n",
  4946. pPage->pgno, nOld, nNew, nCell));
  4947. return rc;
  4948. }
  4949. /*
  4950. ** This routine is called for the root page of a btree when the root
  4951. ** page contains no cells. This is an opportunity to make the tree
  4952. ** shallower by one level.
  4953. */
  4954. static int balance_shallower(MemPage *pPage){
  4955. MemPage *pChild; /* The only child page of pPage */
  4956. Pgno pgnoChild; /* Page number for pChild */
  4957. int rc = SQLITE_OK; /* Return code from subprocedures */
  4958. BtShared *pBt; /* The main BTree structure */
  4959. int mxCellPerPage; /* Maximum number of cells per page */
  4960. u8 **apCell; /* All cells from pages being balanced */
  4961. int *szCell; /* Local size of all cells */
  4962. assert( pPage->pParent==0 );
  4963. assert( pPage->nCell==0 );
  4964. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  4965. pBt = pPage->pBt;
  4966. mxCellPerPage = MX_CELL(pBt);
  4967. apCell = sqlite3_malloc( mxCellPerPage*(sizeof(u8*)+sizeof(int)) );
  4968. if( apCell==0 ) return SQLITE_NOMEM;
  4969. szCell = (int*)&apCell[mxCellPerPage];
  4970. if( pPage->leaf ){
  4971. /* The table is completely empty */
  4972. TRACE(("BALANCE: empty table %d\n", pPage->pgno));
  4973. }else{
  4974. /* The root page is empty but has one child. Transfer the
  4975. ** information from that one child into the root page if it
  4976. ** will fit. This reduces the depth of the tree by one.
  4977. **
  4978. ** If the root page is page 1, it has less space available than
  4979. ** its child (due to the 100 byte header that occurs at the beginning
  4980. ** of the database fle), so it might not be able to hold all of the
  4981. ** information currently contained in the child. If this is the
  4982. ** case, then do not do the transfer. Leave page 1 empty except
  4983. ** for the right-pointer to the child page. The child page becomes
  4984. ** the virtual root of the tree.
  4985. */
  4986. pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]);
  4987. assert( pgnoChild>0 );
  4988. assert( pgnoChild<=sqlite3PagerPagecount(pPage->pBt->pPager) );
  4989. rc = sqlite3BtreeGetPage(pPage->pBt, pgnoChild, &pChild, 0);
  4990. if( rc ) goto end_shallow_balance;
  4991. if( pPage->pgno==1 ){
  4992. rc = sqlite3BtreeInitPage(pChild, pPage);
  4993. if( rc ) goto end_shallow_balance;
  4994. assert( pChild->nOverflow==0 );
  4995. if( pChild->nFree>=100 ){
  4996. /* The child information will fit on the root page, so do the
  4997. ** copy */
  4998. int i;
  4999. zeroPage(pPage, pChild->aData[0]);
  5000. for(i=0; i<pChild->nCell; i++){
  5001. apCell[i] = findCell(pChild,i);
  5002. szCell[i] = cellSizePtr(pChild, apCell[i]);
  5003. }
  5004. assemblePage(pPage, pChild->nCell, apCell, szCell);
  5005. /* Copy the right-pointer of the child to the parent. */
  5006. put4byte(&pPage->aData[pPage->hdrOffset+8],
  5007. get4byte(&pChild->aData[pChild->hdrOffset+8]));
  5008. freePage(pChild);
  5009. TRACE(("BALANCE: child %d transfer to page 1\n", pChild->pgno));
  5010. }else{
  5011. /* The child has more information that will fit on the root.
  5012. ** The tree is already balanced. Do nothing. */
  5013. TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno));
  5014. }
  5015. }else{
  5016. memcpy(pPage->aData, pChild->aData, pPage->pBt->usableSize);
  5017. pPage->isInit = 0;
  5018. pPage->pParent = 0;
  5019. rc = sqlite3BtreeInitPage(pPage, 0);
  5020. assert( rc==SQLITE_OK );
  5021. freePage(pChild);
  5022. TRACE(("BALANCE: transfer child %d into root %d\n",
  5023. pChild->pgno, pPage->pgno));
  5024. }
  5025. rc = reparentChildPages(pPage);
  5026. assert( pPage->nOverflow==0 );
  5027. #ifndef SQLITE_OMIT_AUTOVACUUM
  5028. if( pBt->autoVacuum ){
  5029. int i;
  5030. for(i=0; i<pPage->nCell; i++){
  5031. rc = ptrmapPutOvfl(pPage, i);
  5032. if( rc!=SQLITE_OK ){
  5033. goto end_shallow_balance;
  5034. }
  5035. }
  5036. }
  5037. #endif
  5038. releasePage(pChild);
  5039. }
  5040. end_shallow_balance:
  5041. sqlite3_free(apCell);
  5042. return rc;
  5043. }
  5044. /*
  5045. ** The root page is overfull
  5046. **
  5047. ** When this happens, Create a new child page and copy the
  5048. ** contents of the root into the child. Then make the root
  5049. ** page an empty page with rightChild pointing to the new
  5050. ** child. Finally, call balance_internal() on the new child
  5051. ** to cause it to split.
  5052. */
  5053. static int balance_deeper(MemPage *pPage){
  5054. int rc; /* Return value from subprocedures */
  5055. MemPage *pChild; /* Pointer to a new child page */
  5056. Pgno pgnoChild; /* Page number of the new child page */
  5057. BtShared *pBt; /* The BTree */
  5058. int usableSize; /* Total usable size of a page */
  5059. u8 *data; /* Content of the parent page */
  5060. u8 *cdata; /* Content of the child page */
  5061. int hdr; /* Offset to page header in parent */
  5062. int brk; /* Offset to content of first cell in parent */
  5063. assert( pPage->pParent==0 );
  5064. assert( pPage->nOverflow>0 );
  5065. pBt = pPage->pBt;
  5066. assert( sqlite3_mutex_held(pBt->mutex) );
  5067. rc = allocateBtreePage(pBt, &pChild, &pgnoChild, pPage->pgno, 0);
  5068. if( rc ) return rc;
  5069. assert( sqlite3PagerIswriteable(pChild->pDbPage) );
  5070. usableSize = pBt->usableSize;
  5071. data = pPage->aData;
  5072. hdr = pPage->hdrOffset;
  5073. brk = get2byte(&data[hdr+5]);
  5074. cdata = pChild->aData;
  5075. memcpy(cdata, &data[hdr], pPage->cellOffset+2*pPage->nCell-hdr);
  5076. memcpy(&cdata[brk], &data[brk], usableSize-brk);
  5077. assert( pChild->isInit==0 );
  5078. rc = sqlite3BtreeInitPage(pChild, pPage);
  5079. if( rc ) goto balancedeeper_out;
  5080. memcpy(pChild->aOvfl, pPage->aOvfl, pPage->nOverflow*sizeof(pPage->aOvfl[0]));
  5081. pChild->nOverflow = pPage->nOverflow;
  5082. if( pChild->nOverflow ){
  5083. pChild->nFree = 0;
  5084. }
  5085. assert( pChild->nCell==pPage->nCell );
  5086. zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF);
  5087. put4byte(&pPage->aData[pPage->hdrOffset+8], pgnoChild);
  5088. TRACE(("BALANCE: copy root %d into %d\n", pPage->pgno, pChild->pgno));
  5089. #ifndef SQLITE_OMIT_AUTOVACUUM
  5090. if( pBt->autoVacuum ){
  5091. int i;
  5092. rc = ptrmapPut(pBt, pChild->pgno, PTRMAP_BTREE, pPage->pgno);
  5093. if( rc ) goto balancedeeper_out;
  5094. for(i=0; i<pChild->nCell; i++){
  5095. rc = ptrmapPutOvfl(pChild, i);
  5096. if( rc!=SQLITE_OK ){
  5097. return rc;
  5098. }
  5099. }
  5100. }
  5101. #endif
  5102. rc = balance_nonroot(pChild);
  5103. balancedeeper_out:
  5104. releasePage(pChild);
  5105. return rc;
  5106. }
  5107. /*
  5108. ** Decide if the page pPage needs to be balanced. If balancing is
  5109. ** required, call the appropriate balancing routine.
  5110. */
  5111. static int balance(MemPage *pPage, int insert){
  5112. int rc = SQLITE_OK;
  5113. assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  5114. if( pPage->pParent==0 ){
  5115. rc = sqlite3PagerWrite(pPage->pDbPage);
  5116. if( rc==SQLITE_OK && pPage->nOverflow>0 ){
  5117. rc = balance_deeper(pPage);
  5118. }
  5119. if( rc==SQLITE_OK && pPage->nCell==0 ){
  5120. rc = balance_shallower(pPage);
  5121. }
  5122. }else{
  5123. if( pPage->nOverflow>0 ||
  5124. (!insert && pPage->nFree>pPage->pBt->usableSize*2/3) ){
  5125. rc = balance_nonroot(pPage);
  5126. }
  5127. }
  5128. return rc;
  5129. }
  5130. /*
  5131. ** This routine checks all cursors that point to table pgnoRoot.
  5132. ** If any of those cursors were opened with wrFlag==0 in a different
  5133. ** database connection (a database connection that shares the pager
  5134. ** cache with the current connection) and that other connection
  5135. ** is not in the ReadUncommmitted state, then this routine returns
  5136. ** SQLITE_LOCKED.
  5137. **
  5138. ** In addition to checking for read-locks (where a read-lock
  5139. ** means a cursor opened with wrFlag==0) this routine also moves
  5140. ** all write cursors so that they are pointing to the
  5141. ** first Cell on the root page. This is necessary because an insert
  5142. ** or delete might change the number of cells on a page or delete
  5143. ** a page entirely and we do not want to leave any cursors
  5144. ** pointing to non-existant pages or cells.
  5145. */
  5146. static int checkReadLocks(Btree *pBtree, Pgno pgnoRoot, BtCursor *pExclude){
  5147. BtCursor *p;
  5148. BtShared *pBt = pBtree->pBt;
  5149. sqlite3 *db = pBtree->db;
  5150. assert( sqlite3BtreeHoldsMutex(pBtree) );
  5151. for(p=pBt->pCursor; p; p=p->pNext){
  5152. if( p==pExclude ) continue;
  5153. if( p->eState!=CURSOR_VALID ) continue;
  5154. if( p->pgnoRoot!=pgnoRoot ) continue;
  5155. if( p->wrFlag==0 ){
  5156. sqlite3 *dbOther = p->pBtree->db;
  5157. if( dbOther==0 ||
  5158. (dbOther!=db && (dbOther->flags & SQLITE_ReadUncommitted)==0) ){
  5159. return SQLITE_LOCKED;
  5160. }
  5161. }else if( p->pPage->pgno!=p->pgnoRoot ){
  5162. moveToRoot(p);
  5163. }
  5164. }
  5165. return SQLITE_OK;
  5166. }
  5167. /*
  5168. ** Insert a new record into the BTree. The key is given by (pKey,nKey)
  5169. ** and the data is given by (pData,nData). The cursor is used only to
  5170. ** define what table the record should be inserted into. The cursor
  5171. ** is left pointing at a random location.
  5172. **
  5173. ** For an INTKEY table, only the nKey value of the key is used. pKey is
  5174. ** ignored. For a ZERODATA table, the pData and nData are both ignored.
  5175. */
  5176. int sqlite3BtreeInsert(
  5177. BtCursor *pCur, /* Insert data into the table of this cursor */
  5178. const void *pKey, i64 nKey, /* The key of the new record */
  5179. const void *pData, int nData, /* The data of the new record */
  5180. int nZero, /* Number of extra 0 bytes to append to data */
  5181. int appendBias /* True if this is likely an append */
  5182. ){
  5183. int rc;
  5184. int loc;
  5185. int szNew;
  5186. MemPage *pPage;
  5187. Btree *p = pCur->pBtree;
  5188. BtShared *pBt = p->pBt;
  5189. unsigned char *oldCell;
  5190. unsigned char *newCell = 0;
  5191. assert( cursorHoldsMutex(pCur) );
  5192. if( pBt->inTransaction!=TRANS_WRITE ){
  5193. /* Must start a transaction before doing an insert */
  5194. rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
  5195. return rc;
  5196. }
  5197. assert( !pBt->readOnly );
  5198. if( !pCur->wrFlag ){
  5199. return SQLITE_PERM; /* Cursor not open for writing */
  5200. }
  5201. if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur) ){
  5202. return SQLITE_LOCKED; /* The table pCur points to has a read lock */
  5203. }
  5204. if( pCur->eState==CURSOR_FAULT ){
  5205. return pCur->skip;
  5206. }
  5207. /* Save the positions of any other cursors open on this table */
  5208. clearCursorPosition(pCur);
  5209. if(
  5210. SQLITE_OK!=(rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur)) ||
  5211. SQLITE_OK!=(rc = sqlite3BtreeMoveto(pCur, pKey, nKey, appendBias, &loc))
  5212. ){
  5213. return rc;
  5214. }
  5215. pPage = pCur->pPage;
  5216. assert( pPage->intKey || nKey>=0 );
  5217. assert( pPage->leaf || !pPage->leafData );
  5218. TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
  5219. pCur->pgnoRoot, nKey, nData, pPage->pgno,
  5220. loc==0 ? "overwrite" : "new entry"));
  5221. assert( pPage->isInit );
  5222. newCell = sqlite3_malloc( MX_CELL_SIZE(pBt) );
  5223. if( newCell==0 ) return SQLITE_NOMEM;
  5224. rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew);
  5225. if( rc ) goto end_insert;
  5226. assert( szNew==cellSizePtr(pPage, newCell) );
  5227. assert( szNew<=MX_CELL_SIZE(pBt) );
  5228. if( loc==0 && CURSOR_VALID==pCur->eState ){
  5229. int szOld;
  5230. assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
  5231. rc = sqlite3PagerWrite(pPage->pDbPage);
  5232. if( rc ){
  5233. goto end_insert;
  5234. }
  5235. oldCell = findCell(pPage, pCur->idx);
  5236. if( !pPage->leaf ){
  5237. memcpy(newCell, oldCell, 4);
  5238. }
  5239. szOld = cellSizePtr(pPage, oldCell);
  5240. rc = clearCell(pPage, oldCell);
  5241. if( rc ) goto end_insert;
  5242. dropCell(pPage, pCur->idx, szOld);
  5243. }else if( loc<0 && pPage->nCell>0 ){
  5244. assert( pPage->leaf );
  5245. pCur->idx++;
  5246. pCur->info.nSize = 0;
  5247. }else{
  5248. assert( pPage->leaf );
  5249. }
  5250. rc = insertCell(pPage, pCur->idx, newCell, szNew, 0, 0);
  5251. if( rc!=SQLITE_OK ) goto end_insert;
  5252. rc = balance(pPage, 1);
  5253. /* sqlite3BtreePageDump(pCur->pBt, pCur->pgnoRoot, 1); */
  5254. /* fflush(stdout); */
  5255. if( rc==SQLITE_OK ){
  5256. moveToRoot(pCur);
  5257. }
  5258. end_insert:
  5259. sqlite3_free(newCell);
  5260. return rc;
  5261. }
  5262. /*
  5263. ** Delete the entry that the cursor is pointing to. The cursor
  5264. ** is left pointing at a random location.
  5265. */
  5266. int sqlite3BtreeDelete(BtCursor *pCur){
  5267. MemPage *pPage = pCur->pPage;
  5268. unsigned char *pCell;
  5269. int rc;
  5270. Pgno pgnoChild = 0;
  5271. Btree *p = pCur->pBtree;
  5272. BtShared *pBt = p->pBt;
  5273. assert( cursorHoldsMutex(pCur) );
  5274. assert( pPage->isInit );
  5275. if( pBt->inTransaction!=TRANS_WRITE ){
  5276. /* Must start a transaction before doing a delete */
  5277. rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
  5278. return rc;
  5279. }
  5280. assert( !pBt->readOnly );
  5281. if( pCur->eState==CURSOR_FAULT ){
  5282. return pCur->skip;
  5283. }
  5284. if( pCur->idx >= pPage->nCell ){
  5285. return SQLITE_ERROR; /* The cursor is not pointing to anything */
  5286. }
  5287. if( !pCur->wrFlag ){
  5288. return SQLITE_PERM; /* Did not open this cursor for writing */
  5289. }
  5290. if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur) ){
  5291. return SQLITE_LOCKED; /* The table pCur points to has a read lock */
  5292. }
  5293. /* Restore the current cursor position (a no-op if the cursor is not in
  5294. ** CURSOR_REQUIRESEEK state) and save the positions of any other cursors
  5295. ** open on the same table. Then call sqlite3PagerWrite() on the page
  5296. ** that the entry will be deleted from.
  5297. */
  5298. if(
  5299. (rc = restoreOrClearCursorPosition(pCur))!=0 ||
  5300. (rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur))!=0 ||
  5301. (rc = sqlite3PagerWrite(pPage->pDbPage))!=0
  5302. ){
  5303. return rc;
  5304. }
  5305. /* Locate the cell within its page and leave pCell pointing to the
  5306. ** data. The clearCell() call frees any overflow pages associated with the
  5307. ** cell. The cell itself is still intact.
  5308. */
  5309. pCell = findCell(pPage, pCur->idx);
  5310. if( !pPage->leaf ){
  5311. pgnoChild = get4byte(pCell);
  5312. }
  5313. rc = clearCell(pPage, pCell);
  5314. if( rc ){
  5315. return rc;
  5316. }
  5317. if( !pPage->leaf ){
  5318. /*
  5319. ** The entry we are about to delete is not a leaf so if we do not
  5320. ** do something we will leave a hole on an internal page.
  5321. ** We have to fill the hole by moving in a cell from a leaf. The
  5322. ** next Cell after the one to be deleted is guaranteed to exist and
  5323. ** to be a leaf so we can use it.
  5324. */
  5325. BtCursor leafCur;
  5326. unsigned char *pNext;
  5327. int szNext; /* The compiler warning is wrong: szNext is always
  5328. ** initialized before use. Adding an extra initialization
  5329. ** to silence the compiler slows down the code. */
  5330. int notUsed;
  5331. unsigned char *tempCell = 0;
  5332. assert( !pPage->leafData );
  5333. sqlite3BtreeGetTempCursor(pCur, &leafCur);
  5334. rc = sqlite3BtreeNext(&leafCur, &notUsed);
  5335. if( rc==SQLITE_OK ){
  5336. rc = sqlite3PagerWrite(leafCur.pPage->pDbPage);
  5337. }
  5338. if( rc==SQLITE_OK ){
  5339. TRACE(("DELETE: table=%d delete internal from %d replace from leaf %d\n",
  5340. pCur->pgnoRoot, pPage->pgno, leafCur.pPage->pgno));
  5341. dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
  5342. pNext = findCell(leafCur.pPage, leafCur.idx);
  5343. szNext = cellSizePtr(leafCur.pPage, pNext);
  5344. assert( MX_CELL_SIZE(pBt)>=szNext+4 );
  5345. tempCell = sqlite3_malloc( MX_CELL_SIZE(pBt) );
  5346. if( tempCell==0 ){
  5347. rc = SQLITE_NOMEM;
  5348. }
  5349. }
  5350. if( rc==SQLITE_OK ){
  5351. rc = insertCell(pPage, pCur->idx, pNext-4, szNext+4, tempCell, 0);
  5352. }
  5353. if( rc==SQLITE_OK ){
  5354. put4byte(findOverflowCell(pPage, pCur->idx), pgnoChild);
  5355. rc = balance(pPage, 0);
  5356. }
  5357. if( rc==SQLITE_OK ){
  5358. dropCell(leafCur.pPage, leafCur.idx, szNext);
  5359. rc = balance(leafCur.pPage, 0);
  5360. }
  5361. sqlite3_free(tempCell);
  5362. sqlite3BtreeReleaseTempCursor(&leafCur);
  5363. }else{
  5364. TRACE(("DELETE: table=%d delete from leaf %d\n",
  5365. pCur->pgnoRoot, pPage->pgno));
  5366. dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
  5367. rc = balance(pPage, 0);
  5368. }
  5369. if( rc==SQLITE_OK ){
  5370. moveToRoot(pCur);
  5371. }
  5372. return rc;
  5373. }
  5374. /*
  5375. ** Create a new BTree table. Write into *piTable the page
  5376. ** number for the root page of the new table.
  5377. **
  5378. ** The type of type is determined by the flags parameter. Only the
  5379. ** following values of flags are currently in use. Other values for
  5380. ** flags might not work:
  5381. **
  5382. ** BTREE_INTKEY|BTREE_LEAFDATA Used for SQL tables with rowid keys
  5383. ** BTREE_ZERODATA Used for SQL indices
  5384. */
  5385. static int btreeCreateTable(Btree *p, int *piTable, int flags){
  5386. BtShared *pBt = p->pBt;
  5387. MemPage *pRoot;
  5388. Pgno pgnoRoot;
  5389. int rc;
  5390. assert( sqlite3BtreeHoldsMutex(p) );
  5391. if( pBt->inTransaction!=TRANS_WRITE ){
  5392. /* Must start a transaction first */
  5393. rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
  5394. return rc;
  5395. }
  5396. assert( !pBt->readOnly );
  5397. #ifdef SQLITE_OMIT_AUTOVACUUM
  5398. rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0);
  5399. if( rc ){
  5400. return rc;
  5401. }
  5402. #else
  5403. if( pBt->autoVacuum ){
  5404. Pgno pgnoMove; /* Move a page here to make room for the root-page */
  5405. MemPage *pPageMove; /* The page to move to. */
  5406. /* Creating a new table may probably require moving an existing database
  5407. ** to make room for the new tables root page. In case this page turns
  5408. ** out to be an overflow page, delete all overflow page-map caches
  5409. ** held by open cursors.
  5410. */
  5411. invalidateAllOverflowCache(pBt);
  5412. /* Read the value of meta[3] from the database to determine where the
  5413. ** root page of the new table should go. meta[3] is the largest root-page
  5414. ** created so far, so the new root-page is (meta[3]+1).
  5415. */
  5416. rc = sqlite3BtreeGetMeta(p, 4, &pgnoRoot);
  5417. if( rc!=SQLITE_OK ){
  5418. return rc;
  5419. }
  5420. pgnoRoot++;
  5421. /* The new root-page may not be allocated on a pointer-map page, or the
  5422. ** PENDING_BYTE page.
  5423. */
  5424. if( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) ||
  5425. pgnoRoot==PENDING_BYTE_PAGE(pBt) ){
  5426. pgnoRoot++;
  5427. }
  5428. assert( pgnoRoot>=3 );
  5429. /* Allocate a page. The page that currently resides at pgnoRoot will
  5430. ** be moved to the allocated page (unless the allocated page happens
  5431. ** to reside at pgnoRoot).
  5432. */
  5433. rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, 1);
  5434. if( rc!=SQLITE_OK ){
  5435. return rc;
  5436. }
  5437. if( pgnoMove!=pgnoRoot ){
  5438. /* pgnoRoot is the page that will be used for the root-page of
  5439. ** the new table (assuming an error did not occur). But we were
  5440. ** allocated pgnoMove. If required (i.e. if it was not allocated
  5441. ** by extending the file), the current page at position pgnoMove
  5442. ** is already journaled.
  5443. */
  5444. u8 eType;
  5445. Pgno iPtrPage;
  5446. releasePage(pPageMove);
  5447. /* Move the page currently at pgnoRoot to pgnoMove. */
  5448. rc = sqlite3BtreeGetPage(pBt, pgnoRoot, &pRoot, 0);
  5449. if( rc!=SQLITE_OK ){
  5450. return rc;
  5451. }
  5452. rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage);
  5453. if( rc!=SQLITE_OK || eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){
  5454. releasePage(pRoot);
  5455. return rc;
  5456. }
  5457. assert( eType!=PTRMAP_ROOTPAGE );
  5458. assert( eType!=PTRMAP_FREEPAGE );
  5459. rc = sqlite3PagerWrite(pRoot->pDbPage);
  5460. if( rc!=SQLITE_OK ){
  5461. releasePage(pRoot);
  5462. return rc;
  5463. }
  5464. rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove);
  5465. releasePage(pRoot);
  5466. /* Obtain the page at pgnoRoot */
  5467. if( rc!=SQLITE_OK ){
  5468. return rc;
  5469. }
  5470. rc = sqlite3BtreeGetPage(pBt, pgnoRoot, &pRoot, 0);
  5471. if( rc!=SQLITE_OK ){
  5472. return rc;
  5473. }
  5474. rc = sqlite3PagerWrite(pRoot->pDbPage);
  5475. if( rc!=SQLITE_OK ){
  5476. releasePage(pRoot);
  5477. return rc;
  5478. }
  5479. }else{
  5480. pRoot = pPageMove;
  5481. }
  5482. /* Update the pointer-map and meta-data with the new root-page number. */
  5483. rc = ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0);
  5484. if( rc ){
  5485. releasePage(pRoot);
  5486. return rc;
  5487. }
  5488. rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot);
  5489. if( rc ){
  5490. releasePage(pRoot);
  5491. return rc;
  5492. }
  5493. }else{
  5494. rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0);
  5495. if( rc ) return rc;
  5496. }
  5497. #endif
  5498. assert( sqlite3PagerIswriteable(pRoot->pDbPage) );
  5499. zeroPage(pRoot, flags | PTF_LEAF);
  5500. sqlite3PagerUnref(pRoot->pDbPage);
  5501. *piTable = (int)pgnoRoot;
  5502. return SQLITE_OK;
  5503. }
  5504. int sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){
  5505. int rc;
  5506. sqlite3BtreeEnter(p);
  5507. p->pBt->db = p->db;
  5508. rc = btreeCreateTable(p, piTable, flags);
  5509. sqlite3BtreeLeave(p);
  5510. return rc;
  5511. }
  5512. /*
  5513. ** Erase the given database page and all its children. Return
  5514. ** the page to the freelist.
  5515. */
  5516. static int clearDatabasePage(
  5517. BtShared *pBt, /* The BTree that contains the table */
  5518. Pgno pgno, /* Page number to clear */
  5519. MemPage *pParent, /* Parent page. NULL for the root */
  5520. int freePageFlag /* Deallocate page if true */
  5521. ){
  5522. MemPage *pPage = 0;
  5523. int rc;
  5524. unsigned char *pCell;
  5525. int i;
  5526. assert( sqlite3_mutex_held(pBt->mutex) );
  5527. if( pgno>sqlite3PagerPagecount(pBt->pPager) ){
  5528. return SQLITE_CORRUPT_BKPT;
  5529. }
  5530. rc = getAndInitPage(pBt, pgno, &pPage, pParent);
  5531. if( rc ) goto cleardatabasepage_out;
  5532. for(i=0; i<pPage->nCell; i++){
  5533. pCell = findCell(pPage, i);
  5534. if( !pPage->leaf ){
  5535. rc = clearDatabasePage(pBt, get4byte(pCell), pPage->pParent, 1);
  5536. if( rc ) goto cleardatabasepage_out;
  5537. }
  5538. rc = clearCell(pPage, pCell);
  5539. if( rc ) goto cleardatabasepage_out;
  5540. }
  5541. if( !pPage->leaf ){
  5542. rc = clearDatabasePage(pBt, get4byte(&pPage->aData[8]), pPage->pParent, 1);
  5543. if( rc ) goto cleardatabasepage_out;
  5544. }
  5545. if( freePageFlag ){
  5546. rc = freePage(pPage);
  5547. }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){
  5548. zeroPage(pPage, pPage->aData[0] | PTF_LEAF);
  5549. }
  5550. cleardatabasepage_out:
  5551. releasePage(pPage);
  5552. return rc;
  5553. }
  5554. /*
  5555. ** Delete all information from a single table in the database. iTable is
  5556. ** the page number of the root of the table. After this routine returns,
  5557. ** the root page is empty, but still exists.
  5558. **
  5559. ** This routine will fail with SQLITE_LOCKED if there are any open
  5560. ** read cursors on the table. Open write cursors are moved to the
  5561. ** root of the table.
  5562. */
  5563. int sqlite3BtreeClearTable(Btree *p, int iTable){
  5564. int rc;
  5565. BtShared *pBt = p->pBt;
  5566. sqlite3BtreeEnter(p);
  5567. pBt->db = p->db;
  5568. if( p->inTrans!=TRANS_WRITE ){
  5569. rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
  5570. }else if( (rc = checkReadLocks(p, iTable, 0))!=SQLITE_OK ){
  5571. /* nothing to do */
  5572. }else if( SQLITE_OK!=(rc = saveAllCursors(pBt, iTable, 0)) ){
  5573. /* nothing to do */
  5574. }else{
  5575. rc = clearDatabasePage(pBt, (Pgno)iTable, 0, 0);
  5576. }
  5577. sqlite3BtreeLeave(p);
  5578. return rc;
  5579. }
  5580. /*
  5581. ** Erase all information in a table and add the root of the table to
  5582. ** the freelist. Except, the root of the principle table (the one on
  5583. ** page 1) is never added to the freelist.
  5584. **
  5585. ** This routine will fail with SQLITE_LOCKED if there are any open
  5586. ** cursors on the table.
  5587. **
  5588. ** If AUTOVACUUM is enabled and the page at iTable is not the last
  5589. ** root page in the database file, then the last root page
  5590. ** in the database file is moved into the slot formerly occupied by
  5591. ** iTable and that last slot formerly occupied by the last root page
  5592. ** is added to the freelist instead of iTable. In this say, all
  5593. ** root pages are kept at the beginning of the database file, which
  5594. ** is necessary for AUTOVACUUM to work right. *piMoved is set to the
  5595. ** page number that used to be the last root page in the file before
  5596. ** the move. If no page gets moved, *piMoved is set to 0.
  5597. ** The last root page is recorded in meta[3] and the value of
  5598. ** meta[3] is updated by this procedure.
  5599. */
  5600. static int btreeDropTable(Btree *p, int iTable, int *piMoved){
  5601. int rc;
  5602. MemPage *pPage = 0;
  5603. BtShared *pBt = p->pBt;
  5604. assert( sqlite3BtreeHoldsMutex(p) );
  5605. if( p->inTrans!=TRANS_WRITE ){
  5606. return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
  5607. }
  5608. /* It is illegal to drop a table if any cursors are open on the
  5609. ** database. This is because in auto-vacuum mode the backend may
  5610. ** need to move another root-page to fill a gap left by the deleted
  5611. ** root page. If an open cursor was using this page a problem would
  5612. ** occur.
  5613. */
  5614. if( pBt->pCursor ){
  5615. return SQLITE_LOCKED;
  5616. }
  5617. rc = sqlite3BtreeGetPage(pBt, (Pgno)iTable, &pPage, 0);
  5618. if( rc ) return rc;
  5619. rc = sqlite3BtreeClearTable(p, iTable);
  5620. if( rc ){
  5621. releasePage(pPage);
  5622. return rc;
  5623. }
  5624. *piMoved = 0;
  5625. if( iTable>1 ){
  5626. #ifdef SQLITE_OMIT_AUTOVACUUM
  5627. rc = freePage(pPage);
  5628. releasePage(pPage);
  5629. #else
  5630. if( pBt->autoVacuum ){
  5631. Pgno maxRootPgno;
  5632. rc = sqlite3BtreeGetMeta(p, 4, &maxRootPgno);
  5633. if( rc!=SQLITE_OK ){
  5634. releasePage(pPage);
  5635. return rc;
  5636. }
  5637. if( iTable==maxRootPgno ){
  5638. /* If the table being dropped is the table with the largest root-page
  5639. ** number in the database, put the root page on the free list.
  5640. */
  5641. rc = freePage(pPage);
  5642. releasePage(pPage);
  5643. if( rc!=SQLITE_OK ){
  5644. return rc;
  5645. }
  5646. }else{
  5647. /* The table being dropped does not have the largest root-page
  5648. ** number in the database. So move the page that does into the
  5649. ** gap left by the deleted root-page.
  5650. */
  5651. MemPage *pMove;
  5652. releasePage(pPage);
  5653. rc = sqlite3BtreeGetPage(pBt, maxRootPgno, &pMove, 0);
  5654. if( rc!=SQLITE_OK ){
  5655. return rc;
  5656. }
  5657. rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable);
  5658. releasePage(pMove);
  5659. if( rc!=SQLITE_OK ){
  5660. return rc;
  5661. }
  5662. rc = sqlite3BtreeGetPage(pBt, maxRootPgno, &pMove, 0);
  5663. if( rc!=SQLITE_OK ){
  5664. return rc;
  5665. }
  5666. rc = freePage(pMove);
  5667. releasePage(pMove);
  5668. if( rc!=SQLITE_OK ){
  5669. return rc;
  5670. }
  5671. *piMoved = maxRootPgno;
  5672. }
  5673. /* Set the new 'max-root-page' value in the database header. This
  5674. ** is the old value less one, less one more if that happens to
  5675. ** be a root-page number, less one again if that is the
  5676. ** PENDING_BYTE_PAGE.
  5677. */
  5678. maxRootPgno--;
  5679. if( maxRootPgno==PENDING_BYTE_PAGE(pBt) ){
  5680. maxRootPgno--;
  5681. }
  5682. if( maxRootPgno==PTRMAP_PAGENO(pBt, maxRootPgno) ){
  5683. maxRootPgno--;
  5684. }
  5685. assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) );
  5686. rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno);
  5687. }else{
  5688. rc = freePage(pPage);
  5689. releasePage(pPage);
  5690. }
  5691. #endif
  5692. }else{
  5693. /* If sqlite3BtreeDropTable was called on page 1. */
  5694. zeroPage(pPage, PTF_INTKEY|PTF_LEAF );
  5695. releasePage(pPage);
  5696. }
  5697. return rc;
  5698. }
  5699. int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
  5700. int rc;
  5701. sqlite3BtreeEnter(p);
  5702. p->pBt->db = p->db;
  5703. rc = btreeDropTable(p, iTable, piMoved);
  5704. sqlite3BtreeLeave(p);
  5705. return rc;
  5706. }
  5707. /*
  5708. ** Read the meta-information out of a database file. Meta[0]
  5709. ** is the number of free pages currently in the database. Meta[1]
  5710. ** through meta[15] are available for use by higher layers. Meta[0]
  5711. ** is read-only, the others are read/write.
  5712. **
  5713. ** The schema layer numbers meta values differently. At the schema
  5714. ** layer (and the SetCookie and ReadCookie opcodes) the number of
  5715. ** free pages is not visible. So Cookie[0] is the same as Meta[1].
  5716. */
  5717. int sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){
  5718. DbPage *pDbPage;
  5719. int rc;
  5720. unsigned char *pP1;
  5721. BtShared *pBt = p->pBt;
  5722. sqlite3BtreeEnter(p);
  5723. pBt->db = p->db;
  5724. /* Reading a meta-data value requires a read-lock on page 1 (and hence
  5725. ** the sqlite_master table. We grab this lock regardless of whether or
  5726. ** not the SQLITE_ReadUncommitted flag is set (the table rooted at page
  5727. ** 1 is treated as a special case by queryTableLock() and lockTable()).
  5728. */
  5729. rc = queryTableLock(p, 1, READ_LOCK);
  5730. if( rc!=SQLITE_OK ){
  5731. sqlite3BtreeLeave(p);
  5732. return rc;
  5733. }
  5734. assert( idx>=0 && idx<=15 );
  5735. rc = sqlite3PagerGet(pBt->pPager, 1, &pDbPage);
  5736. if( rc ){
  5737. sqlite3BtreeLeave(p);
  5738. return rc;
  5739. }
  5740. pP1 = (unsigned char *)sqlite3PagerGetData(pDbPage);
  5741. *pMeta = get4byte(&pP1[36 + idx*4]);
  5742. sqlite3PagerUnref(pDbPage);
  5743. /* If autovacuumed is disabled in this build but we are trying to
  5744. ** access an autovacuumed database, then make the database readonly.
  5745. */
  5746. #ifdef SQLITE_OMIT_AUTOVACUUM
  5747. if( idx==4 && *pMeta>0 ) pBt->readOnly = 1;
  5748. #endif
  5749. /* Grab the read-lock on page 1. */
  5750. rc = lockTable(p, 1, READ_LOCK);
  5751. sqlite3BtreeLeave(p);
  5752. return rc;
  5753. }
  5754. /*
  5755. ** Write meta-information back into the database. Meta[0] is
  5756. ** read-only and may not be written.
  5757. */
  5758. int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){
  5759. BtShared *pBt = p->pBt;
  5760. unsigned char *pP1;
  5761. int rc;
  5762. assert( idx>=1 && idx<=15 );
  5763. sqlite3BtreeEnter(p);
  5764. pBt->db = p->db;
  5765. if( p->inTrans!=TRANS_WRITE ){
  5766. rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
  5767. }else{
  5768. assert( pBt->pPage1!=0 );
  5769. pP1 = pBt->pPage1->aData;
  5770. rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
  5771. if( rc==SQLITE_OK ){
  5772. put4byte(&pP1[36 + idx*4], iMeta);
  5773. #ifndef SQLITE_OMIT_AUTOVACUUM
  5774. if( idx==7 ){
  5775. assert( pBt->autoVacuum || iMeta==0 );
  5776. assert( iMeta==0 || iMeta==1 );
  5777. pBt->incrVacuum = iMeta;
  5778. }
  5779. #endif
  5780. }
  5781. }
  5782. sqlite3BtreeLeave(p);
  5783. return rc;
  5784. }
  5785. /*
  5786. ** Return the flag byte at the beginning of the page that the cursor
  5787. ** is currently pointing to.
  5788. */
  5789. int sqlite3BtreeFlags(BtCursor *pCur){
  5790. /* TODO: What about CURSOR_REQUIRESEEK state? Probably need to call
  5791. ** restoreOrClearCursorPosition() here.
  5792. */
  5793. MemPage *pPage = pCur->pPage;
  5794. assert( cursorHoldsMutex(pCur) );
  5795. assert( pPage->pBt==pCur->pBt );
  5796. return pPage ? pPage->aData[pPage->hdrOffset] : 0;
  5797. }
  5798. /*
  5799. ** Return the pager associated with a BTree. This routine is used for
  5800. ** testing and debugging only.
  5801. */
  5802. Pager *sqlite3BtreePager(Btree *p){
  5803. return p->pBt->pPager;
  5804. }
  5805. #ifndef SQLITE_OMIT_INTEGRITY_CHECK
  5806. /*
  5807. ** Append a message to the error message string.
  5808. */
  5809. static void checkAppendMsg(
  5810. IntegrityCk *pCheck,
  5811. char *zMsg1,
  5812. const char *zFormat,
  5813. ...
  5814. ){
  5815. va_list ap;
  5816. char *zMsg2;
  5817. if( !pCheck->mxErr ) return;
  5818. pCheck->mxErr--;
  5819. pCheck->nErr++;
  5820. va_start(ap, zFormat);
  5821. zMsg2 = sqlite3VMPrintf(0, zFormat, ap);
  5822. va_end(ap);
  5823. if( zMsg1==0 ) zMsg1 = "";
  5824. if( pCheck->zErrMsg ){
  5825. char *zOld = pCheck->zErrMsg;
  5826. pCheck->zErrMsg = 0;
  5827. sqlite3SetString(&pCheck->zErrMsg, zOld, "\n", zMsg1, zMsg2, (char*)0);
  5828. sqlite3_free(zOld);
  5829. }else{
  5830. sqlite3SetString(&pCheck->zErrMsg, zMsg1, zMsg2, (char*)0);
  5831. }
  5832. sqlite3_free(zMsg2);
  5833. }
  5834. #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
  5835. #ifndef SQLITE_OMIT_INTEGRITY_CHECK
  5836. /*
  5837. ** Add 1 to the reference count for page iPage. If this is the second
  5838. ** reference to the page, add an error message to pCheck->zErrMsg.
  5839. ** Return 1 if there are 2 ore more references to the page and 0 if
  5840. ** if this is the first reference to the page.
  5841. **
  5842. ** Also check that the page number is in bounds.
  5843. */
  5844. static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){
  5845. if( iPage==0 ) return 1;
  5846. if( iPage>pCheck->nPage || iPage<0 ){
  5847. checkAppendMsg(pCheck, zContext, "invalid page number %d", iPage);
  5848. return 1;
  5849. }
  5850. if( pCheck->anRef[iPage]==1 ){
  5851. checkAppendMsg(pCheck, zContext, "2nd reference to page %d", iPage);
  5852. return 1;
  5853. }
  5854. return (pCheck->anRef[iPage]++)>1;
  5855. }
  5856. #ifndef SQLITE_OMIT_AUTOVACUUM
  5857. /*
  5858. ** Check that the entry in the pointer-map for page iChild maps to
  5859. ** page iParent, pointer type ptrType. If not, append an error message
  5860. ** to pCheck.
  5861. */
  5862. static void checkPtrmap(
  5863. IntegrityCk *pCheck, /* Integrity check context */
  5864. Pgno iChild, /* Child page number */
  5865. u8 eType, /* Expected pointer map type */
  5866. Pgno iParent, /* Expected pointer map parent page number */
  5867. char *zContext /* Context description (used for error msg) */
  5868. ){
  5869. int rc;
  5870. u8 ePtrmapType;
  5871. Pgno iPtrmapParent;
  5872. rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent);
  5873. if( rc!=SQLITE_OK ){
  5874. checkAppendMsg(pCheck, zContext, "Failed to read ptrmap key=%d", iChild);
  5875. return;
  5876. }
  5877. if( ePtrmapType!=eType || iPtrmapParent!=iParent ){
  5878. checkAppendMsg(pCheck, zContext,
  5879. "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)",
  5880. iChild, eType, iParent, ePtrmapType, iPtrmapParent);
  5881. }
  5882. }
  5883. #endif
  5884. /*
  5885. ** Check the integrity of the freelist or of an overflow page list.
  5886. ** Verify that the number of pages on the list is N.
  5887. */
  5888. static void checkList(
  5889. IntegrityCk *pCheck, /* Integrity checking context */
  5890. int isFreeList, /* True for a freelist. False for overflow page list */
  5891. int iPage, /* Page number for first page in the list */
  5892. int N, /* Expected number of pages in the list */
  5893. char *zContext /* Context for error messages */
  5894. ){
  5895. int i;
  5896. int expected = N;
  5897. int iFirst = iPage;
  5898. while( N-- > 0 && pCheck->mxErr ){
  5899. DbPage *pOvflPage;
  5900. unsigned char *pOvflData;
  5901. if( iPage<1 ){
  5902. checkAppendMsg(pCheck, zContext,
  5903. "%d of %d pages missing from overflow list starting at %d",
  5904. N+1, expected, iFirst);
  5905. break;
  5906. }
  5907. if( checkRef(pCheck, iPage, zContext) ) break;
  5908. if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage) ){
  5909. checkAppendMsg(pCheck, zContext, "failed to get page %d", iPage);
  5910. break;
  5911. }
  5912. pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage);
  5913. if( isFreeList ){
  5914. int n = get4byte(&pOvflData[4]);
  5915. #ifndef SQLITE_OMIT_AUTOVACUUM
  5916. if( pCheck->pBt->autoVacuum ){
  5917. checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0, zContext);
  5918. }
  5919. #endif
  5920. if( n>pCheck->pBt->usableSize/4-8 ){
  5921. checkAppendMsg(pCheck, zContext,
  5922. "freelist leaf count too big on page %d", iPage);
  5923. N--;
  5924. }else{
  5925. for(i=0; i<n; i++){
  5926. Pgno iFreePage = get4byte(&pOvflData[8+i*4]);
  5927. #ifndef SQLITE_OMIT_AUTOVACUUM
  5928. if( pCheck->pBt->autoVacuum ){
  5929. checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0, zContext);
  5930. }
  5931. #endif
  5932. checkRef(pCheck, iFreePage, zContext);
  5933. }
  5934. N -= n;
  5935. }
  5936. }
  5937. #ifndef SQLITE_OMIT_AUTOVACUUM
  5938. else{
  5939. /* If this database supports auto-vacuum and iPage is not the last
  5940. ** page in this overflow list, check that the pointer-map entry for
  5941. ** the following page matches iPage.
  5942. */
  5943. if( pCheck->pBt->autoVacuum && N>0 ){
  5944. i = get4byte(pOvflData);
  5945. checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage, zContext);
  5946. }
  5947. }
  5948. #endif
  5949. iPage = get4byte(pOvflData);
  5950. sqlite3PagerUnref(pOvflPage);
  5951. }
  5952. }
  5953. #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
  5954. #ifndef SQLITE_OMIT_INTEGRITY_CHECK
  5955. /*
  5956. ** Do various sanity checks on a single page of a tree. Return
  5957. ** the tree depth. Root pages return 0. Parents of root pages
  5958. ** return 1, and so forth.
  5959. **
  5960. ** These checks are done:
  5961. **
  5962. ** 1. Make sure that cells and freeblocks do not overlap
  5963. ** but combine to completely cover the page.
  5964. ** NO 2. Make sure cell keys are in order.
  5965. ** NO 3. Make sure no key is less than or equal to zLowerBound.
  5966. ** NO 4. Make sure no key is greater than or equal to zUpperBound.
  5967. ** 5. Check the integrity of overflow pages.
  5968. ** 6. Recursively call checkTreePage on all children.
  5969. ** 7. Verify that the depth of all children is the same.
  5970. ** 8. Make sure this page is at least 33% full or else it is
  5971. ** the root of the tree.
  5972. */
  5973. static int checkTreePage(
  5974. IntegrityCk *pCheck, /* Context for the sanity check */
  5975. int iPage, /* Page number of the page to check */
  5976. MemPage *pParent, /* Parent page */
  5977. char *zParentContext /* Parent context */
  5978. ){
  5979. MemPage *pPage;
  5980. int i, rc, depth, d2, pgno, cnt;
  5981. int hdr, cellStart;
  5982. int nCell;
  5983. u8 *data;
  5984. BtShared *pBt;
  5985. int usableSize;
  5986. char zContext[100];
  5987. char *hit;
  5988. sqlite3_snprintf(sizeof(zContext), zContext, "Page %d: ", iPage);
  5989. /* Check that the page exists
  5990. */
  5991. pBt = pCheck->pBt;
  5992. usableSize = pBt->usableSize;
  5993. if( iPage==0 ) return 0;
  5994. if( checkRef(pCheck, iPage, zParentContext) ) return 0;
  5995. if( (rc = sqlite3BtreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
  5996. checkAppendMsg(pCheck, zContext,
  5997. "unable to get the page. error code=%d", rc);
  5998. return 0;
  5999. }
  6000. if( (rc = sqlite3BtreeInitPage(pPage, pParent))!=0 ){
  6001. checkAppendMsg(pCheck, zContext,
  6002. "sqlite3BtreeInitPage() returns error code %d", rc);
  6003. releasePage(pPage);
  6004. return 0;
  6005. }
  6006. /* Check out all the cells.
  6007. */
  6008. depth = 0;
  6009. for(i=0; i<pPage->nCell && pCheck->mxErr; i++){
  6010. u8 *pCell;
  6011. int sz;
  6012. CellInfo info;
  6013. /* Check payload overflow pages
  6014. */
  6015. sqlite3_snprintf(sizeof(zContext), zContext,
  6016. "On tree page %d cell %d: ", iPage, i);
  6017. pCell = findCell(pPage,i);
  6018. sqlite3BtreeParseCellPtr(pPage, pCell, &info);
  6019. sz = info.nData;
  6020. if( !pPage->intKey ) sz += info.nKey;
  6021. assert( sz==info.nPayload );
  6022. if( sz>info.nLocal ){
  6023. int nPage = (sz - info.nLocal + usableSize - 5)/(usableSize - 4);
  6024. Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
  6025. #ifndef SQLITE_OMIT_AUTOVACUUM
  6026. if( pBt->autoVacuum ){
  6027. checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage, zContext);
  6028. }
  6029. #endif
  6030. checkList(pCheck, 0, pgnoOvfl, nPage, zContext);
  6031. }
  6032. /* Check sanity of left child page.
  6033. */
  6034. if( !pPage->leaf ){
  6035. pgno = get4byte(pCell);
  6036. #ifndef SQLITE_OMIT_AUTOVACUUM
  6037. if( pBt->autoVacuum ){
  6038. checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, zContext);
  6039. }
  6040. #endif
  6041. d2 = checkTreePage(pCheck,pgno,pPage,zContext);
  6042. if( i>0 && d2!=depth ){
  6043. checkAppendMsg(pCheck, zContext, "Child page depth differs");
  6044. }
  6045. depth = d2;
  6046. }
  6047. }
  6048. if( !pPage->leaf ){
  6049. pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
  6050. sqlite3_snprintf(sizeof(zContext), zContext,
  6051. "On page %d at right child: ", iPage);
  6052. #ifndef SQLITE_OMIT_AUTOVACUUM
  6053. if( pBt->autoVacuum ){
  6054. checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, 0);
  6055. }
  6056. #endif
  6057. checkTreePage(pCheck, pgno, pPage, zContext);
  6058. }
  6059. /* Check for complete coverage of the page
  6060. */
  6061. data = pPage->aData;
  6062. hdr = pPage->hdrOffset;
  6063. hit = sqlite3MallocZero( usableSize );
  6064. if( hit ){
  6065. memset(hit, 1, get2byte(&data[hdr+5]));
  6066. nCell = get2byte(&data[hdr+3]);
  6067. cellStart = hdr + 12 - 4*pPage->leaf;
  6068. for(i=0; i<nCell; i++){
  6069. int pc = get2byte(&data[cellStart+i*2]);
  6070. int size = cellSizePtr(pPage, &data[pc]);
  6071. int j;
  6072. if( (pc+size-1)>=usableSize || pc<0 ){
  6073. checkAppendMsg(pCheck, 0,
  6074. "Corruption detected in cell %d on page %d",i,iPage,0);
  6075. }else{
  6076. for(j=pc+size-1; j>=pc; j--) hit[j]++;
  6077. }
  6078. }
  6079. for(cnt=0, i=get2byte(&data[hdr+1]); i>0 && i<usableSize && cnt<10000;
  6080. cnt++){
  6081. int size = get2byte(&data[i+2]);
  6082. int j;
  6083. if( (i+size-1)>=usableSize || i<0 ){
  6084. checkAppendMsg(pCheck, 0,
  6085. "Corruption detected in cell %d on page %d",i,iPage,0);
  6086. }else{
  6087. for(j=i+size-1; j>=i; j--) hit[j]++;
  6088. }
  6089. i = get2byte(&data[i]);
  6090. }
  6091. for(i=cnt=0; i<usableSize; i++){
  6092. if( hit[i]==0 ){
  6093. cnt++;
  6094. }else if( hit[i]>1 ){
  6095. checkAppendMsg(pCheck, 0,
  6096. "Multiple uses for byte %d of page %d", i, iPage);
  6097. break;
  6098. }
  6099. }
  6100. if( cnt!=data[hdr+7] ){
  6101. checkAppendMsg(pCheck, 0,
  6102. "Fragmented space is %d byte reported as %d on page %d",
  6103. cnt, data[hdr+7], iPage);
  6104. }
  6105. }
  6106. sqlite3_free(hit);
  6107. releasePage(pPage);
  6108. return depth+1;
  6109. }
  6110. #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
  6111. #ifndef SQLITE_OMIT_INTEGRITY_CHECK
  6112. /*
  6113. ** This routine does a complete check of the given BTree file. aRoot[] is
  6114. ** an array of pages numbers were each page number is the root page of
  6115. ** a table. nRoot is the number of entries in aRoot.
  6116. **
  6117. ** If everything checks out, this routine returns NULL. If something is
  6118. ** amiss, an error message is written into memory obtained from malloc()
  6119. ** and a pointer to that error message is returned. The calling function
  6120. ** is responsible for freeing the error message when it is done.
  6121. */
  6122. char *sqlite3BtreeIntegrityCheck(
  6123. Btree *p, /* The btree to be checked */
  6124. int *aRoot, /* An array of root pages numbers for individual trees */
  6125. int nRoot, /* Number of entries in aRoot[] */
  6126. int mxErr, /* Stop reporting errors after this many */
  6127. int *pnErr /* Write number of errors seen to this variable */
  6128. ){
  6129. int i;
  6130. int nRef;
  6131. IntegrityCk sCheck;
  6132. BtShared *pBt = p->pBt;
  6133. sqlite3BtreeEnter(p);
  6134. pBt->db = p->db;
  6135. nRef = sqlite3PagerRefcount(pBt->pPager);
  6136. if( lockBtreeWithRetry(p)!=SQLITE_OK ){
  6137. sqlite3BtreeLeave(p);
  6138. return sqlite3StrDup("Unable to acquire a read lock on the database");
  6139. }
  6140. sCheck.pBt = pBt;
  6141. sCheck.pPager = pBt->pPager;
  6142. sCheck.nPage = sqlite3PagerPagecount(sCheck.pPager);
  6143. sCheck.mxErr = mxErr;
  6144. sCheck.nErr = 0;
  6145. *pnErr = 0;
  6146. #ifndef SQLITE_OMIT_AUTOVACUUM
  6147. if( pBt->nTrunc!=0 ){
  6148. sCheck.nPage = pBt->nTrunc;
  6149. }
  6150. #endif
  6151. if( sCheck.nPage==0 ){
  6152. unlockBtreeIfUnused(pBt);
  6153. sqlite3BtreeLeave(p);
  6154. return 0;
  6155. }
  6156. sCheck.anRef = sqlite3_malloc( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) );
  6157. if( !sCheck.anRef ){
  6158. unlockBtreeIfUnused(pBt);
  6159. *pnErr = 1;
  6160. sqlite3BtreeLeave(p);
  6161. return sqlite3MPrintf(p->db, "Unable to malloc %d bytes",
  6162. (sCheck.nPage+1)*sizeof(sCheck.anRef[0]));
  6163. }
  6164. for(i=0; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; }
  6165. i = PENDING_BYTE_PAGE(pBt);
  6166. if( i<=sCheck.nPage ){
  6167. sCheck.anRef[i] = 1;
  6168. }
  6169. sCheck.zErrMsg = 0;
  6170. /* Check the integrity of the freelist
  6171. */
  6172. checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]),
  6173. get4byte(&pBt->pPage1->aData[36]), "Main freelist: ");
  6174. /* Check all the tables.
  6175. */
  6176. for(i=0; i<nRoot && sCheck.mxErr; i++){
  6177. if( aRoot[i]==0 ) continue;
  6178. #ifndef SQLITE_OMIT_AUTOVACUUM
  6179. if( pBt->autoVacuum && aRoot[i]>1 ){
  6180. checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0, 0);
  6181. }
  6182. #endif
  6183. checkTreePage(&sCheck, aRoot[i], 0, "List of tree roots: ");
  6184. }
  6185. /* Make sure every page in the file is referenced
  6186. */
  6187. for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){
  6188. #ifdef SQLITE_OMIT_AUTOVACUUM
  6189. if( sCheck.anRef[i]==0 ){
  6190. checkAppendMsg(&sCheck, 0, "Page %d is never used", i);
  6191. }
  6192. #else
  6193. /* If the database supports auto-vacuum, make sure no tables contain
  6194. ** references to pointer-map pages.
  6195. */
  6196. if( sCheck.anRef[i]==0 &&
  6197. (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){
  6198. checkAppendMsg(&sCheck, 0, "Page %d is never used", i);
  6199. }
  6200. if( sCheck.anRef[i]!=0 &&
  6201. (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){
  6202. checkAppendMsg(&sCheck, 0, "Pointer map page %d is referenced", i);
  6203. }
  6204. #endif
  6205. }
  6206. /* Make sure this analysis did not leave any unref() pages
  6207. */
  6208. unlockBtreeIfUnused(pBt);
  6209. if( nRef != sqlite3PagerRefcount(pBt->pPager) ){
  6210. checkAppendMsg(&sCheck, 0,
  6211. "Outstanding page count goes from %d to %d during this analysis",
  6212. nRef, sqlite3PagerRefcount(pBt->pPager)
  6213. );
  6214. }
  6215. /* Clean up and report errors.
  6216. */
  6217. sqlite3BtreeLeave(p);
  6218. sqlite3_free(sCheck.anRef);
  6219. *pnErr = sCheck.nErr;
  6220. return sCheck.zErrMsg;
  6221. }
  6222. #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
  6223. /*
  6224. ** Return the full pathname of the underlying database file.
  6225. **
  6226. ** The pager filename is invariant as long as the pager is
  6227. ** open so it is safe to access without the BtShared mutex.
  6228. */
  6229. const char *sqlite3BtreeGetFilename(Btree *p){
  6230. assert( p->pBt->pPager!=0 );
  6231. return sqlite3PagerFilename(p->pBt->pPager);
  6232. }
  6233. /*
  6234. ** Return the pathname of the directory that contains the database file.
  6235. **
  6236. ** The pager directory name is invariant as long as the pager is
  6237. ** open so it is safe to access without the BtShared mutex.
  6238. */
  6239. const char *sqlite3BtreeGetDirname(Btree *p){
  6240. assert( p->pBt->pPager!=0 );
  6241. return sqlite3PagerDirname(p->pBt->pPager);
  6242. }
  6243. /*
  6244. ** Return the pathname of the journal file for this database. The return
  6245. ** value of this routine is the same regardless of whether the journal file
  6246. ** has been created or not.
  6247. **
  6248. ** The pager journal filename is invariant as long as the pager is
  6249. ** open so it is safe to access without the BtShared mutex.
  6250. */
  6251. const char *sqlite3BtreeGetJournalname(Btree *p){
  6252. assert( p->pBt->pPager!=0 );
  6253. return sqlite3PagerJournalname(p->pBt->pPager);
  6254. }
  6255. #ifndef SQLITE_OMIT_VACUUM
  6256. /*
  6257. ** Copy the complete content of pBtFrom into pBtTo. A transaction
  6258. ** must be active for both files.
  6259. **
  6260. ** The size of file pBtFrom may be reduced by this operation.
  6261. ** If anything goes wrong, the transaction on pBtFrom is rolled back.
  6262. */
  6263. static int btreeCopyFile(Btree *pTo, Btree *pFrom){
  6264. int rc = SQLITE_OK;
  6265. Pgno i, nPage, nToPage, iSkip;
  6266. BtShared *pBtTo = pTo->pBt;
  6267. BtShared *pBtFrom = pFrom->pBt;
  6268. pBtTo->db = pTo->db;
  6269. pBtFrom->db = pFrom->db;
  6270. if( pTo->inTrans!=TRANS_WRITE || pFrom->inTrans!=TRANS_WRITE ){
  6271. return SQLITE_ERROR;
  6272. }
  6273. if( pBtTo->pCursor ) return SQLITE_BUSY;
  6274. nToPage = sqlite3PagerPagecount(pBtTo->pPager);
  6275. nPage = sqlite3PagerPagecount(pBtFrom->pPager);
  6276. iSkip = PENDING_BYTE_PAGE(pBtTo);
  6277. for(i=1; rc==SQLITE_OK && i<=nPage; i++){
  6278. DbPage *pDbPage;
  6279. if( i==iSkip ) continue;
  6280. rc = sqlite3PagerGet(pBtFrom->pPager, i, &pDbPage);
  6281. if( rc ) break;
  6282. rc = sqlite3PagerOverwrite(pBtTo->pPager, i, sqlite3PagerGetData(pDbPage));
  6283. sqlite3PagerUnref(pDbPage);
  6284. }
  6285. /* If the file is shrinking, journal the pages that are being truncated
  6286. ** so that they can be rolled back if the commit fails.
  6287. */
  6288. for(i=nPage+1; rc==SQLITE_OK && i<=nToPage; i++){
  6289. DbPage *pDbPage;
  6290. if( i==iSkip ) continue;
  6291. rc = sqlite3PagerGet(pBtTo->pPager, i, &pDbPage);
  6292. if( rc ) break;
  6293. rc = sqlite3PagerWrite(pDbPage);
  6294. sqlite3PagerDontWrite(pDbPage);
  6295. /* Yeah. It seems wierd to call DontWrite() right after Write(). But
  6296. ** that is because the names of those procedures do not exactly
  6297. ** represent what they do. Write() really means "put this page in the
  6298. ** rollback journal and mark it as dirty so that it will be written
  6299. ** to the database file later." DontWrite() undoes the second part of
  6300. ** that and prevents the page from being written to the database. The
  6301. ** page is still on the rollback journal, though. And that is the whole
  6302. ** point of this loop: to put pages on the rollback journal. */
  6303. sqlite3PagerUnref(pDbPage);
  6304. }
  6305. if( !rc && nPage<nToPage ){
  6306. rc = sqlite3PagerTruncate(pBtTo->pPager, nPage);
  6307. }
  6308. if( rc ){
  6309. sqlite3BtreeRollback(pTo);
  6310. }
  6311. return rc;
  6312. }
  6313. int sqlite3BtreeCopyFile(Btree *pTo, Btree *pFrom){
  6314. int rc;
  6315. sqlite3BtreeEnter(pTo);
  6316. sqlite3BtreeEnter(pFrom);
  6317. rc = btreeCopyFile(pTo, pFrom);
  6318. sqlite3BtreeLeave(pFrom);
  6319. sqlite3BtreeLeave(pTo);
  6320. return rc;
  6321. }
  6322. #endif /* SQLITE_OMIT_VACUUM */
  6323. /*
  6324. ** Return non-zero if a transaction is active.
  6325. */
  6326. int sqlite3BtreeIsInTrans(Btree *p){
  6327. assert( p==0 || sqlite3_mutex_held(p->db->mutex) );
  6328. return (p && (p->inTrans==TRANS_WRITE));
  6329. }
  6330. /*
  6331. ** Return non-zero if a statement transaction is active.
  6332. */
  6333. int sqlite3BtreeIsInStmt(Btree *p){
  6334. assert( sqlite3BtreeHoldsMutex(p) );
  6335. return (p->pBt && p->pBt->inStmt);
  6336. }
  6337. /*
  6338. ** Return non-zero if a read (or write) transaction is active.
  6339. */
  6340. int sqlite3BtreeIsInReadTrans(Btree *p){
  6341. assert( sqlite3_mutex_held(p->db->mutex) );
  6342. return (p && (p->inTrans!=TRANS_NONE));
  6343. }
  6344. /*
  6345. ** This function returns a pointer to a blob of memory associated with
  6346. ** a single shared-btree. The memory is used by client code for its own
  6347. ** purposes (for example, to store a high-level schema associated with
  6348. ** the shared-btree). The btree layer manages reference counting issues.
  6349. **
  6350. ** The first time this is called on a shared-btree, nBytes bytes of memory
  6351. ** are allocated, zeroed, and returned to the caller. For each subsequent
  6352. ** call the nBytes parameter is ignored and a pointer to the same blob
  6353. ** of memory returned.
  6354. **
  6355. ** Just before the shared-btree is closed, the function passed as the
  6356. ** xFree argument when the memory allocation was made is invoked on the
  6357. ** blob of allocated memory. This function should not call sqlite3_free()
  6358. ** on the memory, the btree layer does that.
  6359. */
  6360. void *sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){
  6361. BtShared *pBt = p->pBt;
  6362. sqlite3BtreeEnter(p);
  6363. if( !pBt->pSchema ){
  6364. pBt->pSchema = sqlite3MallocZero(nBytes);
  6365. pBt->xFreeSchema = xFree;
  6366. }
  6367. sqlite3BtreeLeave(p);
  6368. return pBt->pSchema;
  6369. }
  6370. /*
  6371. ** Return true if another user of the same shared btree as the argument
  6372. ** handle holds an exclusive lock on the sqlite_master table.
  6373. */
  6374. int sqlite3BtreeSchemaLocked(Btree *p){
  6375. int rc;
  6376. assert( sqlite3_mutex_held(p->db->mutex) );
  6377. sqlite3BtreeEnter(p);
  6378. rc = (queryTableLock(p, MASTER_ROOT, READ_LOCK)!=SQLITE_OK);
  6379. sqlite3BtreeLeave(p);
  6380. return rc;
  6381. }
  6382. #ifndef SQLITE_OMIT_SHARED_CACHE
  6383. /*
  6384. ** Obtain a lock on the table whose root page is iTab. The
  6385. ** lock is a write lock if isWritelock is true or a read lock
  6386. ** if it is false.
  6387. */
  6388. int sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){
  6389. int rc = SQLITE_OK;
  6390. u8 lockType = (isWriteLock?WRITE_LOCK:READ_LOCK);
  6391. sqlite3BtreeEnter(p);
  6392. rc = queryTableLock(p, iTab, lockType);
  6393. if( rc==SQLITE_OK ){
  6394. rc = lockTable(p, iTab, lockType);
  6395. }
  6396. sqlite3BtreeLeave(p);
  6397. return rc;
  6398. }
  6399. #endif
  6400. #ifndef SQLITE_OMIT_INCRBLOB
  6401. /*
  6402. ** Argument pCsr must be a cursor opened for writing on an
  6403. ** INTKEY table currently pointing at a valid table entry.
  6404. ** This function modifies the data stored as part of that entry.
  6405. ** Only the data content may only be modified, it is not possible
  6406. ** to change the length of the data stored.
  6407. */
  6408. int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){
  6409. assert( cursorHoldsMutex(pCsr) );
  6410. assert( sqlite3_mutex_held(pCsr->pBtree->db->mutex) );
  6411. assert(pCsr->isIncrblobHandle);
  6412. if( pCsr->eState>=CURSOR_REQUIRESEEK ){
  6413. if( pCsr->eState==CURSOR_FAULT ){
  6414. return pCsr->skip;
  6415. }else{
  6416. return SQLITE_ABORT;
  6417. }
  6418. }
  6419. /* Check some preconditions:
  6420. ** (a) the cursor is open for writing,
  6421. ** (b) there is no read-lock on the table being modified and
  6422. ** (c) the cursor points at a valid row of an intKey table.
  6423. */
  6424. if( !pCsr->wrFlag ){
  6425. return SQLITE_READONLY;
  6426. }
  6427. assert( !pCsr->pBt->readOnly
  6428. && pCsr->pBt->inTransaction==TRANS_WRITE );
  6429. if( checkReadLocks(pCsr->pBtree, pCsr->pgnoRoot, pCsr) ){
  6430. return SQLITE_LOCKED; /* The table pCur points to has a read lock */
  6431. }
  6432. if( pCsr->eState==CURSOR_INVALID || !pCsr->pPage->intKey ){
  6433. return SQLITE_ERROR;
  6434. }
  6435. return accessPayload(pCsr, offset, amt, (unsigned char *)z, 0, 1);
  6436. }
  6437. /*
  6438. ** Set a flag on this cursor to cache the locations of pages from the
  6439. ** overflow list for the current row. This is used by cursors opened
  6440. ** for incremental blob IO only.
  6441. **
  6442. ** This function sets a flag only. The actual page location cache
  6443. ** (stored in BtCursor.aOverflow[]) is allocated and used by function
  6444. ** accessPayload() (the worker function for sqlite3BtreeData() and
  6445. ** sqlite3BtreePutData()).
  6446. */
  6447. void sqlite3BtreeCacheOverflow(BtCursor *pCur){
  6448. assert( cursorHoldsMutex(pCur) );
  6449. assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
  6450. assert(!pCur->isIncrblobHandle);
  6451. assert(!pCur->aOverflow);
  6452. pCur->isIncrblobHandle = 1;
  6453. }
  6454. #endif