pager.c 168 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124
  1. /*
  2. ** 2001 September 15
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. *************************************************************************
  12. ** This is the implementation of the page cache subsystem or "pager".
  13. **
  14. ** The pager is used to access a database disk file. It implements
  15. ** atomic commit and rollback through the use of a journal file that
  16. ** is separate from the database file. The pager also implements file
  17. ** locking to prevent two processes from writing the same database
  18. ** file simultaneously, or one process from reading the database while
  19. ** another is writing.
  20. **
  21. ** @(#) $Id: pager.c,v 1.400 2007/12/13 21:54:11 drh Exp $
  22. */
  23. #ifndef SQLITE_OMIT_DISKIO
  24. #include "sqliteInt.h"
  25. #include <assert.h>
  26. #include <string.h>
  /*
  ** Debug tracing hooks for the pager.  Tracing is normally compiled out.
  ** Change the "#if 0" below to "#if 1" to route every PAGERTRACEn() call
  ** (n is the number of arguments) through printf().  When tracing is
  ** disabled each macro expands to nothing, so its arguments are never
  ** evaluated.
  */
  #if 0
  # define sqlite3DebugPrintf printf
  # define PAGERTRACE1(X)          sqlite3DebugPrintf(X)
  # define PAGERTRACE2(X,Y)        sqlite3DebugPrintf(X,Y)
  # define PAGERTRACE3(X,Y,Z)      sqlite3DebugPrintf(X,Y,Z)
  # define PAGERTRACE4(X,Y,Z,W)    sqlite3DebugPrintf(X,Y,Z,W)
  # define PAGERTRACE5(X,Y,Z,W,V)  sqlite3DebugPrintf(X,Y,Z,W,V)
  #else
  # define PAGERTRACE1(X)
  # define PAGERTRACE2(X,Y)
  # define PAGERTRACE3(X,Y,Z)
  # define PAGERTRACE4(X,Y,Z,W)
  # define PAGERTRACE5(X,Y,Z,W,V)
  #endif
  /*
  ** The following two macros are used within the PAGERTRACEn() macros above
  ** to print out file-descriptors.
  **
  ** PAGERID(p)         p is a pointer to a Pager struct.  The result is
  **                    p->fd converted to int.
  ** FILEHANDLEID(fd)   fd is the file handle itself.  The result is fd
  **                    converted to int.
  **
  ** Every macro argument is wrapped in parentheses so that expression
  ** arguments such as PAGERID(&pager), PAGERID(pBase+1) or
  ** FILEHANDLEID(a+b) expand with the intended precedence.
  **
  ** NOTE(review): if the handle is a pointer, the conversion to int may
  ** truncate on hosts where pointers are wider than int.  That is harmless
  ** for trace output; confirm before using the value for anything else.
  */
  #define PAGERID(p) ((int)((p)->fd))
  #define FILEHANDLEID(fd) ((int)(fd))
  54. /*
  55. ** The page cache as a whole is always in one of the following
  56. ** states:
  57. **
  58. ** PAGER_UNLOCK The page cache is not currently reading or
  59. ** writing the database file. There is no
  60. ** data held in memory. This is the initial
  61. ** state.
  62. **
  63. ** PAGER_SHARED The page cache is reading the database.
  64. ** Writing is not permitted. There can be
  65. ** multiple readers accessing the same database
  66. ** file at the same time.
  67. **
  68. ** PAGER_RESERVED This process has reserved the database for writing
  69. ** but has not yet made any changes. Only one process
  70. ** at a time can reserve the database. The original
  71. ** database file has not been modified so other
  72. ** processes may still be reading the on-disk
  73. ** database file.
  74. **
  75. ** PAGER_EXCLUSIVE The page cache is writing the database.
  76. ** Access is exclusive. No other processes or
  77. ** threads can be reading or writing while one
  78. ** process is writing.
  79. **
  80. ** PAGER_SYNCED The pager moves to this state from PAGER_EXCLUSIVE
  81. ** after all dirty pages have been written to the
  82. ** database file and the file has been synced to
  83. ** disk. All that remains to do is to remove or
  84. ** truncate the journal file and the transaction
  85. ** will be committed.
  86. **
  87. ** The page cache comes up in PAGER_UNLOCK. The first time a
  88. ** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED.
  89. ** After all pages have been released using sqlite_page_unref(),
  90. ** the state transitions back to PAGER_UNLOCK. The first time
  91. ** that sqlite3PagerWrite() is called, the state transitions to
  92. ** PAGER_RESERVED. (Note that sqlite3PagerWrite() can only be
  93. ** called on an outstanding page which means that the pager must
  94. ** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
  95. ** PAGER_RESERVED means that there is an open rollback journal.
  96. ** The transition to PAGER_EXCLUSIVE occurs before any changes
  97. ** are made to the database file, though writes to the rollback
  98. ** journal occur with just PAGER_RESERVED. After an sqlite3PagerRollback()
  99. ** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED,
  100. ** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode.
  101. */
  /*
  ** Numeric codes for the pager states described above, in increasing
  ** order of lock strength.
  ** NOTE(review): the value 3 is skipped, presumably so the codes stay
  ** equal to the corresponding file-lock constants - confirm.
  */
  #define PAGER_UNLOCK      0   /* not reading or writing; initial state */
  #define PAGER_SHARED      1   /* reading; same as SHARED_LOCK */
  #define PAGER_RESERVED    2   /* reserved for writing; same as RESERVED_LOCK */
  #define PAGER_EXCLUSIVE   4   /* writing; same as EXCLUSIVE_LOCK */
  #define PAGER_SYNCED      5   /* dirty pages written and file synced */
  /*
  ** Compile-time switch.  When SQLITE_BUSY_RESERVED_LOCK is set to true,
  ** a failed attempt to get a reserved lock invokes the busy callback.
  ** The default supplied here is off.  To see why, consider this scenario:
  **
  ** Suppose thread A already has a shared lock and wants a reserved lock.
  ** Thread B already has a reserved lock and wants an exclusive lock.  If
  ** both threads are using their busy callbacks, it might be a long time
  ** before one of the threads gives up and allows the other to proceed.
  ** But if the thread trying to get the reserved lock gives up quickly
  ** (if it never invokes its busy callback) then the contention will be
  ** resolved quickly.
  */
  #if !defined(SQLITE_BUSY_RESERVED_LOCK)
  # define SQLITE_BUSY_RESERVED_LOCK 0
  #endif
  /*
  ** Round X up to the next multiple of 8.  A value that is already a
  ** multiple of 8 is returned unchanged.  When X is an address, the
  ** result is therefore an address aligned to an 8-byte boundary.
  */
  #define FORCE_ALIGNMENT(X)   ( ((X) + 7) & ~7 )
  /* Forward declaration of the page header; the struct body appears later. */
  typedef struct PgHdr PgHdr;
  /*
  ** Head of a list of unreferenced pages kept in least-recently-used
  ** order: the first entry has gone unreferenced the longest and the last
  ** entry was used most recently.  Each pager embeds one of these
  ** (Pager.lru).
  **
  ** When memory-management is enabled, every unreferenced page is also
  ** kept on a global LRU list (global variable sqlite3LruPageList).
  **
  ** In both lists, pFirstSynced points to the first page that does not
  ** require an fsync() operation before its memory can be reclaimed.  It
  ** is NULL when no such page exists.
  */
  typedef struct PagerLruList {
    PgHdr *pFirst;        /* Head of the list (least recently used) */
    PgHdr *pLast;         /* Tail of the list (most recently used) */
    PgHdr *pFirstSynced;  /* First entry with PgHdr.needSync==0, or NULL */
  } PagerLruList;
  150. /*
  151. ** The following structure contains the next and previous pointers used
  152. ** to link a PgHdr structure into a PagerLruList linked list.
  153. */
  154. typedef struct PagerLruLink PagerLruLink;
  155. struct PagerLruLink {
  156. PgHdr *pNext;
  157. PgHdr *pPrev;
  158. };
  159. /*
  160. ** Each in-memory image of a page begins with the following header.
  161. ** This header is only visible to this pager module. The client
  162. ** code that calls pager sees only the data that follows the header.
  163. **
  164. ** Client code should call sqlite3PagerWrite() on a page prior to making
  165. ** any modifications to that page. The first time sqlite3PagerWrite()
  166. ** is called, the original page contents are written into the rollback
  167. ** journal and PgHdr.inJournal and PgHdr.needSync are set. Later, once
  168. ** the journal page has made it onto the disk surface, PgHdr.needSync
  169. ** is cleared. The modified page cannot be written back into the original
  170. ** database file until the journal pages have been synced to disk and the
  171. ** PgHdr.needSync has been cleared.
  172. **
  173. ** The PgHdr.dirty flag is set when sqlite3PagerWrite() is called and
  174. ** is cleared again when the page content is written back to the original
  175. ** database file.
  176. **
  177. ** Details of important structure elements:
  178. **
  179. ** needSync
  180. **
  181. ** If this is true, this means that it is not safe to write the page
  182. ** content to the database because the original content needed
  183. ** for rollback has not yet been synced to the main rollback journal.
  184. ** The original content may have been written to the rollback journal
  185. ** but it has not yet been synced. So we cannot write to the database
  186. ** file because power failure might cause the page in the journal file
  187. ** to never reach the disk. It is as if the write to the journal file
  188. ** does not occur until the journal file is synced.
  189. **
  190. ** This flag is false if the page content exactly matches what
  191. ** currently exists in the database file. The needSync flag is also
  192. ** false if the original content has been written to the main rollback
  193. ** journal and synced. If the page represents a new page that has
  194. ** been added onto the end of the database during the current
  195. ** transaction, the needSync flag is true until the original database
  196. ** size in the journal header has been synced to disk.
  197. **
  198. ** inJournal
  199. **
  200. ** This is true if the original page has been written into the main
  201. ** rollback journal. This is always false for new pages added to
  202. ** the end of the database file during the current transaction.
  203. ** And this flag says nothing about whether or not the journal
  204. ** has been synced to disk. For pages that are in the original
  205. ** database file, the following expression should always be true:
  206. **
  207. ** inJournal = (pPager->aInJournal[(pgno-1)/8] & (1<<((pgno-1)%8)))!=0
  208. **
  209. ** The pPager->aInJournal[] array is only valid for the original
  210. ** pages of the database, not new pages that are added to the end
  211. ** of the database, so obviously the above expression cannot be
  212. ** valid for new pages. For new pages inJournal is always 0.
  213. **
  214. ** dirty
  215. **
  216. ** When true, this means that the content of the page has been
  217. ** modified and needs to be written back to the database file.
  218. ** If false, it means that either the content of the page is
  219. ** unchanged or else the content is unimportant and we do not
  220. ** care whether or not it is preserved.
  221. **
  222. ** alwaysRollback
  223. **
  224. ** This means that the sqlite3PagerDontRollback() API should be
  225. ** ignored for this page. The DontRollback() API attempts to say
  226. ** that the content of the page on disk is unimportant (it is an
  227. ** unused page on the freelist) so that it is unnecessary to
  228. ** rollback changes to this page because the content of the page
  229. ** can change without changing the meaning of the database. This
  230. ** flag overrides any DontRollback() attempt. This flag is set
  231. ** when a page that originally contained valid data is added to
  232. ** the freelist. Later in the same transaction, this page might
  233. ** be pulled from the freelist and reused for something different
  234. ** and at that point the DontRollback() API will be called because
  235. ** pages taken from the freelist do not need to be protected by
  236. ** the rollback journal. But this flag says that the page was
  237. ** not originally part of the freelist so that it still needs to
  238. ** be rolled back in spite of any subsequent DontRollback() calls.
  239. **
  240. ** needRead
  241. **
  242. ** This flag means (when true) that the content of the page has
  243. ** not yet been loaded from disk. The in-memory content is just
  244. ** garbage. (Actually, we zero the content, but you should not
  245. ** make any assumptions about the content nevertheless.) If the
  246. ** content is needed in the future, it should be read from the
  247. ** original database file.
  248. */
  249. struct PgHdr {
  250. Pager *pPager; /* The pager to which this page belongs */
  251. Pgno pgno; /* The page number for this page */
  252. PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
  253. PagerLruLink free; /* Next and previous free pages */
  254. PgHdr *pNextAll; /* A list of all pages */
  255. u8 inJournal; /* TRUE if has been written to journal */
  256. u8 dirty; /* TRUE if we need to write back changes */
  257. u8 needSync; /* Sync journal before writing this page */
  258. u8 alwaysRollback; /* Disable DontRollback() for this page */
  259. u8 needRead; /* Read content if PagerWrite() is called */
  260. short int nRef; /* Number of users of this page */
  261. PgHdr *pDirty, *pPrevDirty; /* Dirty pages */
  262. #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  263. PagerLruLink gfree; /* Global list of nRef==0 pages */
  264. #endif
  265. #ifdef SQLITE_CHECK_PAGES
  266. u32 pageHash;
  267. #endif
  268. void *pData; /* Page data */
  269. /* Pager.nExtra bytes of local data appended to this header */
  270. };
  271. /*
  272. ** For an in-memory only database, some extra information is recorded about
  273. ** each page so that changes can be rolled back. (Journal files are not
  274. ** used for in-memory databases.) The following information is added to
  275. ** the end of every EXTRA block for in-memory databases.
  276. **
  277. ** This information could have been added directly to the PgHdr structure.
  278. ** But then it would take up an extra 8 bytes of storage on every PgHdr
  279. ** even for disk-based databases. Splitting it out saves 8 bytes. This
  280. ** is only a savings of 0.8% but those percentages add up.
  281. */
  282. typedef struct PgHistory PgHistory;
  283. struct PgHistory {
  284. u8 *pOrig; /* Original page text. Restore to this on a full rollback */
  285. u8 *pStmt; /* Text as it was at the beginning of the current statement */
  286. PgHdr *pNextStmt, *pPrevStmt; /* List of pages in the statement journal */
  287. u8 inStmt; /* TRUE if in the statement subjournal */
  288. };
  /*
  ** Macros used for invoking the codec, if there is one.
  **
  **   CODEC1(P,D,N,X)  Statement form: if pager P has a codec installed,
  **                    call it on data D for page N with mode X.  The
  **                    result of the codec is discarded.
  **   CODEC2(P,D,N,X)  Expression form: evaluates to the (char*) buffer
  **                    returned by the codec, or to D itself when no codec
  **                    is installed.
  **
  ** Without SQLITE_HAS_CODEC, CODEC1 expands to nothing and CODEC2 is just
  ** a cast of D.  The P and D parameters are parenthesized so that
  ** arbitrary expressions (for example "a+1") may be passed safely.
  */
  #ifdef SQLITE_HAS_CODEC
  # define CODEC1(P,D,N,X) \
      if( (P)->xCodec!=0 ){ (P)->xCodec((P)->pCodecArg,(D),(N),(X)); }
  # define CODEC2(P,D,N,X) \
      ((char*)((P)->xCodec!=0?(P)->xCodec((P)->pCodecArg,(D),(N),(X)):(D)))
  #else
  # define CODEC1(P,D,N,X) /* NO-OP */
  # define CODEC2(P,D,N,X) ((char*)(D))
  #endif
  /*
  ** Conversions from a PgHdr pointer to the regions associated with it:
  **
  **   PGHDR_TO_DATA(P)       The page data buffer (PgHdr.pData).
  **   PGHDR_TO_EXTRA(G,P)    The memory immediately following header G.
  **                          The second argument is not used.
  **   PGHDR_TO_HIST(P,PGR)   The PgHistory object located PGR->nExtra bytes
  **                          past the end of header P.
  */
  #define PGHDR_TO_DATA(P)      ((P)->pData)
  #define PGHDR_TO_EXTRA(G,P)   ((void*)((G)+1))
  #define PGHDR_TO_HIST(P,PGR)  \
              ((PgHistory*)((char*)((P)+1) + (PGR)->nExtra))
  307. /*
  308. ** A open page cache is an instance of the following structure.
  309. **
  310. ** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, or
  311. ** or SQLITE_FULL. Once one of the first three errors occurs, it persists
  312. ** and is returned as the result of every major pager API call. The
  313. ** SQLITE_FULL return code is slightly different. It persists only until the
  314. ** next successful rollback is performed on the pager cache. Also,
  315. ** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup()
  316. ** APIs, they may still be used successfully.
  317. */
  318. struct Pager {
  319. sqlite3_vfs *pVfs; /* OS functions to use for IO */
  320. u8 journalOpen; /* True if journal file descriptors is valid */
  321. u8 journalStarted; /* True if header of journal is synced */
  322. u8 useJournal; /* Use a rollback journal on this file */
  323. u8 noReadlock; /* Do not bother to obtain readlocks */
  324. u8 stmtOpen; /* True if the statement subjournal is open */
  325. u8 stmtInUse; /* True we are in a statement subtransaction */
  326. u8 stmtAutoopen; /* Open stmt journal when main journal is opened*/
  327. u8 noSync; /* Do not sync the journal if true */
  328. u8 fullSync; /* Do extra syncs of the journal for robustness */
  329. u8 sync_flags; /* One of SYNC_NORMAL or SYNC_FULL */
  330. u8 state; /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
  331. u8 tempFile; /* zFilename is a temporary file */
  332. u8 readOnly; /* True for a read-only database */
  333. u8 needSync; /* True if an fsync() is needed on the journal */
  334. u8 dirtyCache; /* True if cached pages have changed */
  335. u8 alwaysRollback; /* Disable DontRollback() for all pages */
  336. u8 memDb; /* True to inhibit all file I/O */
  337. u8 setMaster; /* True if a m-j name has been written to jrnl */
  338. u8 doNotSync; /* Boolean. While true, do not spill the cache */
  339. u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */
  340. u8 changeCountDone; /* Set after incrementing the change-counter */
  341. u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */
  342. int errCode; /* One of several kinds of errors */
  343. int dbSize; /* Number of pages in the file */
  344. int origDbSize; /* dbSize before the current change */
  345. int stmtSize; /* Size of database (in pages) at stmt_begin() */
  346. int nRec; /* Number of pages written to the journal */
  347. u32 cksumInit; /* Quasi-random value added to every checksum */
  348. int stmtNRec; /* Number of records in stmt subjournal */
  349. int nExtra; /* Add this many bytes to each in-memory page */
  350. int pageSize; /* Number of bytes in a page */
  351. int nPage; /* Total number of in-memory pages */
  352. int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
  353. int mxPage; /* Maximum number of pages to hold in cache */
  354. Pgno mxPgno; /* Maximum allowed size of the database */
  355. u8 *aInJournal; /* One bit for each page in the database file */
  356. u8 *aInStmt; /* One bit for each page in the database */
  357. char *zFilename; /* Name of the database file */
  358. char *zJournal; /* Name of the journal file */
  359. char *zDirectory; /* Directory hold database and journal files */
  360. char *zStmtJrnl; /* Name of the statement journal file */
  361. sqlite3_file *fd, *jfd; /* File descriptors for database and journal */
  362. sqlite3_file *stfd; /* File descriptor for the statement subjournal*/
  363. BusyHandler *pBusyHandler; /* Pointer to sqlite.busyHandler */
  364. PagerLruList lru; /* LRU list of free pages */
  365. PgHdr *pAll; /* List of all pages */
  366. PgHdr *pStmt; /* List of pages in the statement subjournal */
  367. PgHdr *pDirty; /* List of all dirty pages */
  368. i64 journalOff; /* Current byte offset in the journal file */
  369. i64 journalHdr; /* Byte offset to previous journal header */
  370. i64 stmtHdrOff; /* First journal header written this statement */
  371. i64 stmtCksum; /* cksumInit when statement was started */
  372. i64 stmtJSize; /* Size of journal at stmt_begin() */
  373. int sectorSize; /* Assumed sector size during rollback */
  374. #ifdef SQLITE_TEST
  375. int nHit, nMiss; /* Cache hits and missing */
  376. int nRead, nWrite; /* Database pages read/written */
  377. #endif
  378. void (*xDestructor)(DbPage*,int); /* Call this routine when freeing pages */
  379. void (*xReiniter)(DbPage*,int); /* Call this routine when reloading pages */
  380. #ifdef SQLITE_HAS_CODEC
  381. void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
  382. void *pCodecArg; /* First argument to xCodec() */
  383. #endif
  384. int nHash; /* Size of the pager hash table */
  385. PgHdr **aHash; /* Hash table to map page number to PgHdr */
  386. #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  387. Pager *pNext; /* Doubly linked list of pagers on which */
  388. Pager *pPrev; /* sqlite3_release_memory() will work */
  389. int iInUseMM; /* Non-zero if unavailable to MM */
  390. int iInUseDB; /* Non-zero if in sqlite3_release_memory() */
  391. #endif
  392. char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */
  393. char dbFileVers[16]; /* Changes whenever database file changes */
  394. };
  /*
  ** Counters used for testing purposes only.  They exist only when
  ** SQLITE_TEST is defined and are not thread-safe.  PAGER_INCR(v)
  ** increments a counter in testing builds and expands to nothing
  ** otherwise.
  */
  #ifdef SQLITE_TEST
  int sqlite3_pager_readdb_count = 0;   /* Full pages read from the database */
  int sqlite3_pager_writedb_count = 0;  /* Full pages written to the database */
  int sqlite3_pager_writej_count = 0;   /* Pages written to the journal */
  int sqlite3_pager_pgfree_count = 0;   /* Cache pages freed */
  # define PAGER_INCR(v)  v++
  #else
  # define PAGER_INCR(v)
  #endif
  /*
  ** Global state that exists only when memory management is enabled.
  **
  ** sqlite3PagerList is the head of a doubly-linked list of all pagers that
  ** are eligible for page stealing by the sqlite3_release_memory()
  ** interface.  Access to that list is protected by the
  ** SQLITE_MUTEX_STATIC_MEM2 mutex.
  **
  ** sqlite3LruPageList is the global LRU list of pages.  The list helpers
  ** in this file hold SQLITE_MUTEX_STATIC_LRU while updating it.
  */
  #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  static Pager *sqlite3PagerList = 0;
  static PagerLruList sqlite3LruPageList = { 0, 0, 0 };
  #endif
  /*
  ** Every journal file begins with the magic string below.  The bytes were
  ** obtained from /dev/random and serve only as a sanity check.
  **
  ** Since version 2.8.0 the journal format carries additional sanity
  ** checking information.  If power fails while the journal is being
  ** written, semi-random garbage might appear in the journal file once
  ** power is restored.  Rolling such a journal back could corrupt the
  ** database, so the extra information is an attempt to detect the garbage
  ** and ignore it.
  **
  ** The sanity check is a 32-bit checksum on each page of data, covering
  ** both the page number and the pPager->pageSize bytes of page content.
  ** The checksum is seeded with a 32-bit random value stored in the journal
  ** right after the header.  The random seed matters because garbage at the
  ** end of a journal is likely to be data from other, now deleted, files.
  ** If that garbage came from an obsolete journal its checksums might be
  ** correct; seeding each journal with a different random value minimizes
  ** that risk.
  */
  static const unsigned char aJournalMagic[] = {
    0xd9, 0xd5, 0x05, 0xf9,
    0x20, 0xa1, 0x63, 0xd7,
  };
  /*
  ** Sizes of the records that make up a journal file.
  **
  ** JOURNAL_PG_SZ(pPager) is the size of one page record: the page content
  ** (pPager->pageSize bytes) plus 8 additional bytes.
  **
  ** JOURNAL_HDR_SZ(pPager) is the size of the journal header for this pager.
  ** It currently equals Pager.sectorSize; in the future it could be set to
  ** some value read from the disk controller.  The important characteristic
  ** is that it is the same size as a disk sector.
  **
  ** The macro parameter is parenthesized so that any pointer expression
  ** (for example "&aPager[0]" or "p+1") may be passed.
  */
  #define JOURNAL_PG_SZ(pPager)   (((pPager)->pageSize) + 8)
  #define JOURNAL_HDR_SZ(pPager)  ((pPager)->sectorSize)
  /*
  ** MEMDB is true when the pager manages an in-memory database.  It is a
  ** macro so that, when SQLITE_OMIT_MEMORYDB is defined, it becomes the
  ** constant 0 and the compiler can discard code that would never run.
  **
  ** Otherwise the macro reads the memDb flag through a variable named
  ** "pPager", which must be in scope wherever MEMDB is used.
  */
  #ifdef SQLITE_OMIT_MEMORYDB
  # define MEMDB  0
  #else
  # define MEMDB  pPager->memDb
  #endif
  /*
  ** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
  ** reserved for working around a windows/posix incompatibility).  In the
  ** journal it marks the point after which the remainder of the file holds
  ** a master journal name rather than pages to roll back.  See the comments
  ** on writeMasterJournal() for details.
  **
  ** The value is one more than PENDING_BYTE divided by the page size; an
  ** earlier definition omitted the "+1".
  */
  #define PAGER_MJ_PGNO(x)  ((PENDING_BYTE/((x)->pageSize))+1)

  /*
  ** The maximum legal page number is (2^31 - 1).
  */
  #define PAGER_MAX_PGNO  2147483647
  /*
  ** pagerEnter() and pagerLeave() acquire and release a recursive,
  ** special-purpose mutex on a pager.
  **
  ** The mutex only provides mutual exclusion between the Btree layer and
  ** the memory-management function sqlite3_release_memory().  It does not,
  ** for example, stop two Btrees from using the same pager at once; other
  ** general-purpose mutexes in the btree layer handle that chore.
  **
  ** The recursion depth is kept in Pager.iInUseDB.  On the outermost entry,
  ** if the pager is flagged as in use by the memory manager (iInUseMM), the
  ** caller withdraws its claim, waits on SQLITE_MUTEX_STATIC_MEM2, and then
  ** re-asserts the claim before releasing that mutex.
  **
  ** Without SQLITE_ENABLE_MEMORY_MANAGEMENT both routines are no-ops.
  */
  #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  static void pagerEnter(Pager *p){
    ++p->iInUseDB;
    if( p->iInUseMM && p->iInUseDB==1 ){
      sqlite3_mutex *pMem2 = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM2);
      p->iInUseDB = 0;              /* Step aside while waiting */
      sqlite3_mutex_enter(pMem2);
      p->iInUseDB = 1;              /* Reclaim the pager before letting go */
      sqlite3_mutex_leave(pMem2);
    }
    assert( !p->iInUseMM );
  }
  static void pagerLeave(Pager *p){
    --p->iInUseDB;
    assert( p->iInUseDB>=0 );
  }
  #else
  # define pagerEnter(X)
  # define pagerLeave(X)
  #endif
  /*
  ** Reference count tracking, for debugging.  Available only when
  ** SQLITE_DEBUG is defined and switched on at run time by setting
  ** pager3_refinfo_enable to a non-zero value.  REFINFO(X) prints the page
  ** number, data address and reference counts for page X; in non-debug
  ** builds it expands to nothing.
  */
  #ifdef SQLITE_DEBUG
    int pager3_refinfo_enable = 0;
    static void pager_refinfo(PgHdr *p){
      static int cnt = 0;
      if( pager3_refinfo_enable ){
        sqlite3DebugPrintf(
          "REFCNT: %4d addr=%p nRef=%-3d total=%d\n",
          p->pgno, PGHDR_TO_DATA(p), p->nRef, p->pPager->nRef
        );
        cnt++;   /* Something to set a breakpoint on */
      }
    }
  # define REFINFO(X)  pager_refinfo(X)
  #else
  # define REFINFO(X)
  #endif
  529. /*
  530. ** Add page pPg to the end of the linked list managed by structure
  531. ** pList (pPg becomes the last entry in the list - the most recently
  532. ** used). Argument pLink should point to either pPg->free or pPg->gfree,
  533. ** depending on whether pPg is being added to the pager-specific or
  534. ** global LRU list.
  535. */
  536. static void listAdd(PagerLruList *pList, PagerLruLink *pLink, PgHdr *pPg){
  537. pLink->pNext = 0;
  538. pLink->pPrev = pList->pLast;
  539. #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  540. assert(pLink==&pPg->free || pLink==&pPg->gfree);
  541. assert(pLink==&pPg->gfree || pList!=&sqlite3LruPageList);
  542. #endif
  543. if( pList->pLast ){
  544. int iOff = (char *)pLink - (char *)pPg;
  545. PagerLruLink *pLastLink = (PagerLruLink *)(&((u8 *)pList->pLast)[iOff]);
  546. pLastLink->pNext = pPg;
  547. }else{
  548. assert(!pList->pFirst);
  549. pList->pFirst = pPg;
  550. }
  551. pList->pLast = pPg;
  552. if( !pList->pFirstSynced && pPg->needSync==0 ){
  553. pList->pFirstSynced = pPg;
  554. }
  555. }
  556. /*
  557. ** Remove pPg from the list managed by the structure pointed to by pList.
  558. **
  559. ** Argument pLink should point to either pPg->free or pPg->gfree, depending
  560. ** on whether pPg is being added to the pager-specific or global LRU list.
  561. */
  562. static void listRemove(PagerLruList *pList, PagerLruLink *pLink, PgHdr *pPg){
  563. int iOff = (char *)pLink - (char *)pPg;
  564. #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  565. assert(pLink==&pPg->free || pLink==&pPg->gfree);
  566. assert(pLink==&pPg->gfree || pList!=&sqlite3LruPageList);
  567. #endif
  568. if( pPg==pList->pFirst ){
  569. pList->pFirst = pLink->pNext;
  570. }
  571. if( pPg==pList->pLast ){
  572. pList->pLast = pLink->pPrev;
  573. }
  574. if( pLink->pPrev ){
  575. PagerLruLink *pPrevLink = (PagerLruLink *)(&((u8 *)pLink->pPrev)[iOff]);
  576. pPrevLink->pNext = pLink->pNext;
  577. }
  578. if( pLink->pNext ){
  579. PagerLruLink *pNextLink = (PagerLruLink *)(&((u8 *)pLink->pNext)[iOff]);
  580. pNextLink->pPrev = pLink->pPrev;
  581. }
  582. if( pPg==pList->pFirstSynced ){
  583. PgHdr *p = pLink->pNext;
  584. while( p && p->needSync ){
  585. PagerLruLink *pL = (PagerLruLink *)(&((u8 *)p)[iOff]);
  586. p = pL->pNext;
  587. }
  588. pList->pFirstSynced = p;
  589. }
  590. pLink->pNext = pLink->pPrev = 0;
  591. }
  592. /*
  593. ** Add page pPg to the list of free pages for the pager. If
  594. ** memory-management is enabled, also add the page to the global
  595. ** list of free pages.
  596. */
  597. static void lruListAdd(PgHdr *pPg){
  598. listAdd(&pPg->pPager->lru, &pPg->free, pPg);
  599. #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  600. if( !pPg->pPager->memDb ){
  601. sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
  602. listAdd(&sqlite3LruPageList, &pPg->gfree, pPg);
  603. sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
  604. }
  605. #endif
  606. }
  607. /*
  608. ** Remove page pPg from the list of free pages for the associated pager.
  609. ** If memory-management is enabled, also remove pPg from the global list
  610. ** of free pages.
  611. */
  612. static void lruListRemove(PgHdr *pPg){
  613. listRemove(&pPg->pPager->lru, &pPg->free, pPg);
  614. #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  615. if( !pPg->pPager->memDb ){
  616. sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
  617. listRemove(&sqlite3LruPageList, &pPg->gfree, pPg);
  618. sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
  619. }
  620. #endif
  621. }
  622. /*
  623. ** This function is called just after the needSync flag has been cleared
  624. ** from all pages managed by pPager (usually because the journal file
  625. ** has just been synced). It updates the pPager->lru.pFirstSynced variable
  626. ** and, if memory-management is enabled, the sqlite3LruPageList.pFirstSynced
  627. ** variable also.
  628. */
  629. static void lruListSetFirstSynced(Pager *pPager){
  630. pPager->lru.pFirstSynced = pPager->lru.pFirst;
  631. #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  632. if( !pPager->memDb ){
  633. PgHdr *p;
  634. sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
  635. for(p=sqlite3LruPageList.pFirst; p && p->needSync; p=p->gfree.pNext);
  636. assert(p==pPager->lru.pFirstSynced || p==sqlite3LruPageList.pFirstSynced);
  637. sqlite3LruPageList.pFirstSynced = p;
  638. sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
  639. }
  640. #endif
  641. }
  642. /*
  643. ** Return true if page *pPg has already been written to the statement
  644. ** journal (or statement snapshot has been created, if *pPg is part
  645. ** of an in-memory database).
  646. */
  647. static int pageInStatement(PgHdr *pPg){
  648. Pager *pPager = pPg->pPager;
  649. if( MEMDB ){
  650. return PGHDR_TO_HIST(pPg, pPager)->inStmt;
  651. }else{
  652. Pgno pgno = pPg->pgno;
  653. u8 *a = pPager->aInStmt;
  654. return (a && (int)pgno<=pPager->stmtSize && (a[pgno/8] & (1<<(pgno&7))));
  655. }
  656. }
  657. /*
  658. ** Change the size of the pager hash table to N. N must be a power
  659. ** of two.
  660. */
  661. static void pager_resize_hash_table(Pager *pPager, int N){
  662. PgHdr **aHash, *pPg;
  663. assert( N>0 && (N&(N-1))==0 );
  664. pagerLeave(pPager);
  665. sqlite3MallocBenignFailure((int)pPager->aHash);
  666. aHash = sqlite3MallocZero( sizeof(aHash[0])*N );
  667. pagerEnter(pPager);
  668. if( aHash==0 ){
  669. /* Failure to rehash is not an error. It is only a performance hit. */
  670. return;
  671. }
  672. sqlite3_free(pPager->aHash);
  673. pPager->nHash = N;
  674. pPager->aHash = aHash;
  675. for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
  676. int h;
  677. if( pPg->pgno==0 ){
  678. assert( pPg->pNextHash==0 && pPg->pPrevHash==0 );
  679. continue;
  680. }
  681. h = pPg->pgno & (N-1);
  682. pPg->pNextHash = aHash[h];
  683. if( aHash[h] ){
  684. aHash[h]->pPrevHash = pPg;
  685. }
  686. aHash[h] = pPg;
  687. pPg->pPrevHash = 0;
  688. }
  689. }
  690. /*
  691. ** Read a 32-bit integer from the given file descriptor. Store the integer
  692. ** that is read in *pRes. Return SQLITE_OK if everything worked, or an
  693. ** error code is something goes wrong.
  694. **
  695. ** All values are stored on disk as big-endian.
  696. */
  697. static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){
  698. unsigned char ac[4];
  699. int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset);
  700. if( rc==SQLITE_OK ){
  701. *pRes = sqlite3Get4byte(ac);
  702. }
  703. return rc;
  704. }
  705. /*
  706. ** Write a 32-bit integer into a string buffer in big-endian byte order.
  707. */
  708. #define put32bits(A,B) sqlite3Put4byte((u8*)A,B)
  709. /*
  710. ** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK
  711. ** on success or an error code is something goes wrong.
  712. */
  713. static int write32bits(sqlite3_file *fd, i64 offset, u32 val){
  714. char ac[4];
  715. put32bits(ac, val);
  716. return sqlite3OsWrite(fd, ac, 4, offset);
  717. }
  718. /*
  719. ** If file pFd is open, call sqlite3OsUnlock() on it.
  720. */
  721. static int osUnlock(sqlite3_file *pFd, int eLock){
  722. if( !pFd->pMethods ){
  723. return SQLITE_OK;
  724. }
  725. return sqlite3OsUnlock(pFd, eLock);
  726. }
  /*
  ** Determine whether or not the atomic-write optimization can be used
  ** with this pager.  The optimization can be used if:
  **
  **  (a) the value returned by OsDeviceCharacteristics() indicates that
  **      a database page may be written atomically, and
  **  (b) the value returned by OsSectorSize() is less than or equal
  **      to the page size.
  **
  ** When the database file is not open (no method table) the device is not
  ** queried and the optimization is treated as usable.
  **
  ** Returns 0 if the optimization cannot be used.  Otherwise returns the
  ** size of the journal file when it contains rollback data for exactly
  ** one page: one journal header plus one page record.
  */
  #ifdef SQLITE_ENABLE_ATOMIC_WRITE
  static int jrnlBufferSize(Pager *pPager){
    sqlite3_file *fd = pPager->fd;

    /* The IOCAP_ATOMICnnn flags must equal the page size shifted right 8 */
    assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
    assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));

    if( fd->pMethods ){
      int dc = sqlite3OsDeviceCharacteristics(fd);   /* Device characteristics */
      int nSector = sqlite3OsSectorSize(fd);         /* Sector size */
      int nPage = pPager->pageSize;                  /* Page size */
      int isAtomic = (dc & (SQLITE_IOCAP_ATOMIC|(nPage>>8)))!=0;
      if( !isAtomic || nSector>nPage ){
        return 0;
      }
    }
    return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
  }
  #endif
  759. /*
  760. ** This function should be called when an error occurs within the pager
  761. ** code. The first argument is a pointer to the pager structure, the
  762. ** second the error-code about to be returned by a pager API function.
  763. ** The value returned is a copy of the second argument to this function.
  764. **
  765. ** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT, or SQLITE_FULL
  766. ** the error becomes persistent. Until the persisten error is cleared,
  767. ** subsequent API calls on this Pager will immediately return the same
  768. ** error code.
  769. **
  770. ** A persistent error indicates that the contents of the pager-cache
  771. ** cannot be trusted. This state can be cleared by completely discarding
  772. ** the contents of the pager-cache. If a transaction was active when
  773. ** the persistent error occured, then the rollback journal may need
  774. ** to be replayed.
  775. */
  776. static void pager_unlock(Pager *pPager);
  777. static int pager_error(Pager *pPager, int rc){
  778. int rc2 = rc & 0xff;
  779. assert(
  780. pPager->errCode==SQLITE_FULL ||
  781. pPager->errCode==SQLITE_OK ||
  782. (pPager->errCode & 0xff)==SQLITE_IOERR
  783. );
  784. if(
  785. rc2==SQLITE_FULL ||
  786. rc2==SQLITE_IOERR ||
  787. rc2==SQLITE_CORRUPT
  788. ){
  789. pPager->errCode = rc;
  790. if( pPager->state==PAGER_UNLOCK && pPager->nRef==0 ){
  791. /* If the pager is already unlocked, call pager_unlock() now to
  792. ** clear the error state and ensure that the pager-cache is
  793. ** completely empty.
  794. */
  795. pager_unlock(pPager);
  796. }
  797. }
  798. return rc;
  799. }
  /*
  ** If SQLITE_CHECK_PAGES is defined, the cache is sanity-checked with a
  ** hash of each page's content.  This is used for testing and debugging
  ** only.  When the macro is not defined, the hash helpers evaluate to 0
  ** and CHECK_PAGE() expands to nothing.
  */
  #ifdef SQLITE_CHECK_PAGES
  /*
  ** Return a 32-bit hash of the nByte bytes starting at pData.
  */
  static u32 pager_datahash(int nByte, unsigned char *pData){
    u32 hash = 0;
    unsigned char *pByte = pData;
    int nLeft;
    for(nLeft=nByte; nLeft>0; nLeft--){
      hash = (hash*1039) + *pByte++;
    }
    return hash;
  }

  /*
  ** Return a 32-bit hash of the page data for pPage.
  */
  static u32 pager_pagehash(PgHdr *pPage){
    unsigned char *pContent = (unsigned char *)PGHDR_TO_DATA(pPage);
    return pager_datahash(pPage->pPager->pageSize, pContent);
  }

  /*
  ** The CHECK_PAGE macro takes a PgHdr* as an argument.  Unless NDEBUG is
  ** defined, an assert() verifies that the page either has no recorded
  ** hash, belongs to a pager that is in an error state or manages an
  ** in-memory database, is dirty, or still matches its recorded page hash.
  */
  #define CHECK_PAGE(x) checkPage(x)
  static void checkPage(PgHdr *pPg){
    Pager *pPager = pPg->pPager;
    assert( !pPg->pageHash || pPager->errCode || MEMDB || pPg->dirty
         || pPg->pageHash==pager_pagehash(pPg) );
  }

  #else
  #define pager_datahash(X,Y)  0
  #define pager_pagehash(X)  0
  #define CHECK_PAGE(x)
  #endif
  837. /*
  838. ** When this is called the journal file for pager pPager must be open.
  839. ** The master journal file name is read from the end of the file and
  840. ** written into memory supplied by the caller.
  841. **
  842. ** zMaster must point to a buffer of at least nMaster bytes allocated by
  843. ** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is
  844. ** enough space to write the master journal name). If the master journal
  845. ** name in the journal is longer than nMaster bytes (including a
  846. ** nul-terminator), then this is handled as if no master journal name
  847. ** were present in the journal.
  848. **
  849. ** If no master journal file name is present zMaster[0] is set to 0 and
  850. ** SQLITE_OK returned.
  851. */
  852. static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, int nMaster){
  853. int rc;
  854. u32 len;
  855. i64 szJ;
  856. u32 cksum;
  857. int i;
  858. unsigned char aMagic[8]; /* A buffer to hold the magic header */
  859. zMaster[0] = '\0';
  860. rc = sqlite3OsFileSize(pJrnl, &szJ);
  861. if( rc!=SQLITE_OK || szJ<16 ) return rc;
  862. rc = read32bits(pJrnl, szJ-16, &len);
  863. if( rc!=SQLITE_OK ) return rc;
  864. if( len>=nMaster ){
  865. return SQLITE_OK;
  866. }
  867. rc = read32bits(pJrnl, szJ-12, &cksum);
  868. if( rc!=SQLITE_OK ) return rc;
  869. rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8);
  870. if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc;
  871. rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len);
  872. if( rc!=SQLITE_OK ){
  873. return rc;
  874. }
  875. zMaster[len] = '\0';
  876. /* See if the checksum matches the master journal name */
  877. for(i=0; i<len; i++){
  878. cksum -= zMaster[i];
  879. }
  880. if( cksum ){
  881. /* If the checksum doesn't add up, then one or more of the disk sectors
  882. ** containing the master journal filename is corrupted. This means
  883. ** definitely roll back, so just return SQLITE_OK and report a (nul)
  884. ** master-journal filename.
  885. */
  886. zMaster[0] = '\0';
  887. }
  888. return SQLITE_OK;
  889. }
  890. /*
  891. ** Seek the journal file descriptor to the next sector boundary where a
  892. ** journal header may be read or written. Pager.journalOff is updated with
  893. ** the new seek offset.
  894. **
  895. ** i.e for a sector size of 512:
  896. **
  897. ** Input Offset Output Offset
  898. ** ---------------------------------------
  899. ** 0 0
  900. ** 512 512
  901. ** 100 512
  902. ** 2000 2048
  903. **
  904. */
  905. static void seekJournalHdr(Pager *pPager){
  906. i64 offset = 0;
  907. i64 c = pPager->journalOff;
  908. if( c ){
  909. offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
  910. }
  911. assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
  912. assert( offset>=c );
  913. assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
  914. pPager->journalOff = offset;
  915. }
  916. /*
  917. ** The journal file must be open when this routine is called. A journal
  918. ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
  919. ** current location.
  920. **
  921. ** The format for the journal header is as follows:
  922. ** - 8 bytes: Magic identifying journal format.
  923. ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
  924. ** - 4 bytes: Random number used for page hash.
  925. ** - 4 bytes: Initial database page count.
  926. ** - 4 bytes: Sector size used by the process that wrote this journal.
  927. **
  928. ** Followed by (JOURNAL_HDR_SZ - 24) bytes of unused space.
  929. */
  930. static int writeJournalHdr(Pager *pPager){
  931. char zHeader[sizeof(aJournalMagic)+16];
  932. int rc;
  933. if( pPager->stmtHdrOff==0 ){
  934. pPager->stmtHdrOff = pPager->journalOff;
  935. }
  936. seekJournalHdr(pPager);
  937. pPager->journalHdr = pPager->journalOff;
  938. memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
  939. /*
  940. ** Write the nRec Field - the number of page records that follow this
  941. ** journal header. Normally, zero is written to this value at this time.
  942. ** After the records are added to the journal (and the journal synced,
  943. ** if in full-sync mode), the zero is overwritten with the true number
  944. ** of records (see syncJournal()).
  945. **
  946. ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When
  947. ** reading the journal this value tells SQLite to assume that the
  948. ** rest of the journal file contains valid page records. This assumption
  949. ** is dangerous, as if a failure occured whilst writing to the journal
  950. ** file it may contain some garbage data. There are two scenarios
  951. ** where this risk can be ignored:
  952. **
  953. ** * When the pager is in no-sync mode. Corruption can follow a
  954. ** power failure in this case anyway.
  955. **
  956. ** * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
  957. ** that garbage data is never appended to the journal file.
  958. */
  959. assert(pPager->fd->pMethods||pPager->noSync);
  960. if( (pPager->noSync)
  961. || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
  962. ){
  963. put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);
  964. }else{
  965. put32bits(&zHeader[sizeof(aJournalMagic)], 0);
  966. }
  967. /* The random check-hash initialiser */
  968. sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
  969. put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
  970. /* The initial database size */
  971. put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
  972. /* The assumed sector size for this process */
  973. put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
  974. IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, sizeof(zHeader)))
  975. rc = sqlite3OsWrite(pPager->jfd, zHeader, sizeof(zHeader),pPager->journalOff);
  976. pPager->journalOff += JOURNAL_HDR_SZ(pPager);
  977. /* The journal header has been written successfully. Seek the journal
  978. ** file descriptor to the end of the journal header sector.
  979. */
  980. if( rc==SQLITE_OK ){
  981. IOTRACE(("JTAIL %p %lld\n", pPager, pPager->journalOff-1))
  982. rc = sqlite3OsWrite(pPager->jfd, "\000", 1, pPager->journalOff-1);
  983. }
  984. return rc;
  985. }
  986. /*
  987. ** The journal file must be open when this is called. A journal header file
  988. ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
  989. ** file. See comments above function writeJournalHdr() for a description of
  990. ** the journal header format.
  991. **
  992. ** If the header is read successfully, *nRec is set to the number of
  993. ** page records following this header and *dbSize is set to the size of the
  994. ** database before the transaction began, in pages. Also, pPager->cksumInit
  995. ** is set to the value read from the journal header. SQLITE_OK is returned
  996. ** in this case.
  997. **
  998. ** If the journal header file appears to be corrupted, SQLITE_DONE is
  999. ** returned and *nRec and *dbSize are not set. If JOURNAL_HDR_SZ bytes
  1000. ** cannot be read from the journal file an error code is returned.
  1001. */
  1002. static int readJournalHdr(
  1003. Pager *pPager,
  1004. i64 journalSize,
  1005. u32 *pNRec,
  1006. u32 *pDbSize
  1007. ){
  1008. int rc;
  1009. unsigned char aMagic[8]; /* A buffer to hold the magic header */
  1010. i64 jrnlOff;
  1011. seekJournalHdr(pPager);
  1012. if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
  1013. return SQLITE_DONE;
  1014. }
  1015. jrnlOff = pPager->journalOff;
  1016. rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), jrnlOff);
  1017. if( rc ) return rc;
  1018. jrnlOff += sizeof(aMagic);
  1019. if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
  1020. return SQLITE_DONE;
  1021. }
  1022. rc = read32bits(pPager->jfd, jrnlOff, pNRec);
  1023. if( rc ) return rc;
  1024. rc = read32bits(pPager->jfd, jrnlOff+4, &pPager->cksumInit);
  1025. if( rc ) return rc;
  1026. rc = read32bits(pPager->jfd, jrnlOff+8, pDbSize);
  1027. if( rc ) return rc;
  1028. /* Update the assumed sector-size to match the value used by
  1029. ** the process that created this journal. If this journal was
  1030. ** created by a process other than this one, then this routine
  1031. ** is being called from within pager_playback(). The local value
  1032. ** of Pager.sectorSize is restored at the end of that routine.
  1033. */
  1034. rc = read32bits(pPager->jfd, jrnlOff+12, (u32 *)&pPager->sectorSize);
  1035. if( rc ) return rc;
  1036. pPager->journalOff += JOURNAL_HDR_SZ(pPager);
  1037. return SQLITE_OK;
  1038. }
  1039. /*
  1040. ** Write the supplied master journal name into the journal file for pager
  1041. ** pPager at the current location. The master journal name must be the last
  1042. ** thing written to a journal file. If the pager is in full-sync mode, the
  1043. ** journal file descriptor is advanced to the next sector boundary before
  1044. ** anything is written. The format is:
  1045. **
  1046. ** + 4 bytes: PAGER_MJ_PGNO.
  1047. ** + N bytes: length of master journal name.
  1048. ** + 4 bytes: N
  1049. ** + 4 bytes: Master journal name checksum.
  1050. ** + 8 bytes: aJournalMagic[].
  1051. **
  1052. ** The master journal page checksum is the sum of the bytes in the master
  1053. ** journal name.
  1054. **
  1055. ** If zMaster is a NULL pointer (occurs for a single database transaction),
  1056. ** this call is a no-op.
  1057. */
  1058. static int writeMasterJournal(Pager *pPager, const char *zMaster){
  1059. int rc;
  1060. int len;
  1061. int i;
  1062. i64 jrnlOff;
  1063. u32 cksum = 0;
  1064. char zBuf[sizeof(aJournalMagic)+2*4];
  1065. if( !zMaster || pPager->setMaster) return SQLITE_OK;
  1066. pPager->setMaster = 1;
  1067. len = strlen(zMaster);
  1068. for(i=0; i<len; i++){
  1069. cksum += zMaster[i];
  1070. }
  1071. /* If in full-sync mode, advance to the next disk sector before writing
  1072. ** the master journal name. This is in case the previous page written to
  1073. ** the journal has already been synced.
  1074. */
  1075. if( pPager->fullSync ){
  1076. seekJournalHdr(pPager);
  1077. }
  1078. jrnlOff = pPager->journalOff;
  1079. pPager->journalOff += (len+20);
  1080. rc = write32bits(pPager->jfd, jrnlOff, PAGER_MJ_PGNO(pPager));
  1081. if( rc!=SQLITE_OK ) return rc;
  1082. jrnlOff += 4;
  1083. rc = sqlite3OsWrite(pPager->jfd, zMaster, len, jrnlOff);
  1084. if( rc!=SQLITE_OK ) return rc;
  1085. jrnlOff += len;
  1086. put32bits(zBuf, len);
  1087. put32bits(&zBuf[4], cksum);
  1088. memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic));
  1089. rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic), jrnlOff);
  1090. pPager->needSync = !pPager->noSync;
  1091. return rc;
  1092. }
  1093. /*
  1094. ** Add or remove a page from the list of all pages that are in the
  1095. ** statement journal.
  1096. **
  1097. ** The Pager keeps a separate list of pages that are currently in
  1098. ** the statement journal. This helps the sqlite3PagerStmtCommit()
  1099. ** routine run MUCH faster for the common case where there are many
  1100. ** pages in memory but only a few are in the statement journal.
  1101. */
  1102. static void page_add_to_stmt_list(PgHdr *pPg){
  1103. Pager *pPager = pPg->pPager;
  1104. PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
  1105. assert( MEMDB );
  1106. if( !pHist->inStmt ){
  1107. assert( pHist->pPrevStmt==0 && pHist->pNextStmt==0 );
  1108. if( pPager->pStmt ){
  1109. PGHDR_TO_HIST(pPager->pStmt, pPager)->pPrevStmt = pPg;
  1110. }
  1111. pHist->pNextStmt = pPager->pStmt;
  1112. pPager->pStmt = pPg;
  1113. pHist->inStmt = 1;
  1114. }
  1115. }
  1116. /*
  1117. ** Find a page in the hash table given its page number. Return
  1118. ** a pointer to the page or NULL if not found.
  1119. */
  1120. static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
  1121. PgHdr *p;
  1122. if( pPager->aHash==0 ) return 0;
  1123. p = pPager->aHash[pgno & (pPager->nHash-1)];
  1124. while( p && p->pgno!=pgno ){
  1125. p = p->pNextHash;
  1126. }
  1127. return p;
  1128. }
  1129. /*
  1130. ** Clear the in-memory cache. This routine
  1131. ** sets the state of the pager back to what it was when it was first
  1132. ** opened. Any outstanding pages are invalidated and subsequent attempts
  1133. ** to access those pages will likely result in a coredump.
  1134. */
  1135. static void pager_reset(Pager *pPager){
  1136. PgHdr *pPg, *pNext;
  1137. if( pPager->errCode ) return;
  1138. for(pPg=pPager->pAll; pPg; pPg=pNext){
  1139. IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno));
  1140. PAGER_INCR(sqlite3_pager_pgfree_count);
  1141. pNext = pPg->pNextAll;
  1142. lruListRemove(pPg);
  1143. sqlite3_free(pPg);
  1144. }
  1145. assert(pPager->lru.pFirst==0);
  1146. assert(pPager->lru.pFirstSynced==0);
  1147. assert(pPager->lru.pLast==0);
  1148. pPager->pStmt = 0;
  1149. pPager->pAll = 0;
  1150. pPager->pDirty = 0;
  1151. pPager->nHash = 0;
  1152. sqlite3_free(pPager->aHash);
  1153. pPager->nPage = 0;
  1154. pPager->aHash = 0;
  1155. pPager->nRef = 0;
  1156. }
  1157. /*
  1158. ** Unlock the database file.
  1159. **
  1160. ** If the pager is currently in error state, discard the contents of
  1161. ** the cache and reset the Pager structure internal state. If there is
  1162. ** an open journal-file, then the next time a shared-lock is obtained
  1163. ** on the pager file (by this or any other process), it will be
  1164. ** treated as a hot-journal and rolled back.
  1165. */
  1166. static void pager_unlock(Pager *pPager){
  1167. if( !pPager->exclusiveMode ){
  1168. if( !MEMDB ){
  1169. if( pPager->fd->pMethods ){
  1170. osUnlock(pPager->fd, NO_LOCK);
  1171. }
  1172. pPager->dbSize = -1;
  1173. IOTRACE(("UNLOCK %p\n", pPager))
  1174. /* If Pager.errCode is set, the contents of the pager cache cannot be
  1175. ** trusted. Now that the pager file is unlocked, the contents of the
  1176. ** cache can be discarded and the error code safely cleared.
  1177. */
  1178. if( pPager->errCode ){
  1179. pPager->errCode = SQLITE_OK;
  1180. pager_reset(pPager);
  1181. if( pPager->stmtOpen ){
  1182. sqlite3OsClose(pPager->stfd);
  1183. sqlite3_free(pPager->aInStmt);
  1184. pPager->aInStmt = 0;
  1185. }
  1186. if( pPager->journalOpen ){
  1187. sqlite3OsClose(pPager->jfd);
  1188. pPager->journalOpen = 0;
  1189. sqlite3_free(pPager->aInJournal);
  1190. pPager->aInJournal = 0;
  1191. }
  1192. pPager->stmtOpen = 0;
  1193. pPager->stmtInUse = 0;
  1194. pPager->journalOff = 0;
  1195. pPager->journalStarted = 0;
  1196. pPager->stmtAutoopen = 0;
  1197. pPager->origDbSize = 0;
  1198. }
  1199. }
  1200. if( !MEMDB || pPager->errCode==SQLITE_OK ){
  1201. pPager->state = PAGER_UNLOCK;
  1202. pPager->changeCountDone = 0;
  1203. }
  1204. }
  1205. }
  1206. /*
  1207. ** Execute a rollback if a transaction is active and unlock the
  1208. ** database file. If the pager has already entered the error state,
  1209. ** do not attempt the rollback.
  1210. */
  1211. static void pagerUnlockAndRollback(Pager *p){
  1212. assert( p->state>=PAGER_RESERVED || p->journalOpen==0 );
  1213. if( p->errCode==SQLITE_OK && p->state>=PAGER_RESERVED ){
  1214. sqlite3PagerRollback(p);
  1215. }
  1216. pager_unlock(p);
  1217. assert( p->errCode || !p->journalOpen || (p->exclusiveMode&&!p->journalOff) );
  1218. assert( p->errCode || !p->stmtOpen || p->exclusiveMode );
  1219. }
  1220. /*
  1221. ** This routine ends a transaction. A transaction is ended by either
  1222. ** a COMMIT or a ROLLBACK.
  1223. **
  1224. ** When this routine is called, the pager has the journal file open and
  1225. ** a RESERVED or EXCLUSIVE lock on the database. This routine will release
  1226. ** the database lock and acquires a SHARED lock in its place if that is
  1227. ** the appropriate thing to do. Release locks usually is appropriate,
  1228. ** unless we are in exclusive access mode or unless this is a
  1229. ** COMMIT AND BEGIN or ROLLBACK AND BEGIN operation.
  1230. **
  1231. ** The journal file is either deleted or truncated.
  1232. **
  1233. ** TODO: Consider keeping the journal file open for temporary databases.
  1234. ** This might give a performance improvement on windows where opening
  1235. ** a file is an expensive operation.
  1236. */
  1237. static int pager_end_transaction(Pager *pPager){
  1238. PgHdr *pPg;
  1239. int rc = SQLITE_OK;
  1240. int rc2 = SQLITE_OK;
  1241. assert( !MEMDB );
  1242. if( pPager->state<PAGER_RESERVED ){
  1243. return SQLITE_OK;
  1244. }
  1245. sqlite3PagerStmtCommit(pPager);
  1246. if( pPager->stmtOpen && !pPager->exclusiveMode ){
  1247. sqlite3OsClose(pPager->stfd);
  1248. pPager->stmtOpen = 0;
  1249. }
  1250. if( pPager->journalOpen ){
  1251. if( pPager->exclusiveMode
  1252. && (rc = sqlite3OsTruncate(pPager->jfd, 0))==SQLITE_OK ){;
  1253. pPager->journalOff = 0;
  1254. pPager->journalStarted = 0;
  1255. }else{
  1256. sqlite3OsClose(pPager->jfd);
  1257. pPager->journalOpen = 0;
  1258. if( rc==SQLITE_OK ){
  1259. rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
  1260. }
  1261. }
  1262. sqlite3_free( pPager->aInJournal );
  1263. pPager->aInJournal = 0;
  1264. for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
  1265. pPg->inJournal = 0;
  1266. pPg->dirty = 0;
  1267. pPg->needSync = 0;
  1268. pPg->alwaysRollback = 0;
  1269. #ifdef SQLITE_CHECK_PAGES
  1270. pPg->pageHash = pager_pagehash(pPg);
  1271. #endif
  1272. }
  1273. pPager->pDirty = 0;
  1274. pPager->dirtyCache = 0;
  1275. pPager->nRec = 0;
  1276. }else{
  1277. assert( pPager->aInJournal==0 );
  1278. assert( pPager->dirtyCache==0 || pPager->useJournal==0 );
  1279. }
  1280. if( !pPager->exclusiveMode ){
  1281. rc2 = osUnlock(pPager->fd, SHARED_LOCK);
  1282. pPager->state = PAGER_SHARED;
  1283. }else if( pPager->state==PAGER_SYNCED ){
  1284. pPager->state = PAGER_EXCLUSIVE;
  1285. }
  1286. pPager->origDbSize = 0;
  1287. pPager->setMaster = 0;
  1288. pPager->needSync = 0;
  1289. lruListSetFirstSynced(pPager);
  1290. pPager->dbSize = -1;
  1291. return (rc==SQLITE_OK?rc2:rc);
  1292. }
  1293. /*
  1294. ** Compute and return a checksum for the page of data.
  1295. **
  1296. ** This is not a real checksum. It is really just the sum of the
  1297. ** random initial value and the page number. We experimented with
  1298. ** a checksum of the entire data, but that was found to be too slow.
  1299. **
  1300. ** Note that the page number is stored at the beginning of data and
  1301. ** the checksum is stored at the end. This is important. If journal
  1302. ** corruption occurs due to a power failure, the most likely scenario
  1303. ** is that one end or the other of the record will be changed. It is
  1304. ** much less likely that the two ends of the journal record will be
  1305. ** correct and the middle be corrupt. Thus, this "checksum" scheme,
  1306. ** though fast and simple, catches the mostly likely kind of corruption.
  1307. **
  1308. ** FIX ME: Consider adding every 200th (or so) byte of the data to the
  1309. ** checksum. That way if a single page spans 3 or more disk sectors and
  1310. ** only the middle sector is corrupt, we will still have a reasonable
  1311. ** chance of failing the checksum and thus detecting the problem.
  1312. */
  1313. static u32 pager_cksum(Pager *pPager, const u8 *aData){
  1314. u32 cksum = pPager->cksumInit;
  1315. int i = pPager->pageSize-200;
  1316. while( i>0 ){
  1317. cksum += aData[i];
  1318. i -= 200;
  1319. }
  1320. return cksum;
  1321. }
  1322. /* Forward declaration */
  1323. static void makeClean(PgHdr*);
  1324. /*
  1325. ** Read a single page from the journal file opened on file descriptor
  1326. ** jfd. Playback this one page.
  1327. **
  1328. ** If useCksum==0 it means this journal does not use checksums. Checksums
  1329. ** are not used in statement journals because statement journals do not
  1330. ** need to survive power failures.
  1331. */
  1332. static int pager_playback_one_page(
  1333. Pager *pPager,
  1334. sqlite3_file *jfd,
  1335. i64 offset,
  1336. int useCksum
  1337. ){
  1338. int rc;
  1339. PgHdr *pPg; /* An existing page in the cache */
  1340. Pgno pgno; /* The page number of a page in journal */
  1341. u32 cksum; /* Checksum used for sanity checking */
  1342. u8 *aData = (u8 *)pPager->pTmpSpace; /* Temp storage for a page */
  1343. /* useCksum should be true for the main journal and false for
  1344. ** statement journals. Verify that this is always the case
  1345. */
  1346. assert( jfd == (useCksum ? pPager->jfd : pPager->stfd) );
  1347. assert( aData );
  1348. rc = read32bits(jfd, offset, &pgno);
  1349. if( rc!=SQLITE_OK ) return rc;
  1350. rc = sqlite3OsRead(jfd, aData, pPager->pageSize, offset+4);
  1351. if( rc!=SQLITE_OK ) return rc;
  1352. pPager->journalOff += pPager->pageSize + 4;
  1353. /* Sanity checking on the page. This is more important that I originally
  1354. ** thought. If a power failure occurs while the journal is being written,
  1355. ** it could cause invalid data to be written into the journal. We need to
  1356. ** detect this invalid data (with high probability) and ignore it.
  1357. */
  1358. if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
  1359. return SQLITE_DONE;
  1360. }
  1361. if( pgno>(unsigned)pPager->dbSize ){
  1362. return SQLITE_OK;
  1363. }
  1364. if( useCksum ){
  1365. rc = read32bits(jfd, offset+pPager->pageSize+4, &cksum);
  1366. if( rc ) return rc;
  1367. pPager->journalOff += 4;
  1368. if( pager_cksum(pPager, aData)!=cksum ){
  1369. return SQLITE_DONE;
  1370. }
  1371. }
  1372. assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );
  1373. /* If the pager is in RESERVED state, then there must be a copy of this
  1374. ** page in the pager cache. In this case just update the pager cache,
  1375. ** not the database file. The page is left marked dirty in this case.
  1376. **
  1377. ** An exception to the above rule: If the database is in no-sync mode
  1378. ** and a page is moved during an incremental vacuum then the page may
  1379. ** not be in the pager cache. Later: if a malloc() or IO error occurs
  1380. ** during a Movepage() call, then the page may not be in the cache
  1381. ** either. So the condition described in the above paragraph is not
  1382. ** assert()able.
  1383. **
  1384. ** If in EXCLUSIVE state, then we update the pager cache if it exists
  1385. ** and the main file. The page is then marked not dirty.
  1386. **
  1387. ** Ticket #1171: The statement journal might contain page content that is
  1388. ** different from the page content at the start of the transaction.
  1389. ** This occurs when a page is changed prior to the start of a statement
  1390. ** then changed again within the statement. When rolling back such a
  1391. ** statement we must not write to the original database unless we know
  1392. ** for certain that original page contents are synced into the main rollback
  1393. ** journal. Otherwise, a power loss might leave modified data in the
  1394. ** database file without an entry in the rollback journal that can
  1395. ** restore the database to its original form. Two conditions must be
  1396. ** met before writing to the database files. (1) the database must be
  1397. ** locked. (2) we know that the original page content is fully synced
  1398. ** in the main journal either because the page is not in cache or else
  1399. ** the page is marked as needSync==0.
  1400. */
  1401. pPg = pager_lookup(pPager, pgno);
  1402. PAGERTRACE4("PLAYBACK %d page %d hash(%08x)\n",
  1403. PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, aData));
  1404. if( pPager->state>=PAGER_EXCLUSIVE && (pPg==0 || pPg->needSync==0) ){
  1405. i64 offset = (pgno-1)*(i64)pPager->pageSize;
  1406. rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize, offset);
  1407. if( pPg ){
  1408. makeClean(pPg);
  1409. }
  1410. }
  1411. if( pPg ){
  1412. /* No page should ever be explicitly rolled back that is in use, except
  1413. ** for page 1 which is held in use in order to keep the lock on the
  1414. ** database active. However such a page may be rolled back as a result
  1415. ** of an internal error resulting in an automatic call to
  1416. ** sqlite3PagerRollback().
  1417. */
  1418. void *pData;
  1419. /* assert( pPg->nRef==0 || pPg->pgno==1 ); */
  1420. pData = PGHDR_TO_DATA(pPg);
  1421. memcpy(pData, aData, pPager->pageSize);
  1422. if( pPager->xReiniter ){
  1423. pPager->xReiniter(pPg, pPager->pageSize);
  1424. }
  1425. #ifdef SQLITE_CHECK_PAGES
  1426. pPg->pageHash = pager_pagehash(pPg);
  1427. #endif
  1428. /* If this was page 1, then restore the value of Pager.dbFileVers.
  1429. ** Do this before any decoding. */
  1430. if( pgno==1 ){
  1431. memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
  1432. }
  1433. /* Decode the page just read from disk */
  1434. CODEC1(pPager, pData, pPg->pgno, 3);
  1435. }
  1436. return rc;
  1437. }
  1438. /*
  1439. ** Parameter zMaster is the name of a master journal file. A single journal
  1440. ** file that referred to the master journal file has just been rolled back.
  1441. ** This routine checks if it is possible to delete the master journal file,
  1442. ** and does so if it is.
  1443. **
  1444. ** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not
  1445. ** available for use within this function.
  1446. **
  1447. **
  1448. ** The master journal file contains the names of all child journals.
  1449. ** To tell if a master journal can be deleted, check to each of the
  1450. ** children. If all children are either missing or do not refer to
  1451. ** a different master journal, then this master journal can be deleted.
  1452. */
  1453. static int pager_delmaster(Pager *pPager, const char *zMaster){
  1454. sqlite3_vfs *pVfs = pPager->pVfs;
  1455. int rc;
  1456. int master_open = 0;
  1457. sqlite3_file *pMaster;
  1458. sqlite3_file *pJournal;
  1459. char *zMasterJournal = 0; /* Contents of master journal file */
  1460. i64 nMasterJournal; /* Size of master journal file */
  1461. /* Open the master journal file exclusively in case some other process
  1462. ** is running this routine also. Not that it makes too much difference.
  1463. */
  1464. pMaster = (sqlite3_file *)sqlite3_malloc(pVfs->szOsFile * 2);
  1465. pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile);
  1466. if( !pMaster ){
  1467. rc = SQLITE_NOMEM;
  1468. }else{
  1469. int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL);
  1470. rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0);
  1471. }
  1472. if( rc!=SQLITE_OK ) goto delmaster_out;
  1473. master_open = 1;
  1474. rc = sqlite3OsFileSize(pMaster, &nMasterJournal);
  1475. if( rc!=SQLITE_OK ) goto delmaster_out;
  1476. if( nMasterJournal>0 ){
  1477. char *zJournal;
  1478. char *zMasterPtr = 0;
  1479. int nMasterPtr = pPager->pVfs->mxPathname+1;
  1480. /* Load the entire master journal file into space obtained from
  1481. ** sqlite3_malloc() and pointed to by zMasterJournal.
  1482. */
  1483. zMasterJournal = (char *)sqlite3_malloc(nMasterJournal + nMasterPtr);
  1484. if( !zMasterJournal ){
  1485. rc = SQLITE_NOMEM;
  1486. goto delmaster_out;
  1487. }
  1488. zMasterPtr = &zMasterJournal[nMasterJournal];
  1489. rc = sqlite3OsRead(pMaster, zMasterJournal, nMasterJournal, 0);
  1490. if( rc!=SQLITE_OK ) goto delmaster_out;
  1491. zJournal = zMasterJournal;
  1492. while( (zJournal-zMasterJournal)<nMasterJournal ){
  1493. if( sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS) ){
  1494. /* One of the journals pointed to by the master journal exists.
  1495. ** Open it and check if it points at the master journal. If
  1496. ** so, return without deleting the master journal file.
  1497. */
  1498. int c;
  1499. int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL);
  1500. rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0);
  1501. if( rc!=SQLITE_OK ){
  1502. goto delmaster_out;
  1503. }
  1504. rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr);
  1505. sqlite3OsClose(pJournal);
  1506. if( rc!=SQLITE_OK ){
  1507. goto delmaster_out;
  1508. }
  1509. c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0;
  1510. if( c ){
  1511. /* We have a match. Do not delete the master journal file. */
  1512. goto delmaster_out;
  1513. }
  1514. }
  1515. zJournal += (strlen(zJournal)+1);
  1516. }
  1517. }
  1518. rc = sqlite3OsDelete(pVfs, zMaster, 0);
  1519. delmaster_out:
  1520. if( zMasterJournal ){
  1521. sqlite3_free(zMasterJournal);
  1522. }
  1523. if( master_open ){
  1524. sqlite3OsClose(pMaster);
  1525. }
  1526. sqlite3_free(pMaster);
  1527. return rc;
  1528. }
  1529. static void pager_truncate_cache(Pager *pPager);
  1530. /*
  1531. ** Truncate the main file of the given pager to the number of pages
  1532. ** indicated. Also truncate the cached representation of the file.
  1533. **
  1534. ** Might might be the case that the file on disk is smaller than nPage.
  1535. ** This can happen, for example, if we are in the middle of a transaction
  1536. ** which has extended the file size and the new pages are still all held
  1537. ** in cache, then an INSERT or UPDATE does a statement rollback. Some
  1538. ** operating system implementations can get confused if you try to
  1539. ** truncate a file to some size that is larger than it currently is,
  1540. ** so detect this case and do not do the truncation.
  1541. */
  1542. static int pager_truncate(Pager *pPager, int nPage){
  1543. int rc = SQLITE_OK;
  1544. if( pPager->state>=PAGER_EXCLUSIVE && pPager->fd->pMethods ){
  1545. i64 currentSize, newSize;
  1546. rc = sqlite3OsFileSize(pPager->fd, &currentSize);
  1547. newSize = pPager->pageSize*(i64)nPage;
  1548. if( rc==SQLITE_OK && currentSize>newSize ){
  1549. rc = sqlite3OsTruncate(pPager->fd, newSize);
  1550. }
  1551. }
  1552. if( rc==SQLITE_OK ){
  1553. pPager->dbSize = nPage;
  1554. pager_truncate_cache(pPager);
  1555. }
  1556. return rc;
  1557. }
  1558. /*
  1559. ** Set the sectorSize for the given pager.
  1560. **
  1561. ** The sector size is the larger of the sector size reported
  1562. ** by sqlite3OsSectorSize() and the pageSize.
  1563. */
  1564. static void setSectorSize(Pager *pPager){
  1565. assert(pPager->fd->pMethods||pPager->tempFile);
  1566. if( !pPager->tempFile ){
  1567. /* Sector size doesn't matter for temporary files. Also, the file
  1568. ** may not have been opened yet, in whcih case the OsSectorSize()
  1569. ** call will segfault.
  1570. */
  1571. pPager->sectorSize = sqlite3OsSectorSize(pPager->fd);
  1572. }
  1573. if( pPager->sectorSize<pPager->pageSize ){
  1574. pPager->sectorSize = pPager->pageSize;
  1575. }
  1576. }
  1577. /*
  1578. ** Playback the journal and thus restore the database file to
  1579. ** the state it was in before we started making changes.
  1580. **
  1581. ** The journal file format is as follows:
  1582. **
  1583. ** (1) 8 byte prefix. A copy of aJournalMagic[].
  1584. ** (2) 4 byte big-endian integer which is the number of valid page records
  1585. ** in the journal. If this value is 0xffffffff, then compute the
  1586. ** number of page records from the journal size.
  1587. ** (3) 4 byte big-endian integer which is the initial value for the
  1588. ** sanity checksum.
  1589. ** (4) 4 byte integer which is the number of pages to truncate the
  1590. ** database to during a rollback.
  1591. ** (5) 4 byte integer which is the number of bytes in the master journal
  1592. ** name. The value may be zero (indicate that there is no master
  1593. ** journal.)
  1594. ** (6) N bytes of the master journal name. The name will be nul-terminated
  1595. ** and might be shorter than the value read from (5). If the first byte
  1596. ** of the name is \000 then there is no master journal. The master
  1597. ** journal name is stored in UTF-8.
  1598. ** (7) Zero or more pages instances, each as follows:
  1599. ** + 4 byte page number.
  1600. ** + pPager->pageSize bytes of data.
  1601. ** + 4 byte checksum
  1602. **
  1603. ** When we speak of the journal header, we mean the first 6 items above.
  1604. ** Each entry in the journal is an instance of the 7th item.
  1605. **
  1606. ** Call the value from the second bullet "nRec". nRec is the number of
  1607. ** valid page entries in the journal. In most cases, you can compute the
  1608. ** value of nRec from the size of the journal file. But if a power
  1609. ** failure occurred while the journal was being written, it could be the
  1610. ** case that the size of the journal file had already been increased but
  1611. ** the extra entries had not yet made it safely to disk. In such a case,
  1612. ** the value of nRec computed from the file size would be too large. For
  1613. ** that reason, we always use the nRec value in the header.
  1614. **
  1615. ** If the nRec value is 0xffffffff it means that nRec should be computed
  1616. ** from the file size. This value is used when the user selects the
  1617. ** no-sync option for the journal. A power failure could lead to corruption
  1618. ** in this case. But for things like temporary table (which will be
  1619. ** deleted when the power is restored) we don't care.
  1620. **
  1621. ** If the file opened as the journal file is not a well-formed
  1622. ** journal file then all pages up to the first corrupted page are rolled
  1623. ** back (or no pages if the journal header is corrupted). The journal file
  1624. ** is then deleted and SQLITE_OK returned, just as if no corruption had
  1625. ** been encountered.
  1626. **
  1627. ** If an I/O or malloc() error occurs, the journal-file is not deleted
  1628. ** and an error code is returned.
  1629. */
  1630. static int pager_playback(Pager *pPager, int isHot){
  1631. sqlite3_vfs *pVfs = pPager->pVfs;
  1632. i64 szJ; /* Size of the journal file in bytes */
  1633. u32 nRec; /* Number of Records in the journal */
  1634. int i; /* Loop counter */
  1635. Pgno mxPg = 0; /* Size of the original file in pages */
  1636. int rc; /* Result code of a subroutine */
  1637. char *zMaster = 0; /* Name of master journal file if any */
  1638. /* Figure out how many records are in the journal. Abort early if
  1639. ** the journal is empty.
  1640. */
  1641. assert( pPager->journalOpen );
  1642. rc = sqlite3OsFileSize(pPager->jfd, &szJ);
  1643. if( rc!=SQLITE_OK || szJ==0 ){
  1644. goto end_playback;
  1645. }
  1646. /* Read the master journal name from the journal, if it is present.
  1647. ** If a master journal file name is specified, but the file is not
  1648. ** present on disk, then the journal is not hot and does not need to be
  1649. ** played back.
  1650. */
  1651. zMaster = pPager->pTmpSpace;
  1652. rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
  1653. assert( rc!=SQLITE_DONE );
  1654. if( rc!=SQLITE_OK
  1655. || (zMaster[0] && !sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS))
  1656. ){
  1657. zMaster = 0;
  1658. if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  1659. goto end_playback;
  1660. }
  1661. pPager->journalOff = 0;
  1662. zMaster = 0;
  1663. /* This loop terminates either when the readJournalHdr() call returns
  1664. ** SQLITE_DONE or an IO error occurs. */
  1665. while( 1 ){
  1666. /* Read the next journal header from the journal file. If there are
  1667. ** not enough bytes left in the journal file for a complete header, or
  1668. ** it is corrupted, then a process must of failed while writing it.
  1669. ** This indicates nothing more needs to be rolled back.
  1670. */
  1671. rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
  1672. if( rc!=SQLITE_OK ){
  1673. if( rc==SQLITE_DONE ){
  1674. rc = SQLITE_OK;
  1675. }
  1676. goto end_playback;
  1677. }
  1678. /* If nRec is 0xffffffff, then this journal was created by a process
  1679. ** working in no-sync mode. This means that the rest of the journal
  1680. ** file consists of pages, there are no more journal headers. Compute
  1681. ** the value of nRec based on this assumption.
  1682. */
  1683. if( nRec==0xffffffff ){
  1684. assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
  1685. nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
  1686. }
  1687. /* If nRec is 0 and this rollback is of a transaction created by this
  1688. ** process and if this is the final header in the journal, then it means
  1689. ** that this part of the journal was being filled but has not yet been
  1690. ** synced to disk. Compute the number of pages based on the remaining
  1691. ** size of the file.
  1692. **
  1693. ** The third term of the test was added to fix ticket #2565.
  1694. */
  1695. if( nRec==0 && !isHot &&
  1696. pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
  1697. nRec = (szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager);
  1698. }
  1699. /* If this is the first header read from the journal, truncate the
  1700. ** database file back to its original size.
  1701. */
  1702. if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
  1703. rc = pager_truncate(pPager, mxPg);
  1704. if( rc!=SQLITE_OK ){
  1705. goto end_playback;
  1706. }
  1707. }
  1708. /* Copy original pages out of the journal and back into the database file.
  1709. */
  1710. for(i=0; i<nRec; i++){
  1711. rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
  1712. if( rc!=SQLITE_OK ){
  1713. if( rc==SQLITE_DONE ){
  1714. rc = SQLITE_OK;
  1715. pPager->journalOff = szJ;
  1716. break;
  1717. }else{
  1718. goto end_playback;
  1719. }
  1720. }
  1721. }
  1722. }
  1723. /*NOTREACHED*/
  1724. assert( 0 );
  1725. end_playback:
  1726. if( rc==SQLITE_OK ){
  1727. zMaster = pPager->pTmpSpace;
  1728. rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
  1729. }
  1730. if( rc==SQLITE_OK ){
  1731. rc = pager_end_transaction(pPager);
  1732. }
  1733. if( rc==SQLITE_OK && zMaster[0] ){
  1734. /* If there was a master journal and this routine will return success,
  1735. ** see if it is possible to delete the master journal.
  1736. */
  1737. rc = pager_delmaster(pPager, zMaster);
  1738. }
  1739. /* The Pager.sectorSize variable may have been updated while rolling
  1740. ** back a journal created by a process with a different sector size
  1741. ** value. Reset it to the correct value for this process.
  1742. */
  1743. setSectorSize(pPager);
  1744. return rc;
  1745. }
  1746. /*
  1747. ** Playback the statement journal.
  1748. **
  1749. ** This is similar to playing back the transaction journal but with
  1750. ** a few extra twists.
  1751. **
  1752. ** (1) The number of pages in the database file at the start of
  1753. ** the statement is stored in pPager->stmtSize, not in the
  1754. ** journal file itself.
  1755. **
  1756. ** (2) In addition to playing back the statement journal, also
  1757. ** playback all pages of the transaction journal beginning
  1758. ** at offset pPager->stmtJSize.
  1759. */
  1760. static int pager_stmt_playback(Pager *pPager){
  1761. i64 szJ; /* Size of the full journal */
  1762. i64 hdrOff;
  1763. int nRec; /* Number of Records */
  1764. int i; /* Loop counter */
  1765. int rc;
  1766. szJ = pPager->journalOff;
  1767. #ifndef NDEBUG
  1768. {
  1769. i64 os_szJ;
  1770. rc = sqlite3OsFileSize(pPager->jfd, &os_szJ);
  1771. if( rc!=SQLITE_OK ) return rc;
  1772. assert( szJ==os_szJ );
  1773. }
  1774. #endif
  1775. /* Set hdrOff to be the offset just after the end of the last journal
  1776. ** page written before the first journal-header for this statement
  1777. ** transaction was written, or the end of the file if no journal
  1778. ** header was written.
  1779. */
  1780. hdrOff = pPager->stmtHdrOff;
  1781. assert( pPager->fullSync || !hdrOff );
  1782. if( !hdrOff ){
  1783. hdrOff = szJ;
  1784. }
  1785. /* Truncate the database back to its original size.
  1786. */
  1787. rc = pager_truncate(pPager, pPager->stmtSize);
  1788. assert( pPager->state>=PAGER_SHARED );
  1789. /* Figure out how many records are in the statement journal.
  1790. */
  1791. assert( pPager->stmtInUse && pPager->journalOpen );
  1792. nRec = pPager->stmtNRec;
  1793. /* Copy original pages out of the statement journal and back into the
  1794. ** database file. Note that the statement journal omits checksums from
  1795. ** each record since power-failure recovery is not important to statement
  1796. ** journals.
  1797. */
  1798. for(i=0; i<nRec; i++){
  1799. i64 offset = i*(4+pPager->pageSize);
  1800. rc = pager_playback_one_page(pPager, pPager->stfd, offset, 0);
  1801. assert( rc!=SQLITE_DONE );
  1802. if( rc!=SQLITE_OK ) goto end_stmt_playback;
  1803. }
  1804. /* Now roll some pages back from the transaction journal. Pager.stmtJSize
  1805. ** was the size of the journal file when this statement was started, so
  1806. ** everything after that needs to be rolled back, either into the
  1807. ** database, the memory cache, or both.
  1808. **
  1809. ** If it is not zero, then Pager.stmtHdrOff is the offset to the start
  1810. ** of the first journal header written during this statement transaction.
  1811. */
  1812. pPager->journalOff = pPager->stmtJSize;
  1813. pPager->cksumInit = pPager->stmtCksum;
  1814. while( pPager->journalOff < hdrOff ){
  1815. rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
  1816. assert( rc!=SQLITE_DONE );
  1817. if( rc!=SQLITE_OK ) goto end_stmt_playback;
  1818. }
  1819. while( pPager->journalOff < szJ ){
  1820. u32 nJRec; /* Number of Journal Records */
  1821. u32 dummy;
  1822. rc = readJournalHdr(pPager, szJ, &nJRec, &dummy);
  1823. if( rc!=SQLITE_OK ){
  1824. assert( rc!=SQLITE_DONE );
  1825. goto end_stmt_playback;
  1826. }
  1827. if( nJRec==0 ){
  1828. nJRec = (szJ - pPager->journalOff) / (pPager->pageSize+8);
  1829. }
  1830. for(i=nJRec-1; i>=0 && pPager->journalOff < szJ; i--){
  1831. rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
  1832. assert( rc!=SQLITE_DONE );
  1833. if( rc!=SQLITE_OK ) goto end_stmt_playback;
  1834. }
  1835. }
  1836. pPager->journalOff = szJ;
  1837. end_stmt_playback:
  1838. if( rc==SQLITE_OK) {
  1839. pPager->journalOff = szJ;
  1840. /* pager_reload_cache(pPager); */
  1841. }
  1842. return rc;
  1843. }
  1844. /*
  1845. ** Change the maximum number of in-memory pages that are allowed.
  1846. */
  1847. void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
  1848. if( mxPage>10 ){
  1849. pPager->mxPage = mxPage;
  1850. }else{
  1851. pPager->mxPage = 10;
  1852. }
  1853. }
  1854. /*
  1855. ** Adjust the robustness of the database to damage due to OS crashes
  1856. ** or power failures by changing the number of syncs()s when writing
  1857. ** the rollback journal. There are three levels:
  1858. **
  1859. ** OFF sqlite3OsSync() is never called. This is the default
  1860. ** for temporary and transient files.
  1861. **
  1862. ** NORMAL The journal is synced once before writes begin on the
  1863. ** database. This is normally adequate protection, but
  1864. ** it is theoretically possible, though very unlikely,
  1865. ** that an inopertune power failure could leave the journal
  1866. ** in a state which would cause damage to the database
  1867. ** when it is rolled back.
  1868. **
  1869. ** FULL The journal is synced twice before writes begin on the
  1870. ** database (with some additional information - the nRec field
  1871. ** of the journal header - being written in between the two
  1872. ** syncs). If we assume that writing a
  1873. ** single disk sector is atomic, then this mode provides
  1874. ** assurance that the journal will not be corrupted to the
  1875. ** point of causing damage to the database during rollback.
  1876. **
  1877. ** Numeric values associated with these states are OFF==1, NORMAL=2,
  1878. ** and FULL=3.
  1879. */
  1880. #ifndef SQLITE_OMIT_PAGER_PRAGMAS
  1881. void sqlite3PagerSetSafetyLevel(Pager *pPager, int level, int full_fsync){
  1882. pPager->noSync = level==1 || pPager->tempFile;
  1883. pPager->fullSync = level==3 && !pPager->tempFile;
  1884. pPager->sync_flags = (full_fsync?SQLITE_SYNC_FULL:SQLITE_SYNC_NORMAL);
  1885. if( pPager->noSync ) pPager->needSync = 0;
  1886. }
  1887. #endif
  1888. /*
  1889. ** The following global variable is incremented whenever the library
  1890. ** attempts to open a temporary file. This information is used for
  1891. ** testing and analysis only.
  1892. */
  1893. #ifdef SQLITE_TEST
  1894. int sqlite3_opentemp_count = 0;
  1895. #endif
  1896. /*
  1897. ** Open a temporary file.
  1898. **
  1899. ** Write the file descriptor into *fd. Return SQLITE_OK on success or some
  1900. ** other error code if we fail. The OS will automatically delete the temporary
  1901. ** file when it is closed.
  1902. */
  1903. static int sqlite3PagerOpentemp(
  1904. sqlite3_vfs *pVfs, /* The virtual file system layer */
  1905. sqlite3_file *pFile, /* Write the file descriptor here */
  1906. char *zFilename, /* Name of the file. Might be NULL */
  1907. int vfsFlags /* Flags passed through to the VFS */
  1908. ){
  1909. int rc;
  1910. assert( zFilename!=0 );
  1911. #ifdef SQLITE_TEST
  1912. sqlite3_opentemp_count++; /* Used for testing and analysis only */
  1913. #endif
  1914. vfsFlags |= SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
  1915. SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE;
  1916. rc = sqlite3OsOpen(pVfs, zFilename, pFile, vfsFlags, 0);
  1917. assert( rc!=SQLITE_OK || pFile->pMethods );
  1918. return rc;
  1919. }
  1920. /*
  1921. ** Create a new page cache and put a pointer to the page cache in *ppPager.
  1922. ** The file to be cached need not exist. The file is not locked until
  1923. ** the first call to sqlite3PagerGet() and is only held open until the
  1924. ** last page is released using sqlite3PagerUnref().
  1925. **
  1926. ** If zFilename is NULL then a randomly-named temporary file is created
  1927. ** and used as the file to be cached. The file will be deleted
  1928. ** automatically when it is closed.
  1929. **
  1930. ** If zFilename is ":memory:" then all information is held in cache.
  1931. ** It is never written to disk. This can be used to implement an
  1932. ** in-memory database.
  1933. */
  1934. int sqlite3PagerOpen(
  1935. sqlite3_vfs *pVfs, /* The virtual file system to use */
  1936. Pager **ppPager, /* Return the Pager structure here */
  1937. const char *zFilename, /* Name of the database file to open */
  1938. int nExtra, /* Extra bytes append to each in-memory page */
  1939. int flags, /* flags controlling this file */
  1940. int vfsFlags /* flags passed through to sqlite3_vfs.xOpen() */
  1941. ){
  1942. u8 *pPtr;
  1943. Pager *pPager = 0;
  1944. int rc = SQLITE_OK;
  1945. int i;
  1946. int tempFile = 0;
  1947. int memDb = 0;
  1948. int readOnly = 0;
  1949. int useJournal = (flags & PAGER_OMIT_JOURNAL)==0;
  1950. int noReadlock = (flags & PAGER_NO_READLOCK)!=0;
  1951. int journalFileSize = sqlite3JournalSize(pVfs);
  1952. int nDefaultPage = SQLITE_DEFAULT_PAGE_SIZE;
  1953. char *zPathname;
  1954. int nPathname;
  1955. /* The default return is a NULL pointer */
  1956. *ppPager = 0;
  1957. /* Compute the full pathname */
  1958. nPathname = pVfs->mxPathname+1;
  1959. zPathname = sqlite3_malloc(nPathname);
  1960. if( zPathname==0 ){
  1961. return SQLITE_NOMEM;
  1962. }
  1963. if( zFilename && zFilename[0] ){
  1964. #ifndef SQLITE_OMIT_MEMORYDB
  1965. if( strcmp(zFilename,":memory:")==0 ){
  1966. memDb = 1;
  1967. zPathname[0] = 0;
  1968. }else
  1969. #endif
  1970. {
  1971. rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname);
  1972. }
  1973. }else{
  1974. rc = sqlite3OsGetTempname(pVfs, nPathname, zPathname);
  1975. }
  1976. if( rc!=SQLITE_OK ){
  1977. sqlite3_free(zPathname);
  1978. return rc;
  1979. }
  1980. nPathname = strlen(zPathname);
  1981. /* Allocate memory for the pager structure */
  1982. pPager = sqlite3MallocZero(
  1983. sizeof(*pPager) + /* Pager structure */
  1984. journalFileSize + /* The journal file structure */
  1985. pVfs->szOsFile * 2 + /* The db and stmt journal files */
  1986. 4*nPathname + 40 /* zFilename, zDirectory, zJournal, zStmtJrnl */
  1987. );
  1988. if( !pPager ){
  1989. sqlite3_free(zPathname);
  1990. return SQLITE_NOMEM;
  1991. }
  1992. pPtr = (u8 *)&pPager[1];
  1993. pPager->vfsFlags = vfsFlags;
  1994. pPager->fd = (sqlite3_file*)&pPtr[pVfs->szOsFile*0];
  1995. pPager->stfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*1];
  1996. pPager->jfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*2];
  1997. pPager->zFilename = (char*)&pPtr[pVfs->szOsFile*2+journalFileSize];
  1998. pPager->zDirectory = &pPager->zFilename[nPathname+1];
  1999. pPager->zJournal = &pPager->zDirectory[nPathname+1];
  2000. pPager->zStmtJrnl = &pPager->zJournal[nPathname+10];
  2001. pPager->pVfs = pVfs;
  2002. memcpy(pPager->zFilename, zPathname, nPathname+1);
  2003. sqlite3_free(zPathname);
  2004. /* Open the pager file.
  2005. */
  2006. if( zFilename && zFilename[0] && !memDb ){
  2007. if( nPathname>(pVfs->mxPathname - sizeof("-journal")) ){
  2008. rc = SQLITE_CANTOPEN;
  2009. }else{
  2010. int fout = 0;
  2011. rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd,
  2012. pPager->vfsFlags, &fout);
  2013. readOnly = (fout&SQLITE_OPEN_READONLY);
  2014. /* If the file was successfully opened for read/write access,
  2015. ** choose a default page size in case we have to create the
  2016. ** database file. The default page size is the maximum of:
  2017. **
  2018. ** + SQLITE_DEFAULT_PAGE_SIZE,
  2019. ** + The value returned by sqlite3OsSectorSize()
  2020. ** + The largest page size that can be written atomically.
  2021. */
  2022. if( rc==SQLITE_OK && !readOnly ){
  2023. int iSectorSize = sqlite3OsSectorSize(pPager->fd);
  2024. if( nDefaultPage<iSectorSize ){
  2025. nDefaultPage = iSectorSize;
  2026. }
  2027. #ifdef SQLITE_ENABLE_ATOMIC_WRITE
  2028. {
  2029. int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
  2030. int ii;
  2031. assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
  2032. assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
  2033. assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536);
  2034. for(ii=nDefaultPage; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){
  2035. if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ) nDefaultPage = ii;
  2036. }
  2037. }
  2038. #endif
  2039. if( nDefaultPage>SQLITE_MAX_DEFAULT_PAGE_SIZE ){
  2040. nDefaultPage = SQLITE_MAX_DEFAULT_PAGE_SIZE;
  2041. }
  2042. }
  2043. }
  2044. }else if( !memDb ){
  2045. /* If a temporary file is requested, it is not opened immediately.
  2046. ** In this case we accept the default page size and delay actually
  2047. ** opening the file until the first call to OsWrite().
  2048. */
  2049. tempFile = 1;
  2050. pPager->state = PAGER_EXCLUSIVE;
  2051. }
  2052. if( pPager && rc==SQLITE_OK ){
  2053. pPager->pTmpSpace = (char *)sqlite3_malloc(nDefaultPage);
  2054. }
  2055. /* If an error occured in either of the blocks above.
  2056. ** Free the Pager structure and close the file.
  2057. ** Since the pager is not allocated there is no need to set
  2058. ** any Pager.errMask variables.
  2059. */
  2060. if( !pPager || !pPager->pTmpSpace ){
  2061. sqlite3OsClose(pPager->fd);
  2062. sqlite3_free(pPager);
  2063. return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc);
  2064. }
  2065. PAGERTRACE3("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename);
  2066. IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
  2067. /* Fill in Pager.zDirectory[] */
  2068. memcpy(pPager->zDirectory, pPager->zFilename, nPathname+1);
  2069. for(i=strlen(pPager->zDirectory); i>0 && pPager->zDirectory[i-1]!='/'; i--){}
  2070. if( i>0 ) pPager->zDirectory[i-1] = 0;
  2071. /* Fill in Pager.zJournal[] and Pager.zStmtJrnl[] */
  2072. memcpy(pPager->zJournal, pPager->zFilename, nPathname);
  2073. memcpy(&pPager->zJournal[nPathname], "-journal", 9);
  2074. memcpy(pPager->zStmtJrnl, pPager->zFilename, nPathname);
  2075. memcpy(&pPager->zStmtJrnl[nPathname], "-stmtjrnl", 10);
  2076. /* pPager->journalOpen = 0; */
  2077. pPager->useJournal = useJournal && !memDb;
  2078. pPager->noReadlock = noReadlock && readOnly;
  2079. /* pPager->stmtOpen = 0; */
  2080. /* pPager->stmtInUse = 0; */
  2081. /* pPager->nRef = 0; */
  2082. pPager->dbSize = memDb-1;
  2083. pPager->pageSize = nDefaultPage;
  2084. /* pPager->stmtSize = 0; */
  2085. /* pPager->stmtJSize = 0; */
  2086. /* pPager->nPage = 0; */
  2087. pPager->mxPage = 100;
  2088. pPager->mxPgno = SQLITE_MAX_PAGE_COUNT;
  2089. /* pPager->state = PAGER_UNLOCK; */
  2090. assert( pPager->state == (tempFile ? PAGER_EXCLUSIVE : PAGER_UNLOCK) );
  2091. /* pPager->errMask = 0; */
  2092. pPager->tempFile = tempFile;
  2093. assert( tempFile==PAGER_LOCKINGMODE_NORMAL
  2094. || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE );
  2095. assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
  2096. pPager->exclusiveMode = tempFile;
  2097. pPager->memDb = memDb;
  2098. pPager->readOnly = readOnly;
  2099. /* pPager->needSync = 0; */
  2100. pPager->noSync = pPager->tempFile || !useJournal;
  2101. pPager->fullSync = (pPager->noSync?0:1);
  2102. pPager->sync_flags = SQLITE_SYNC_NORMAL;
  2103. /* pPager->pFirst = 0; */
  2104. /* pPager->pFirstSynced = 0; */
  2105. /* pPager->pLast = 0; */
  2106. pPager->nExtra = FORCE_ALIGNMENT(nExtra);
  2107. assert(pPager->fd->pMethods||memDb||tempFile);
  2108. if( !memDb ){
  2109. setSectorSize(pPager);
  2110. }
  2111. /* pPager->pBusyHandler = 0; */
  2112. /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
  2113. *ppPager = pPager;
  2114. #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  2115. pPager->iInUseMM = 0;
  2116. pPager->iInUseDB = 0;
  2117. if( !memDb ){
  2118. sqlite3_mutex *mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM2);
  2119. sqlite3_mutex_enter(mutex);
  2120. pPager->pNext = sqlite3PagerList;
  2121. if( sqlite3PagerList ){
  2122. assert( sqlite3PagerList->pPrev==0 );
  2123. sqlite3PagerList->pPrev = pPager;
  2124. }
  2125. pPager->pPrev = 0;
  2126. sqlite3PagerList = pPager;
  2127. sqlite3_mutex_leave(mutex);
  2128. }
  2129. #endif
  2130. return SQLITE_OK;
  2131. }
  2132. /*
  2133. ** Set the busy handler function.
  2134. */
  2135. void sqlite3PagerSetBusyhandler(Pager *pPager, BusyHandler *pBusyHandler){
  2136. pPager->pBusyHandler = pBusyHandler;
  2137. }
  2138. /*
  2139. ** Set the destructor for this pager. If not NULL, the destructor is called
  2140. ** when the reference count on each page reaches zero. The destructor can
  2141. ** be used to clean up information in the extra segment appended to each page.
  2142. **
  2143. ** The destructor is not called as a result sqlite3PagerClose().
  2144. ** Destructors are only called by sqlite3PagerUnref().
  2145. */
  2146. void sqlite3PagerSetDestructor(Pager *pPager, void (*xDesc)(DbPage*,int)){
  2147. pPager->xDestructor = xDesc;
  2148. }
  2149. /*
  2150. ** Set the reinitializer for this pager. If not NULL, the reinitializer
  2151. ** is called when the content of a page in cache is restored to its original
  2152. ** value as a result of a rollback. The callback gives higher-level code
  2153. ** an opportunity to restore the EXTRA section to agree with the restored
  2154. ** page data.
  2155. */
  2156. void sqlite3PagerSetReiniter(Pager *pPager, void (*xReinit)(DbPage*,int)){
  2157. pPager->xReiniter = xReinit;
  2158. }
  2159. /*
  2160. ** Set the page size to *pPageSize. If the suggest new page size is
  2161. ** inappropriate, then an alternative page size is set to that
  2162. ** value before returning.
  2163. */
  2164. int sqlite3PagerSetPagesize(Pager *pPager, u16 *pPageSize){
  2165. int rc = SQLITE_OK;
  2166. u16 pageSize = *pPageSize;
  2167. assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) );
  2168. if( pageSize && pageSize!=pPager->pageSize
  2169. && !pPager->memDb && pPager->nRef==0
  2170. ){
  2171. char *pNew = (char *)sqlite3_malloc(pageSize);
  2172. if( !pNew ){
  2173. rc = SQLITE_NOMEM;
  2174. }else{
  2175. pagerEnter(pPager);
  2176. pager_reset(pPager);
  2177. pPager->pageSize = pageSize;
  2178. setSectorSize(pPager);
  2179. sqlite3_free(pPager->pTmpSpace);
  2180. pPager->pTmpSpace = pNew;
  2181. pagerLeave(pPager);
  2182. }
  2183. }
  2184. *pPageSize = pPager->pageSize;
  2185. return rc;
  2186. }
  2187. /*
  2188. ** Return a pointer to the "temporary page" buffer held internally
  2189. ** by the pager. This is a buffer that is big enough to hold the
  2190. ** entire content of a database page. This buffer is used internally
  2191. ** during rollback and will be overwritten whenever a rollback
  2192. ** occurs. But other modules are free to use it too, as long as
  2193. ** no rollbacks are happening.
  2194. */
  2195. void *sqlite3PagerTempSpace(Pager *pPager){
  2196. return pPager->pTmpSpace;
  2197. }
  2198. /*
  2199. ** Attempt to set the maximum database page count if mxPage is positive.
  2200. ** Make no changes if mxPage is zero or negative. And never reduce the
  2201. ** maximum page count below the current size of the database.
  2202. **
  2203. ** Regardless of mxPage, return the current maximum page count.
  2204. */
  2205. int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
  2206. if( mxPage>0 ){
  2207. pPager->mxPgno = mxPage;
  2208. }
  2209. sqlite3PagerPagecount(pPager);
  2210. return pPager->mxPgno;
  2211. }
  /*
  ** The following pair of routines switches the simulated I/O error
  ** mechanism off and back on.  They are used around code where
  ** simulated errors are not wanted.
  **
  ** disable_simulated_io_errors() remembers the current value of
  ** sqlite3_io_error_pending and sets the counter to -1;
  ** enable_simulated_io_errors() puts the remembered value back.
  **
  ** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops
  ** and generate no code.
  */
  #ifdef SQLITE_TEST
  extern int sqlite3_io_error_pending;
  extern int sqlite3_io_error_hit;
  static int saved_cnt;      /* Pending-error count saved by disable_...() */

  void disable_simulated_io_errors(void){
    saved_cnt = sqlite3_io_error_pending;
    sqlite3_io_error_pending = -1;
  }

  void enable_simulated_io_errors(void){
    sqlite3_io_error_pending = saved_cnt;
  }
  #else
  # define disable_simulated_io_errors()
  # define enable_simulated_io_errors()
  #endif
  2235. /*
  2236. ** Read the first N bytes from the beginning of the file into memory
  2237. ** that pDest points to.
  2238. **
  2239. ** No error checking is done. The rational for this is that this function
  2240. ** may be called even if the file does not exist or contain a header. In
  2241. ** these cases sqlite3OsRead() will return an error, to which the correct
  2242. ** response is to zero the memory at pDest and continue. A real IO error
  2243. ** will presumably recur and be picked up later (Todo: Think about this).
  2244. */
  2245. int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){
  2246. int rc = SQLITE_OK;
  2247. memset(pDest, 0, N);
  2248. assert(MEMDB||pPager->fd->pMethods||pPager->tempFile);
  2249. if( pPager->fd->pMethods ){
  2250. IOTRACE(("DBHDR %p 0 %d\n", pPager, N))
  2251. rc = sqlite3OsRead(pPager->fd, pDest, N, 0);
  2252. if( rc==SQLITE_IOERR_SHORT_READ ){
  2253. rc = SQLITE_OK;
  2254. }
  2255. }
  2256. return rc;
  2257. }
  2258. /*
  2259. ** Return the total number of pages in the disk file associated with
  2260. ** pPager.
  2261. **
  2262. ** If the PENDING_BYTE lies on the page directly after the end of the
  2263. ** file, then consider this page part of the file too. For example, if
  2264. ** PENDING_BYTE is byte 4096 (the first byte of page 5) and the size of the
  2265. ** file is 4096 bytes, 5 is returned instead of 4.
  2266. */
  2267. int sqlite3PagerPagecount(Pager *pPager){
  2268. i64 n = 0;
  2269. int rc;
  2270. assert( pPager!=0 );
  2271. if( pPager->errCode ){
  2272. return 0;
  2273. }
  2274. if( pPager->dbSize>=0 ){
  2275. n = pPager->dbSize;
  2276. } else {
  2277. assert(pPager->fd->pMethods||pPager->tempFile);
  2278. if( (pPager->fd->pMethods)
  2279. && (rc = sqlite3OsFileSize(pPager->fd, &n))!=SQLITE_OK ){
  2280. pPager->nRef++;
  2281. pager_error(pPager, rc);
  2282. pPager->nRef--;
  2283. return 0;
  2284. }
  2285. if( n>0 && n<pPager->pageSize ){
  2286. n = 1;
  2287. }else{
  2288. n /= pPager->pageSize;
  2289. }
  2290. if( pPager->state!=PAGER_UNLOCK ){
  2291. pPager->dbSize = n;
  2292. }
  2293. }
  2294. if( n==(PENDING_BYTE/pPager->pageSize) ){
  2295. n++;
  2296. }
  2297. if( n>pPager->mxPgno ){
  2298. pPager->mxPgno = n;
  2299. }
  2300. return n;
  2301. }
  2302. #ifndef SQLITE_OMIT_MEMORYDB
  2303. /*
  2304. ** Clear a PgHistory block
  2305. */
  2306. static void clearHistory(PgHistory *pHist){
  2307. sqlite3_free(pHist->pOrig);
  2308. sqlite3_free(pHist->pStmt);
  2309. pHist->pOrig = 0;
  2310. pHist->pStmt = 0;
  2311. }
  2312. #else
  2313. #define clearHistory(x)
  2314. #endif
  2315. /*
  2316. ** Forward declaration
  2317. */
  2318. static int syncJournal(Pager*);
  2319. /*
  2320. ** Unlink pPg from its hash chain. Also set the page number to 0 to indicate
  2321. ** that the page is not part of any hash chain. This is required because the
  2322. ** sqlite3PagerMovepage() routine can leave a page in the
  2323. ** pNextFree/pPrevFree list that is not a part of any hash-chain.
  2324. */
  2325. static void unlinkHashChain(Pager *pPager, PgHdr *pPg){
  2326. if( pPg->pgno==0 ){
  2327. assert( pPg->pNextHash==0 && pPg->pPrevHash==0 );
  2328. return;
  2329. }
  2330. if( pPg->pNextHash ){
  2331. pPg->pNextHash->pPrevHash = pPg->pPrevHash;
  2332. }
  2333. if( pPg->pPrevHash ){
  2334. assert( pPager->aHash[pPg->pgno & (pPager->nHash-1)]!=pPg );
  2335. pPg->pPrevHash->pNextHash = pPg->pNextHash;
  2336. }else{
  2337. int h = pPg->pgno & (pPager->nHash-1);
  2338. pPager->aHash[h] = pPg->pNextHash;
  2339. }
  2340. if( MEMDB ){
  2341. clearHistory(PGHDR_TO_HIST(pPg, pPager));
  2342. }
  2343. pPg->pgno = 0;
  2344. pPg->pNextHash = pPg->pPrevHash = 0;
  2345. }
  2346. /*
  2347. ** Unlink a page from the free list (the list of all pages where nRef==0)
  2348. ** and from its hash collision chain.
  2349. */
  2350. static void unlinkPage(PgHdr *pPg){
  2351. Pager *pPager = pPg->pPager;
  2352. /* Unlink from free page list */
  2353. lruListRemove(pPg);
  2354. /* Unlink from the pgno hash table */
  2355. unlinkHashChain(pPager, pPg);
  2356. }
  2357. /*
  2358. ** This routine is used to truncate the cache when a database
  2359. ** is truncated. Drop from the cache all pages whose pgno is
  2360. ** larger than pPager->dbSize and is unreferenced.
  2361. **
  2362. ** Referenced pages larger than pPager->dbSize are zeroed.
  2363. **
  2364. ** Actually, at the point this routine is called, it would be
  2365. ** an error to have a referenced page. But rather than delete
  2366. ** that page and guarantee a subsequent segfault, it seems better
  2367. ** to zero it and hope that we error out sanely.
  2368. */
  2369. static void pager_truncate_cache(Pager *pPager){
  2370. PgHdr *pPg;
  2371. PgHdr **ppPg;
  2372. int dbSize = pPager->dbSize;
  2373. ppPg = &pPager->pAll;
  2374. while( (pPg = *ppPg)!=0 ){
  2375. if( pPg->pgno<=dbSize ){
  2376. ppPg = &pPg->pNextAll;
  2377. }else if( pPg->nRef>0 ){
  2378. memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
  2379. ppPg = &pPg->pNextAll;
  2380. }else{
  2381. *ppPg = pPg->pNextAll;
  2382. IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno));
  2383. PAGER_INCR(sqlite3_pager_pgfree_count);
  2384. unlinkPage(pPg);
  2385. makeClean(pPg);
  2386. sqlite3_free(pPg);
  2387. pPager->nPage--;
  2388. }
  2389. }
  2390. }
  2391. /*
  2392. ** Try to obtain a lock on a file. Invoke the busy callback if the lock
  2393. ** is currently not available. Repeat until the busy callback returns
  2394. ** false or until the lock succeeds.
  2395. **
  2396. ** Return SQLITE_OK on success and an error code if we cannot obtain
  2397. ** the lock.
  2398. */
  2399. static int pager_wait_on_lock(Pager *pPager, int locktype){
  2400. int rc;
  2401. /* The OS lock values must be the same as the Pager lock values */
  2402. assert( PAGER_SHARED==SHARED_LOCK );
  2403. assert( PAGER_RESERVED==RESERVED_LOCK );
  2404. assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK );
  2405. /* If the file is currently unlocked then the size must be unknown */
  2406. assert( pPager->state>=PAGER_SHARED || pPager->dbSize<0 || MEMDB );
  2407. if( pPager->state>=locktype ){
  2408. rc = SQLITE_OK;
  2409. }else{
  2410. do {
  2411. rc = sqlite3OsLock(pPager->fd, locktype);
  2412. }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) );
  2413. if( rc==SQLITE_OK ){
  2414. pPager->state = locktype;
  2415. IOTRACE(("LOCK %p %d\n", pPager, locktype))
  2416. }
  2417. }
  2418. return rc;
  2419. }
  2420. /*
  2421. ** Truncate the file to the number of pages specified.
  2422. */
  2423. int sqlite3PagerTruncate(Pager *pPager, Pgno nPage){
  2424. int rc;
  2425. assert( pPager->state>=PAGER_SHARED || MEMDB );
  2426. sqlite3PagerPagecount(pPager);
  2427. if( pPager->errCode ){
  2428. rc = pPager->errCode;
  2429. return rc;
  2430. }
  2431. if( nPage>=(unsigned)pPager->dbSize ){
  2432. return SQLITE_OK;
  2433. }
  2434. if( MEMDB ){
  2435. pPager->dbSize = nPage;
  2436. pager_truncate_cache(pPager);
  2437. return SQLITE_OK;
  2438. }
  2439. pagerEnter(pPager);
  2440. rc = syncJournal(pPager);
  2441. pagerLeave(pPager);
  2442. if( rc!=SQLITE_OK ){
  2443. return rc;
  2444. }
  2445. /* Get an exclusive lock on the database before truncating. */
  2446. pagerEnter(pPager);
  2447. rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  2448. pagerLeave(pPager);
  2449. if( rc!=SQLITE_OK ){
  2450. return rc;
  2451. }
  2452. rc = pager_truncate(pPager, nPage);
  2453. return rc;
  2454. }
  2455. /*
  2456. ** Shutdown the page cache. Free all memory and close all files.
  2457. **
  2458. ** If a transaction was in progress when this routine is called, that
  2459. ** transaction is rolled back. All outstanding pages are invalidated
  2460. ** and their memory is freed. Any attempt to use a page associated
  2461. ** with this page cache after this function returns will likely
  2462. ** result in a coredump.
  2463. **
  2464. ** This function always succeeds. If a transaction is active an attempt
  2465. ** is made to roll it back. If an error occurs during the rollback
  2466. ** a hot journal may be left in the filesystem but no error is returned
  2467. ** to the caller.
  2468. */
  2469. int sqlite3PagerClose(Pager *pPager){
  2470. #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  2471. if( !MEMDB ){
  2472. sqlite3_mutex *mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM2);
  2473. sqlite3_mutex_enter(mutex);
  2474. if( pPager->pPrev ){
  2475. pPager->pPrev->pNext = pPager->pNext;
  2476. }else{
  2477. sqlite3PagerList = pPager->pNext;
  2478. }
  2479. if( pPager->pNext ){
  2480. pPager->pNext->pPrev = pPager->pPrev;
  2481. }
  2482. sqlite3_mutex_leave(mutex);
  2483. }
  2484. #endif
  2485. disable_simulated_io_errors();
  2486. pPager->errCode = 0;
  2487. pPager->exclusiveMode = 0;
  2488. pager_reset(pPager);
  2489. pagerUnlockAndRollback(pPager);
  2490. enable_simulated_io_errors();
  2491. PAGERTRACE2("CLOSE %d\n", PAGERID(pPager));
  2492. IOTRACE(("CLOSE %p\n", pPager))
  2493. assert( pPager->errCode || (pPager->journalOpen==0 && pPager->stmtOpen==0) );
  2494. if( pPager->journalOpen ){
  2495. sqlite3OsClose(pPager->jfd);
  2496. }
  2497. sqlite3_free(pPager->aInJournal);
  2498. if( pPager->stmtOpen ){
  2499. sqlite3OsClose(pPager->stfd);
  2500. }
  2501. sqlite3OsClose(pPager->fd);
  2502. /* Temp files are automatically deleted by the OS
  2503. ** if( pPager->tempFile ){
  2504. ** sqlite3OsDelete(pPager->zFilename);
  2505. ** }
  2506. */
  2507. sqlite3_free(pPager->aHash);
  2508. sqlite3_free(pPager->pTmpSpace);
  2509. sqlite3_free(pPager);
  2510. return SQLITE_OK;
  2511. }
  2512. #if !defined(NDEBUG) || defined(SQLITE_TEST)
  2513. /*
  2514. ** Return the page number for the given page data.
  2515. */
  2516. Pgno sqlite3PagerPagenumber(DbPage *p){
  2517. return p->pgno;
  2518. }
  2519. #endif
  2520. /*
  2521. ** The page_ref() function increments the reference count for a page.
  2522. ** If the page is currently on the freelist (the reference count is zero) then
  2523. ** remove it from the freelist.
  2524. **
  2525. ** For non-test systems, page_ref() is a macro that calls _page_ref()
  2526. ** online of the reference count is zero. For test systems, page_ref()
  2527. ** is a real function so that we can set breakpoints and trace it.
  2528. */
  2529. static void _page_ref(PgHdr *pPg){
  2530. if( pPg->nRef==0 ){
  2531. /* The page is currently on the freelist. Remove it. */
  2532. lruListRemove(pPg);
  2533. pPg->pPager->nRef++;
  2534. }
  2535. pPg->nRef++;
  2536. REFINFO(pPg);
  2537. }
  #ifdef SQLITE_DEBUG
    /* Debug build: a real function, so breakpoints can be set on it.
    ** An unreferenced page goes through _page_ref(); an already
    ** referenced page just has its count incremented. */
    static void page_ref(PgHdr *pPg){
      if( pPg->nRef!=0 ){
        pPg->nRef++;
        REFINFO(pPg);
        return;
      }
      _page_ref(pPg);
    }
  #else
    /* Non-debug build: the same decision expressed as a macro. */
  # define page_ref(P)   ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
  #endif
  2550. /*
  2551. ** Increment the reference count for a page. The input pointer is
  2552. ** a reference to the page data.
  2553. */
  2554. int sqlite3PagerRef(DbPage *pPg){
  2555. pagerEnter(pPg->pPager);
  2556. page_ref(pPg);
  2557. pagerLeave(pPg->pPager);
  2558. return SQLITE_OK;
  2559. }
  2560. /*
  2561. ** Sync the journal. In other words, make sure all the pages that have
  2562. ** been written to the journal have actually reached the surface of the
  2563. ** disk. It is not safe to modify the original database file until after
  2564. ** the journal has been synced. If the original database is modified before
  2565. ** the journal is synced and a power failure occurs, the unsynced journal
  2566. ** data would be lost and we would be unable to completely rollback the
  2567. ** database changes. Database corruption would occur.
  2568. **
  2569. ** This routine also updates the nRec field in the header of the journal.
  2570. ** (See comments on the pager_playback() routine for additional information.)
  2571. ** If the sync mode is FULL, two syncs will occur. First the whole journal
  2572. ** is synced, then the nRec field is updated, then a second sync occurs.
  2573. **
  2574. ** For temporary databases, we do not care if we are able to rollback
  2575. ** after a power failure, so no sync occurs.
  2576. **
  2577. ** If the IOCAP_SEQUENTIAL flag is set for the persistent media on which
  2578. ** the database is stored, then OsSync() is never called on the journal
  2579. ** file. In this case all that is required is to update the nRec field in
  2580. ** the journal header.
  2581. **
  2582. ** This routine clears the needSync field of every page current held in
  2583. ** memory.
  2584. */
  2585. static int syncJournal(Pager *pPager){
  2586. PgHdr *pPg;
  2587. int rc = SQLITE_OK;
  2588. /* Sync the journal before modifying the main database
  2589. ** (assuming there is a journal and it needs to be synced.)
  2590. */
  2591. if( pPager->needSync ){
  2592. if( !pPager->tempFile ){
  2593. int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
  2594. assert( pPager->journalOpen );
  2595. /* assert( !pPager->noSync ); // noSync might be set if synchronous
  2596. ** was turned off after the transaction was started. Ticket #615 */
  2597. #ifndef NDEBUG
  2598. {
  2599. /* Make sure the pPager->nRec counter we are keeping agrees
  2600. ** with the nRec computed from the size of the journal file.
  2601. */
  2602. i64 jSz;
  2603. rc = sqlite3OsFileSize(pPager->jfd, &jSz);
  2604. if( rc!=0 ) return rc;
  2605. assert( pPager->journalOff==jSz );
  2606. }
  2607. #endif
  2608. if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
  2609. /* Write the nRec value into the journal file header. If in
  2610. ** full-synchronous mode, sync the journal first. This ensures that
  2611. ** all data has really hit the disk before nRec is updated to mark
  2612. ** it as a candidate for rollback.
  2613. **
  2614. ** This is not required if the persistent media supports the
  2615. ** SAFE_APPEND property. Because in this case it is not possible
  2616. ** for garbage data to be appended to the file, the nRec field
  2617. ** is populated with 0xFFFFFFFF when the journal header is written
  2618. ** and never needs to be updated.
  2619. */
  2620. i64 jrnlOff;
  2621. if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
  2622. PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
  2623. IOTRACE(("JSYNC %p\n", pPager))
  2624. rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags);
  2625. if( rc!=0 ) return rc;
  2626. }
  2627. jrnlOff = pPager->journalHdr + sizeof(aJournalMagic);
  2628. IOTRACE(("JHDR %p %lld %d\n", pPager, jrnlOff, 4));
  2629. rc = write32bits(pPager->jfd, jrnlOff, pPager->nRec);
  2630. if( rc ) return rc;
  2631. }
  2632. if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
  2633. PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
  2634. IOTRACE(("JSYNC %p\n", pPager))
  2635. rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags|
  2636. (pPager->sync_flags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
  2637. );
  2638. if( rc!=0 ) return rc;
  2639. }
  2640. pPager->journalStarted = 1;
  2641. }
  2642. pPager->needSync = 0;
  2643. /* Erase the needSync flag from every page.
  2644. */
  2645. for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
  2646. pPg->needSync = 0;
  2647. }
  2648. lruListSetFirstSynced(pPager);
  2649. }
  2650. #ifndef NDEBUG
  2651. /* If the Pager.needSync flag is clear then the PgHdr.needSync
  2652. ** flag must also be clear for all pages. Verify that this
  2653. ** invariant is true.
  2654. */
  2655. else{
  2656. for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
  2657. assert( pPg->needSync==0 );
  2658. }
  2659. assert( pPager->lru.pFirstSynced==pPager->lru.pFirst );
  2660. }
  2661. #endif
  2662. return rc;
  2663. }
  2664. /*
  2665. ** Merge two lists of pages connected by pDirty and in pgno order.
  2666. ** Do not both fixing the pPrevDirty pointers.
  2667. */
  2668. static PgHdr *merge_pagelist(PgHdr *pA, PgHdr *pB){
  2669. PgHdr result, *pTail;
  2670. pTail = &result;
  2671. while( pA && pB ){
  2672. if( pA->pgno<pB->pgno ){
  2673. pTail->pDirty = pA;
  2674. pTail = pA;
  2675. pA = pA->pDirty;
  2676. }else{
  2677. pTail->pDirty = pB;
  2678. pTail = pB;
  2679. pB = pB->pDirty;
  2680. }
  2681. }
  2682. if( pA ){
  2683. pTail->pDirty = pA;
  2684. }else if( pB ){
  2685. pTail->pDirty = pB;
  2686. }else{
  2687. pTail->pDirty = 0;
  2688. }
  2689. return result.pDirty;
  2690. }
  2691. /*
  2692. ** Sort the list of pages in accending order by pgno. Pages are
  2693. ** connected by pDirty pointers. The pPrevDirty pointers are
  2694. ** corrupted by this sort.
  2695. */
  2696. #define N_SORT_BUCKET_ALLOC 25
  2697. #define N_SORT_BUCKET 25
  2698. #ifdef SQLITE_TEST
  2699. int sqlite3_pager_n_sort_bucket = 0;
  2700. #undef N_SORT_BUCKET
  2701. #define N_SORT_BUCKET \
  2702. (sqlite3_pager_n_sort_bucket?sqlite3_pager_n_sort_bucket:N_SORT_BUCKET_ALLOC)
  2703. #endif
  2704. static PgHdr *sort_pagelist(PgHdr *pIn){
  2705. PgHdr *a[N_SORT_BUCKET_ALLOC], *p;
  2706. int i;
  2707. memset(a, 0, sizeof(a));
  2708. while( pIn ){
  2709. p = pIn;
  2710. pIn = p->pDirty;
  2711. p->pDirty = 0;
  2712. for(i=0; i<N_SORT_BUCKET-1; i++){
  2713. if( a[i]==0 ){
  2714. a[i] = p;
  2715. break;
  2716. }else{
  2717. p = merge_pagelist(a[i], p);
  2718. a[i] = 0;
  2719. }
  2720. }
  2721. if( i==N_SORT_BUCKET-1 ){
  2722. /* Coverage: To get here, there need to be 2^(N_SORT_BUCKET)
  2723. ** elements in the input list. This is possible, but impractical.
  2724. ** Testing this line is the point of global variable
  2725. ** sqlite3_pager_n_sort_bucket.
  2726. */
  2727. a[i] = merge_pagelist(a[i], p);
  2728. }
  2729. }
  2730. p = a[0];
  2731. for(i=1; i<N_SORT_BUCKET; i++){
  2732. p = merge_pagelist(p, a[i]);
  2733. }
  2734. return p;
  2735. }
  2736. /*
  2737. ** Given a list of pages (connected by the PgHdr.pDirty pointer) write
  2738. ** every one of those pages out to the database file and mark them all
  2739. ** as clean.
  2740. */
  2741. static int pager_write_pagelist(PgHdr *pList){
  2742. Pager *pPager;
  2743. PgHdr *p;
  2744. int rc;
  2745. if( pList==0 ) return SQLITE_OK;
  2746. pPager = pList->pPager;
  2747. /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
  2748. ** database file. If there is already an EXCLUSIVE lock, the following
  2749. ** calls to sqlite3OsLock() are no-ops.
  2750. **
  2751. ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
  2752. ** through an intermediate state PENDING. A PENDING lock prevents new
  2753. ** readers from attaching to the database but is unsufficient for us to
  2754. ** write. The idea of a PENDING lock is to prevent new readers from
  2755. ** coming in while we wait for existing readers to clear.
  2756. **
  2757. ** While the pager is in the RESERVED state, the original database file
  2758. ** is unchanged and we can rollback without having to playback the
  2759. ** journal into the original database file. Once we transition to
  2760. ** EXCLUSIVE, it means the database file has been changed and any rollback
  2761. ** will require a journal playback.
  2762. */
  2763. rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  2764. if( rc!=SQLITE_OK ){
  2765. return rc;
  2766. }
  2767. pList = sort_pagelist(pList);
  2768. for(p=pList; p; p=p->pDirty){
  2769. assert( p->dirty );
  2770. p->dirty = 0;
  2771. }
  2772. while( pList ){
  2773. /* If the file has not yet been opened, open it now. */
  2774. if( !pPager->fd->pMethods ){
  2775. assert(pPager->tempFile);
  2776. rc = sqlite3PagerOpentemp(pPager->pVfs, pPager->fd, pPager->zFilename,
  2777. pPager->vfsFlags);
  2778. if( rc ) return rc;
  2779. }
  2780. /* If there are dirty pages in the page cache with page numbers greater
  2781. ** than Pager.dbSize, this means sqlite3PagerTruncate() was called to
  2782. ** make the file smaller (presumably by auto-vacuum code). Do not write
  2783. ** any such pages to the file.
  2784. */
  2785. if( pList->pgno<=pPager->dbSize ){
  2786. i64 offset = (pList->pgno-1)*(i64)pPager->pageSize;
  2787. char *pData = CODEC2(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
  2788. PAGERTRACE4("STORE %d page %d hash(%08x)\n",
  2789. PAGERID(pPager), pList->pgno, pager_pagehash(pList));
  2790. IOTRACE(("PGOUT %p %d\n", pPager, pList->pgno));
  2791. rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset);
  2792. PAGER_INCR(sqlite3_pager_writedb_count);
  2793. PAGER_INCR(pPager->nWrite);
  2794. if( pList->pgno==1 ){
  2795. memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
  2796. }
  2797. }
  2798. #ifndef NDEBUG
  2799. else{
  2800. PAGERTRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno);
  2801. }
  2802. #endif
  2803. if( rc ) return rc;
  2804. #ifdef SQLITE_CHECK_PAGES
  2805. pList->pageHash = pager_pagehash(pList);
  2806. #endif
  2807. pList = pList->pDirty;
  2808. }
  2809. return SQLITE_OK;
  2810. }
  2811. /*
  2812. ** Collect every dirty page into a dirty list and
  2813. ** return a pointer to the head of that list. All pages are
  2814. ** collected even if they are still in use.
  2815. */
  2816. static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
  2817. return pPager->pDirty;
  2818. }
  2819. /*
  2820. ** Return TRUE if there is a hot journal on the given pager.
  2821. ** A hot journal is one that needs to be played back.
  2822. **
  2823. ** If the current size of the database file is 0 but a journal file
  2824. ** exists, that is probably an old journal left over from a prior
  2825. ** database with the same name. Just delete the journal.
  2826. */
  2827. static int hasHotJournal(Pager *pPager){
  2828. sqlite3_vfs *pVfs = pPager->pVfs;
  2829. if( !pPager->useJournal ) return 0;
  2830. if( !pPager->fd->pMethods ) return 0;
  2831. if( !sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS) ){
  2832. return 0;
  2833. }
  2834. if( sqlite3OsCheckReservedLock(pPager->fd) ){
  2835. return 0;
  2836. }
  2837. if( sqlite3PagerPagecount(pPager)==0 ){
  2838. sqlite3OsDelete(pVfs, pPager->zJournal, 0);
  2839. return 0;
  2840. }else{
  2841. return 1;
  2842. }
  2843. }
  2844. /*
  2845. ** Try to find a page in the cache that can be recycled.
  2846. **
  2847. ** This routine may return SQLITE_IOERR, SQLITE_FULL or SQLITE_OK. It
  2848. ** does not set the pPager->errCode variable.
  2849. */
  2850. static int pager_recycle(Pager *pPager, PgHdr **ppPg){
  2851. PgHdr *pPg;
  2852. *ppPg = 0;
  2853. /* It is illegal to call this function unless the pager object
  2854. ** pointed to by pPager has at least one free page (page with nRef==0).
  2855. */
  2856. assert(!MEMDB);
  2857. assert(pPager->lru.pFirst);
  2858. /* Find a page to recycle. Try to locate a page that does not
  2859. ** require us to do an fsync() on the journal.
  2860. */
  2861. pPg = pPager->lru.pFirstSynced;
  2862. /* If we could not find a page that does not require an fsync()
  2863. ** on the journal file then fsync the journal file. This is a
  2864. ** very slow operation, so we work hard to avoid it. But sometimes
  2865. ** it can't be helped.
  2866. */
  2867. if( pPg==0 && pPager->lru.pFirst){
  2868. int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
  2869. int rc = syncJournal(pPager);
  2870. if( rc!=0 ){
  2871. return rc;
  2872. }
  2873. if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
  2874. /* If in full-sync mode, write a new journal header into the
  2875. ** journal file. This is done to avoid ever modifying a journal
  2876. ** header that is involved in the rollback of pages that have
  2877. ** already been written to the database (in case the header is
  2878. ** trashed when the nRec field is updated).
  2879. */
  2880. pPager->nRec = 0;
  2881. assert( pPager->journalOff > 0 );
  2882. assert( pPager->doNotSync==0 );
  2883. rc = writeJournalHdr(pPager);
  2884. if( rc!=0 ){
  2885. return rc;
  2886. }
  2887. }
  2888. pPg = pPager->lru.pFirst;
  2889. }
  2890. assert( pPg->nRef==0 );
  2891. /* Write the page to the database file if it is dirty.
  2892. */
  2893. if( pPg->dirty ){
  2894. int rc;
  2895. assert( pPg->needSync==0 );
  2896. makeClean(pPg);
  2897. pPg->dirty = 1;
  2898. pPg->pDirty = 0;
  2899. rc = pager_write_pagelist( pPg );
  2900. pPg->dirty = 0;
  2901. if( rc!=SQLITE_OK ){
  2902. return rc;
  2903. }
  2904. }
  2905. assert( pPg->dirty==0 );
  2906. /* If the page we are recycling is marked as alwaysRollback, then
  2907. ** set the global alwaysRollback flag, thus disabling the
  2908. ** sqlite3PagerDontRollback() optimization for the rest of this transaction.
  2909. ** It is necessary to do this because the page marked alwaysRollback
  2910. ** might be reloaded at a later time but at that point we won't remember
  2911. ** that is was marked alwaysRollback. This means that all pages must
  2912. ** be marked as alwaysRollback from here on out.
  2913. */
  2914. if( pPg->alwaysRollback ){
  2915. IOTRACE(("ALWAYS_ROLLBACK %p\n", pPager))
  2916. pPager->alwaysRollback = 1;
  2917. }
  2918. /* Unlink the old page from the free list and the hash table
  2919. */
  2920. unlinkPage(pPg);
  2921. assert( pPg->pgno==0 );
  2922. *ppPg = pPg;
  2923. return SQLITE_OK;
  2924. }
  #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
  /*
  ** Free superfluous dynamically allocated memory held by the pager
  ** system.  Unreferenced pages are recycled and released with
  ** sqlite3_free() one at a time.
  **
  ** nReq is the number of bytes of memory required; a negative nReq means
  ** "keep going until nothing more can be released".  The function stops
  ** once at least nReq bytes have been released, when no candidate page
  ** remains, or after the first error.  The return value is the total
  ** number of bytes of memory released.
  **
  ** Errors from recycling a page are not returned; they are recorded on
  ** the affected pager with pager_error().
  */
  int sqlite3PagerReleaseMemory(int nReq){
    int nFreed = 0;                 /* Bytes of memory released so far */
    sqlite3_mutex *pMemMutex;       /* The MEM2 mutex */
    Pager *pPager;                  /* For looping over pagers */
    BusyHandler *pSavedBusy;        /* Saved copy of the busy handler */
    int rc = SQLITE_OK;

    /* Take the memory-management mutex. */
    pMemMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM2);
    sqlite3_mutex_enter(pMemMutex);

    /* Flag every pager so that database connections know memory
    ** management wants access to them.
    */
    pPager = sqlite3PagerList;
    while( pPager ){
      pPager->iInUseMM = 1;
      pPager = pPager->pNext;
    }

    while( rc==SQLITE_OK && (nReq<0 || nFreed<nReq) ){
      PgHdr *pPg;
      PgHdr *pRecycled;
      PgHdr **ppLink;

      /* Look first for a page that needs no journal sync and whose pager
      ** is not in use.  Failing that, accept any page whose pager is not
      ** in use, even if recycling it requires a sync().
      */
      sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
      for(pPg=sqlite3LruPageList.pFirstSynced;
          pPg && (pPg->needSync || pPg->pPager->iInUseDB);
          pPg=pPg->gfree.pNext){}
      if( pPg==0 ){
        for(pPg=sqlite3LruPageList.pFirst;
            pPg && pPg->pPager->iInUseDB;
            pPg=pPg->gfree.pNext){}
      }
      sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));

      /* Nothing left to recycle: no further memory can be released. */
      if( pPg==0 ) break;

      pPager = pPg->pPager;
      assert(!pPg->needSync || pPg==pPager->lru.pFirst);
      assert(pPg->needSync || pPg==pPager->lru.pFirstSynced);

      /* Recycle with the busy handler temporarily disabled. */
      pSavedBusy = pPager->pBusyHandler;
      pPager->pBusyHandler = 0;
      rc = pager_recycle(pPager, &pRecycled);
      pPager->pBusyHandler = pSavedBusy;
      assert(pRecycled==pPg || rc!=SQLITE_OK);

      if( rc!=SQLITE_OK ){
        /* An error occurred whilst writing to the database file or
        ** journal in pager_recycle(). The error is not returned to the
        ** caller of this function. Instead, set the Pager.errCode variable.
        ** The error will be returned to the user (or users, in the case
        ** of a shared pager cache) of the pager for which the error
        ** occurred.  The loop condition then ends the loop.
        */
        assert(
            (rc&0xff)==SQLITE_IOERR ||
            rc==SQLITE_FULL ||
            rc==SQLITE_BUSY
        );
        assert( pPager->state>=PAGER_RESERVED );
        pager_error(pPager, rc);
        continue;
      }

      /* We've found a page to free. At this point the page has been
      ** removed from the page hash-table, free-list and synced-list
      ** (pFirstSynced). It is still in the all pages (pAll) list.
      ** Remove it from this list before freeing.
      **
      ** Todo: Check the Pager.pStmt list to make sure this is Ok. It
      ** probably is though.
      */
      assert( pPg );
      ppLink = &pPager->pAll;
      while( *ppLink!=pPg ){
        ppLink = &(*ppLink)->pNextAll;
      }
      *ppLink = pPg->pNextAll;

      nFreed += (
          sizeof(*pPg) + pPager->pageSize
          + sizeof(u32) + pPager->nExtra
          + MEMDB*sizeof(PgHistory)
      );
      IOTRACE(("PGFREE %p %d *\n", pPager, pPg->pgno));
      PAGER_INCR(sqlite3_pager_pgfree_count);
      sqlite3_free(pPg);
      pPager->nPage--;
    }

    /* Clear the memory management flags and release the mutex. */
    for(pPager=sqlite3PagerList; pPager; pPager=pPager->pNext){
      pPager->iInUseMM = 0;
    }
    sqlite3_mutex_leave(pMemMutex);

    /* Report how many bytes were released. */
    return nFreed;
  }
  #endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */
  3035. /*
  3036. ** Read the content of page pPg out of the database file.
  3037. */
  3038. static int readDbPage(Pager *pPager, PgHdr *pPg, Pgno pgno){
  3039. int rc;
  3040. i64 offset;
  3041. assert( MEMDB==0 );
  3042. assert(pPager->fd->pMethods||pPager->tempFile);
  3043. if( !pPager->fd->pMethods ){
  3044. return SQLITE_IOERR_SHORT_READ;
  3045. }
  3046. offset = (pgno-1)*(i64)pPager->pageSize;
  3047. rc = sqlite3OsRead(pPager->fd, PGHDR_TO_DATA(pPg), pPager->pageSize, offset);
  3048. PAGER_INCR(sqlite3_pager_readdb_count);
  3049. PAGER_INCR(pPager->nRead);
  3050. IOTRACE(("PGIN %p %d\n", pPager, pgno));
  3051. if( pgno==1 ){
  3052. memcpy(&pPager->dbFileVers, &((u8*)PGHDR_TO_DATA(pPg))[24],
  3053. sizeof(pPager->dbFileVers));
  3054. }
  3055. CODEC1(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
  3056. PAGERTRACE4("FETCH %d page %d hash(%08x)\n",
  3057. PAGERID(pPager), pPg->pgno, pager_pagehash(pPg));
  3058. return rc;
  3059. }
  3060. /*
  3061. ** This function is called to obtain the shared lock required before
  3062. ** data may be read from the pager cache. If the shared lock has already
  3063. ** been obtained, this function is a no-op.
  3064. **
  3065. ** Immediately after obtaining the shared lock (if required), this function
  3066. ** checks for a hot-journal file. If one is found, an emergency rollback
  3067. ** is performed immediately.
  3068. */
  3069. static int pagerSharedLock(Pager *pPager){
  3070. int rc = SQLITE_OK;
  3071. int isHot = 0;
  3072. /* If this database is opened for exclusive access, has no outstanding
  3073. ** page references and is in an error-state, now is the chance to clear
  3074. ** the error. Discard the contents of the pager-cache and treat any
  3075. ** open journal file as a hot-journal.
  3076. */
  3077. if( !MEMDB && pPager->exclusiveMode && pPager->nRef==0 && pPager->errCode ){
  3078. if( pPager->journalOpen ){
  3079. isHot = 1;
  3080. }
  3081. pager_reset(pPager);
  3082. pPager->errCode = SQLITE_OK;
  3083. }
  3084. /* If the pager is still in an error state, do not proceed. The error
  3085. ** state will be cleared at some point in the future when all page
  3086. ** references are dropped and the cache can be discarded.
  3087. */
  3088. if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
  3089. return pPager->errCode;
  3090. }
  3091. if( pPager->state==PAGER_UNLOCK || isHot ){
  3092. sqlite3_vfs *pVfs = pPager->pVfs;
  3093. if( !MEMDB ){
  3094. assert( pPager->nRef==0 );
  3095. if( !pPager->noReadlock ){
  3096. rc = pager_wait_on_lock(pPager, SHARED_LOCK);
  3097. if( rc!=SQLITE_OK ){
  3098. return pager_error(pPager, rc);
  3099. }
  3100. assert( pPager->state>=SHARED_LOCK );
  3101. }
  3102. /* If a journal file exists, and there is no RESERVED lock on the
  3103. ** database file, then it either needs to be played back or deleted.
  3104. */
  3105. if( hasHotJournal(pPager) || isHot ){
  3106. /* Get an EXCLUSIVE lock on the database file. At this point it is
  3107. ** important that a RESERVED lock is not obtained on the way to the
  3108. ** EXCLUSIVE lock. If it were, another process might open the
  3109. ** database file, detect the RESERVED lock, and conclude that the
  3110. ** database is safe to read while this process is still rolling it
  3111. ** back.
  3112. **
  3113. ** Because the intermediate RESERVED lock is not requested, the
  3114. ** second process will get to this point in the code and fail to
  3115. ** obtain its own EXCLUSIVE lock on the database file.
  3116. */
  3117. if( pPager->state<EXCLUSIVE_LOCK ){
  3118. rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK);
  3119. if( rc!=SQLITE_OK ){
  3120. pager_unlock(pPager);
  3121. return pager_error(pPager, rc);
  3122. }
  3123. pPager->state = PAGER_EXCLUSIVE;
  3124. }
  3125. /* Open the journal for reading only. Return SQLITE_BUSY if
  3126. ** we are unable to open the journal file.
  3127. **
  3128. ** The journal file does not need to be locked itself. The
  3129. ** journal file is never open unless the main database file holds
  3130. ** a write lock, so there is never any chance of two or more
  3131. ** processes opening the journal at the same time.
  3132. **
  3133. ** Open the journal for read/write access. This is because in
  3134. ** exclusive-access mode the file descriptor will be kept open and
  3135. ** possibly used for a transaction later on. On some systems, the
  3136. ** OsTruncate() call used in exclusive-access mode also requires
  3137. ** a read/write file handle.
  3138. */
  3139. if( !isHot ){
  3140. rc = SQLITE_BUSY;
  3141. if( sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS) ){
  3142. int fout = 0;
  3143. int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
  3144. assert( !pPager->tempFile );
  3145. rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout);
  3146. assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
  3147. if( fout&SQLITE_OPEN_READONLY ){
  3148. rc = SQLITE_BUSY;
  3149. sqlite3OsClose(pPager->jfd);
  3150. }
  3151. }
  3152. }
  3153. if( rc!=SQLITE_OK ){
  3154. pager_unlock(pPager);
  3155. return ((rc==SQLITE_NOMEM||rc==SQLITE_IOERR_NOMEM)?rc:SQLITE_BUSY);
  3156. }
  3157. pPager->journalOpen = 1;
  3158. pPager->journalStarted = 0;
  3159. pPager->journalOff = 0;
  3160. pPager->setMaster = 0;
  3161. pPager->journalHdr = 0;
  3162. /* Playback and delete the journal. Drop the database write
  3163. ** lock and reacquire the read lock.
  3164. */
  3165. rc = pager_playback(pPager, 1);
  3166. if( rc!=SQLITE_OK ){
  3167. return pager_error(pPager, rc);
  3168. }
  3169. assert(pPager->state==PAGER_SHARED ||
  3170. (pPager->exclusiveMode && pPager->state>PAGER_SHARED)
  3171. );
  3172. }
  3173. if( pPager->pAll ){
  3174. /* The shared-lock has just been acquired on the database file
  3175. ** and there are already pages in the cache (from a previous
  3176. ** read or write transaction). Check to see if the database
  3177. ** has been modified. If the database has changed, flush the
  3178. ** cache.
  3179. **
  3180. ** Database changes is detected by looking at 15 bytes beginning
  3181. ** at offset 24 into the file. The first 4 of these 16 bytes are
  3182. ** a 32-bit counter that is incremented with each change. The
  3183. ** other bytes change randomly with each file change when
  3184. ** a codec is in use.
  3185. **
  3186. ** There is a vanishingly small chance that a change will not be
  3187. ** detected. The chance of an undetected change is so small that
  3188. ** it can be neglected.
  3189. */
  3190. char dbFileVers[sizeof(pPager->dbFileVers)];
  3191. sqlite3PagerPagecount(pPager);
  3192. if( pPager->errCode ){
  3193. return pPager->errCode;
  3194. }
  3195. if( pPager->dbSize>0 ){
  3196. IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers)));
  3197. rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
  3198. if( rc!=SQLITE_OK ){
  3199. return rc;
  3200. }
  3201. }else{
  3202. memset(dbFileVers, 0, sizeof(dbFileVers));
  3203. }
  3204. if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
  3205. pager_reset(pPager);
  3206. }
  3207. }
  3208. }
  3209. assert( pPager->exclusiveMode || pPager->state<=PAGER_SHARED );
  3210. if( pPager->state==PAGER_UNLOCK ){
  3211. pPager->state = PAGER_SHARED;
  3212. }
  3213. }
  3214. return rc;
  3215. }
  3216. /*
  3217. ** Allocate a PgHdr object. Either create a new one or reuse
  3218. ** an existing one that is not otherwise in use.
  3219. **
  3220. ** A new PgHdr structure is created if any of the following are
  3221. ** true:
  3222. **
  3223. ** (1) We have not exceeded our maximum allocated cache size
  3224. ** as set by the "PRAGMA cache_size" command.
  3225. **
  3226. ** (2) There are no unused PgHdr objects available at this time.
  3227. **
  3228. ** (3) This is an in-memory database.
  3229. **
  3230. ** (4) There are no PgHdr objects that do not require a journal
  3231. ** file sync and a sync of the journal file is currently
  3232. ** prohibited.
  3233. **
  3234. ** Otherwise, reuse an existing PgHdr. In other words, reuse an
  3235. ** existing PgHdr if all of the following are true:
  3236. **
  3237. ** (1) We have reached or exceeded the maximum cache size
  3238. ** allowed by "PRAGMA cache_size".
  3239. **
  3240. ** (2) There is a PgHdr available with PgHdr->nRef==0
  3241. **
  3242. ** (3) We are not in an in-memory database
  3243. **
  3244. ** (4) Either there is an available PgHdr that does not need
  3245. ** to be synced to disk or else disk syncing is currently
  3246. ** allowed.
  3247. */
  3248. static int pagerAllocatePage(Pager *pPager, PgHdr **ppPg){
  3249. int rc = SQLITE_OK;
  3250. PgHdr *pPg;
  3251. int nByteHdr;
  3252. /* Create a new PgHdr if any of the four conditions defined
  3253. ** above are met: */
  3254. if( pPager->nPage<pPager->mxPage
  3255. || pPager->lru.pFirst==0
  3256. || MEMDB
  3257. || (pPager->lru.pFirstSynced==0 && pPager->doNotSync)
  3258. ){
  3259. if( pPager->nPage>=pPager->nHash ){
  3260. pager_resize_hash_table(pPager,
  3261. pPager->nHash<256 ? 256 : pPager->nHash*2);
  3262. if( pPager->nHash==0 ){
  3263. rc = SQLITE_NOMEM;
  3264. goto pager_allocate_out;
  3265. }
  3266. }
  3267. pagerLeave(pPager);
  3268. nByteHdr = sizeof(*pPg) + sizeof(u32) + pPager->nExtra
  3269. + MEMDB*sizeof(PgHistory);
  3270. pPg = sqlite3_malloc( nByteHdr + pPager->pageSize );
  3271. pagerEnter(pPager);
  3272. if( pPg==0 ){
  3273. rc = SQLITE_NOMEM;
  3274. goto pager_allocate_out;
  3275. }
  3276. memset(pPg, 0, nByteHdr);
  3277. pPg->pData = (void*)(nByteHdr + (char*)pPg);
  3278. pPg->pPager = pPager;
  3279. pPg->pNextAll = pPager->pAll;
  3280. pPager->pAll = pPg;
  3281. pPager->nPage++;
  3282. }else{
  3283. /* Recycle an existing page with a zero ref-count. */
  3284. rc = pager_recycle(pPager, &pPg);
  3285. if( rc==SQLITE_BUSY ){
  3286. rc = SQLITE_IOERR_BLOCKED;
  3287. }
  3288. if( rc!=SQLITE_OK ){
  3289. goto pager_allocate_out;
  3290. }
  3291. assert( pPager->state>=SHARED_LOCK );
  3292. assert(pPg);
  3293. }
  3294. *ppPg = pPg;
  3295. pager_allocate_out:
  3296. return rc;
  3297. }
  3298. /*
  3299. ** Make sure we have the content for a page. If the page was
  3300. ** previously acquired with noContent==1, then the content was
  3301. ** just initialized to zeros instead of being read from disk.
  3302. ** But now we need the real data off of disk. So make sure we
  3303. ** have it. Read it in if we do not have it already.
  3304. */
  3305. static int pager_get_content(PgHdr *pPg){
  3306. if( pPg->needRead ){
  3307. int rc = readDbPage(pPg->pPager, pPg, pPg->pgno);
  3308. if( rc==SQLITE_OK ){
  3309. pPg->needRead = 0;
  3310. }else{
  3311. return rc;
  3312. }
  3313. }
  3314. return SQLITE_OK;
  3315. }
  3316. /*
  3317. ** Acquire a page.
  3318. **
  3319. ** A read lock on the disk file is obtained when the first page is acquired.
  3320. ** This read lock is dropped when the last page is released.
  3321. **
  3322. ** This routine works for any page number greater than 0. If the database
  3323. ** file is smaller than the requested page, then no actual disk
  3324. ** read occurs and the memory image of the page is initialized to
  3325. ** all zeros. The extra data appended to a page is always initialized
  3326. ** to zeros the first time a page is loaded into memory.
  3327. **
  3328. ** The acquisition might fail for several reasons. In all cases,
  3329. ** an appropriate error code is returned and *ppPage is set to NULL.
  3330. **
  3331. ** See also sqlite3PagerLookup(). Both this routine and Lookup() attempt
  3332. ** to find a page in the in-memory cache first. If the page is not already
  3333. ** in memory, this routine goes to disk to read it in whereas Lookup()
  3334. ** just returns 0. This routine acquires a read-lock the first time it
  3335. ** has to go to disk, and could also playback an old journal if necessary.
  3336. ** Since Lookup() never goes to disk, it never has to deal with locks
  3337. ** or journal files.
  3338. **
  3339. ** If noContent is false, the page contents are actually read from disk.
  3340. ** If noContent is true, it means that we do not care about the contents
  3341. ** of the page at this time, so do not do a disk read. Just fill in the
  3342. ** page content with zeros. But mark the fact that we have not read the
  3343. ** content by setting the PgHdr.needRead flag. Later on, if
  3344. ** sqlite3PagerWrite() is called on this page or if this routine is
  3345. ** called again with noContent==0, that means that the content is needed
  3346. ** and the disk read should occur at that point.
  3347. */
  3348. static int pagerAcquire(
  3349. Pager *pPager, /* The pager open on the database file */
  3350. Pgno pgno, /* Page number to fetch */
  3351. DbPage **ppPage, /* Write a pointer to the page here */
  3352. int noContent /* Do not bother reading content from disk if true */
  3353. ){
  3354. PgHdr *pPg;
  3355. int rc;
  3356. assert( pPager->state==PAGER_UNLOCK || pPager->nRef>0 || pgno==1 );
  3357. /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
  3358. ** number greater than this, or zero, is requested.
  3359. */
  3360. if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
  3361. return SQLITE_CORRUPT_BKPT;
  3362. }
  3363. /* Make sure we have not hit any critical errors.
  3364. */
  3365. assert( pPager!=0 );
  3366. *ppPage = 0;
  3367. /* If this is the first page accessed, then get a SHARED lock
  3368. ** on the database file. pagerSharedLock() is a no-op if
  3369. ** a database lock is already held.
  3370. */
  3371. rc = pagerSharedLock(pPager);
  3372. if( rc!=SQLITE_OK ){
  3373. return rc;
  3374. }
  3375. assert( pPager->state!=PAGER_UNLOCK );
  3376. pPg = pager_lookup(pPager, pgno);
  3377. if( pPg==0 ){
  3378. /* The requested page is not in the page cache. */
  3379. int nMax;
  3380. int h;
  3381. PAGER_INCR(pPager->nMiss);
  3382. rc = pagerAllocatePage(pPager, &pPg);
  3383. if( rc!=SQLITE_OK ){
  3384. return rc;
  3385. }
  3386. pPg->pgno = pgno;
  3387. assert( !MEMDB || pgno>pPager->stmtSize );
  3388. if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
  3389. #if 0
  3390. sqlite3CheckMemory(pPager->aInJournal, pgno/8);
  3391. #endif
  3392. assert( pPager->journalOpen );
  3393. pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
  3394. pPg->needSync = 0;
  3395. }else{
  3396. pPg->inJournal = 0;
  3397. pPg->needSync = 0;
  3398. }
  3399. makeClean(pPg);
  3400. pPg->nRef = 1;
  3401. REFINFO(pPg);
  3402. pPager->nRef++;
  3403. if( pPager->nExtra>0 ){
  3404. memset(PGHDR_TO_EXTRA(pPg, pPager), 0, pPager->nExtra);
  3405. }
  3406. nMax = sqlite3PagerPagecount(pPager);
  3407. if( pPager->errCode ){
  3408. rc = pPager->errCode;
  3409. sqlite3PagerUnref(pPg);
  3410. return rc;
  3411. }
  3412. /* Populate the page with data, either by reading from the database
  3413. ** file, or by setting the entire page to zero.
  3414. */
  3415. if( nMax<(int)pgno || MEMDB || (noContent && !pPager->alwaysRollback) ){
  3416. if( pgno>pPager->mxPgno ){
  3417. sqlite3PagerUnref(pPg);
  3418. return SQLITE_FULL;
  3419. }
  3420. memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
  3421. pPg->needRead = noContent && !pPager->alwaysRollback;
  3422. IOTRACE(("ZERO %p %d\n", pPager, pgno));
  3423. }else{
  3424. rc = readDbPage(pPager, pPg, pgno);
  3425. if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
  3426. pPg->pgno = 0;
  3427. sqlite3PagerUnref(pPg);
  3428. return rc;
  3429. }
  3430. pPg->needRead = 0;
  3431. }
  3432. /* Link the page into the page hash table */
  3433. h = pgno & (pPager->nHash-1);
  3434. assert( pgno!=0 );
  3435. pPg->pNextHash = pPager->aHash[h];
  3436. pPager->aHash[h] = pPg;
  3437. if( pPg->pNextHash ){
  3438. assert( pPg->pNextHash->pPrevHash==0 );
  3439. pPg->pNextHash->pPrevHash = pPg;
  3440. }
  3441. #ifdef SQLITE_CHECK_PAGES
  3442. pPg->pageHash = pager_pagehash(pPg);
  3443. #endif
  3444. }else{
  3445. /* The requested page is in the page cache. */
  3446. assert(pPager->nRef>0 || pgno==1);
  3447. PAGER_INCR(pPager->nHit);
  3448. if( !noContent ){
  3449. rc = pager_get_content(pPg);
  3450. if( rc ){
  3451. return rc;
  3452. }
  3453. }
  3454. page_ref(pPg);
  3455. }
  3456. *ppPage = pPg;
  3457. return SQLITE_OK;
  3458. }
  3459. int sqlite3PagerAcquire(
  3460. Pager *pPager, /* The pager open on the database file */
  3461. Pgno pgno, /* Page number to fetch */
  3462. DbPage **ppPage, /* Write a pointer to the page here */
  3463. int noContent /* Do not bother reading content from disk if true */
  3464. ){
  3465. int rc;
  3466. pagerEnter(pPager);
  3467. rc = pagerAcquire(pPager, pgno, ppPage, noContent);
  3468. pagerLeave(pPager);
  3469. return rc;
  3470. }
  3471. /*
  3472. ** Acquire a page if it is already in the in-memory cache. Do
  3473. ** not read the page from disk. Return a pointer to the page,
  3474. ** or 0 if the page is not in cache.
  3475. **
  3476. ** See also sqlite3PagerGet(). The difference between this routine
  3477. ** and sqlite3PagerGet() is that _get() will go to the disk and read
  3478. ** in the page if the page is not already in cache. This routine
  3479. ** returns NULL if the page is not in cache or if a disk I/O error
  3480. ** has ever happened.
  3481. */
  3482. DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
  3483. PgHdr *pPg = 0;
  3484. assert( pPager!=0 );
  3485. assert( pgno!=0 );
  3486. pagerEnter(pPager);
  3487. if( pPager->state==PAGER_UNLOCK ){
  3488. assert( !pPager->pAll || pPager->exclusiveMode );
  3489. }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
  3490. /* Do nothing */
  3491. }else if( (pPg = pager_lookup(pPager, pgno))!=0 ){
  3492. page_ref(pPg);
  3493. }
  3494. pagerLeave(pPager);
  3495. return pPg;
  3496. }
  3497. /*
  3498. ** Release a page.
  3499. **
  3500. ** If the number of references to the page drop to zero, then the
  3501. ** page is added to the LRU list. When all references to all pages
  3502. ** are released, a rollback occurs and the lock on the database is
  3503. ** removed.
  3504. */
  3505. int sqlite3PagerUnref(DbPage *pPg){
  3506. Pager *pPager = pPg->pPager;
  3507. /* Decrement the reference count for this page
  3508. */
  3509. assert( pPg->nRef>0 );
  3510. pagerEnter(pPg->pPager);
  3511. pPg->nRef--;
  3512. REFINFO(pPg);
  3513. CHECK_PAGE(pPg);
  3514. /* When the number of references to a page reach 0, call the
  3515. ** destructor and add the page to the freelist.
  3516. */
  3517. if( pPg->nRef==0 ){
  3518. lruListAdd(pPg);
  3519. if( pPager->xDestructor ){
  3520. pPager->xDestructor(pPg, pPager->pageSize);
  3521. }
  3522. /* When all pages reach the freelist, drop the read lock from
  3523. ** the database file.
  3524. */
  3525. pPager->nRef--;
  3526. assert( pPager->nRef>=0 );
  3527. if( pPager->nRef==0 && (!pPager->exclusiveMode || pPager->journalOff>0) ){
  3528. pagerUnlockAndRollback(pPager);
  3529. }
  3530. }
  3531. pagerLeave(pPager);
  3532. return SQLITE_OK;
  3533. }
  3534. /*
  3535. ** Create a journal file for pPager. There should already be a RESERVED
  3536. ** or EXCLUSIVE lock on the database file when this routine is called.
  3537. **
  3538. ** Return SQLITE_OK if everything. Return an error code and release the
  3539. ** write lock if anything goes wrong.
  3540. */
  3541. static int pager_open_journal(Pager *pPager){
  3542. sqlite3_vfs *pVfs = pPager->pVfs;
  3543. int flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_EXCLUSIVE|SQLITE_OPEN_CREATE);
  3544. int rc;
  3545. assert( !MEMDB );
  3546. assert( pPager->state>=PAGER_RESERVED );
  3547. assert( pPager->journalOpen==0 );
  3548. assert( pPager->useJournal );
  3549. assert( pPager->aInJournal==0 );
  3550. sqlite3PagerPagecount(pPager);
  3551. pagerLeave(pPager);
  3552. pPager->aInJournal = sqlite3MallocZero( pPager->dbSize/8 + 1 );
  3553. pagerEnter(pPager);
  3554. if( pPager->aInJournal==0 ){
  3555. rc = SQLITE_NOMEM;
  3556. goto failed_to_open_journal;
  3557. }
  3558. if( pPager->tempFile ){
  3559. flags |= (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL);
  3560. }else{
  3561. flags |= (SQLITE_OPEN_MAIN_JOURNAL);
  3562. }
  3563. #ifdef SQLITE_ENABLE_ATOMIC_WRITE
  3564. rc = sqlite3JournalOpen(
  3565. pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager)
  3566. );
  3567. #else
  3568. rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0);
  3569. #endif
  3570. assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
  3571. pPager->journalOff = 0;
  3572. pPager->setMaster = 0;
  3573. pPager->journalHdr = 0;
  3574. if( rc!=SQLITE_OK ){
  3575. if( rc==SQLITE_NOMEM ){
  3576. sqlite3OsDelete(pVfs, pPager->zJournal, 0);
  3577. }
  3578. goto failed_to_open_journal;
  3579. }
  3580. pPager->journalOpen = 1;
  3581. pPager->journalStarted = 0;
  3582. pPager->needSync = 0;
  3583. pPager->alwaysRollback = 0;
  3584. pPager->nRec = 0;
  3585. if( pPager->errCode ){
  3586. rc = pPager->errCode;
  3587. goto failed_to_open_journal;
  3588. }
  3589. pPager->origDbSize = pPager->dbSize;
  3590. rc = writeJournalHdr(pPager);
  3591. if( pPager->stmtAutoopen && rc==SQLITE_OK ){
  3592. rc = sqlite3PagerStmtBegin(pPager);
  3593. }
  3594. if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_NOMEM ){
  3595. rc = pager_end_transaction(pPager);
  3596. if( rc==SQLITE_OK ){
  3597. rc = SQLITE_FULL;
  3598. }
  3599. }
  3600. return rc;
  3601. failed_to_open_journal:
  3602. sqlite3_free(pPager->aInJournal);
  3603. pPager->aInJournal = 0;
  3604. return rc;
  3605. }
  3606. /*
  3607. ** Acquire a write-lock on the database. The lock is removed when
  3608. ** the any of the following happen:
  3609. **
  3610. ** * sqlite3PagerCommitPhaseTwo() is called.
  3611. ** * sqlite3PagerRollback() is called.
  3612. ** * sqlite3PagerClose() is called.
  3613. ** * sqlite3PagerUnref() is called to on every outstanding page.
  3614. **
  3615. ** The first parameter to this routine is a pointer to any open page of the
  3616. ** database file. Nothing changes about the page - it is used merely to
  3617. ** acquire a pointer to the Pager structure and as proof that there is
  3618. ** already a read-lock on the database.
  3619. **
  3620. ** The second parameter indicates how much space in bytes to reserve for a
  3621. ** master journal file-name at the start of the journal when it is created.
  3622. **
  3623. ** A journal file is opened if this is not a temporary file. For temporary
  3624. ** files, the opening of the journal file is deferred until there is an
  3625. ** actual need to write to the journal.
  3626. **
  3627. ** If the database is already reserved for writing, this routine is a no-op.
  3628. **
  3629. ** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file
  3630. ** immediately instead of waiting until we try to flush the cache. The
  3631. ** exFlag is ignored if a transaction is already active.
  3632. */
  3633. int sqlite3PagerBegin(DbPage *pPg, int exFlag){
  3634. Pager *pPager = pPg->pPager;
  3635. int rc = SQLITE_OK;
  3636. pagerEnter(pPager);
  3637. assert( pPg->nRef>0 );
  3638. assert( pPager->state!=PAGER_UNLOCK );
  3639. if( pPager->state==PAGER_SHARED ){
  3640. assert( pPager->aInJournal==0 );
  3641. if( MEMDB ){
  3642. pPager->state = PAGER_EXCLUSIVE;
  3643. pPager->origDbSize = pPager->dbSize;
  3644. }else{
  3645. rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
  3646. if( rc==SQLITE_OK ){
  3647. pPager->state = PAGER_RESERVED;
  3648. if( exFlag ){
  3649. rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  3650. }
  3651. }
  3652. if( rc!=SQLITE_OK ){
  3653. pagerLeave(pPager);
  3654. return rc;
  3655. }
  3656. pPager->dirtyCache = 0;
  3657. PAGERTRACE2("TRANSACTION %d\n", PAGERID(pPager));
  3658. if( pPager->useJournal && !pPager->tempFile ){
  3659. rc = pager_open_journal(pPager);
  3660. }
  3661. }
  3662. }else if( pPager->journalOpen && pPager->journalOff==0 ){
  3663. /* This happens when the pager was in exclusive-access mode last
  3664. ** time a (read or write) transaction was successfully concluded
  3665. ** by this connection. Instead of deleting the journal file it was
  3666. ** kept open and truncated to 0 bytes.
  3667. */
  3668. assert( pPager->nRec==0 );
  3669. assert( pPager->origDbSize==0 );
  3670. assert( pPager->aInJournal==0 );
  3671. sqlite3PagerPagecount(pPager);
  3672. pagerLeave(pPager);
  3673. pPager->aInJournal = sqlite3MallocZero( pPager->dbSize/8 + 1 );
  3674. pagerEnter(pPager);
  3675. if( !pPager->aInJournal ){
  3676. rc = SQLITE_NOMEM;
  3677. }else{
  3678. pPager->origDbSize = pPager->dbSize;
  3679. rc = writeJournalHdr(pPager);
  3680. }
  3681. }
  3682. assert( !pPager->journalOpen || pPager->journalOff>0 || rc!=SQLITE_OK );
  3683. pagerLeave(pPager);
  3684. return rc;
  3685. }
  3686. /*
  3687. ** Make a page dirty. Set its dirty flag and add it to the dirty
  3688. ** page list.
  3689. */
  3690. static void makeDirty(PgHdr *pPg){
  3691. if( pPg->dirty==0 ){
  3692. Pager *pPager = pPg->pPager;
  3693. pPg->dirty = 1;
  3694. pPg->pDirty = pPager->pDirty;
  3695. if( pPager->pDirty ){
  3696. pPager->pDirty->pPrevDirty = pPg;
  3697. }
  3698. pPg->pPrevDirty = 0;
  3699. pPager->pDirty = pPg;
  3700. }
  3701. }
  3702. /*
  3703. ** Make a page clean. Clear its dirty bit and remove it from the
  3704. ** dirty page list.
  3705. */
  3706. static void makeClean(PgHdr *pPg){
  3707. if( pPg->dirty ){
  3708. pPg->dirty = 0;
  3709. if( pPg->pDirty ){
  3710. assert( pPg->pDirty->pPrevDirty==pPg );
  3711. pPg->pDirty->pPrevDirty = pPg->pPrevDirty;
  3712. }
  3713. if( pPg->pPrevDirty ){
  3714. assert( pPg->pPrevDirty->pDirty==pPg );
  3715. pPg->pPrevDirty->pDirty = pPg->pDirty;
  3716. }else{
  3717. assert( pPg->pPager->pDirty==pPg );
  3718. pPg->pPager->pDirty = pPg->pDirty;
  3719. }
  3720. }
  3721. }
  3722. /*
  3723. ** Mark a data page as writeable. The page is written into the journal
  3724. ** if it is not there already. This routine must be called before making
  3725. ** changes to a page.
  3726. **
  3727. ** The first time this routine is called, the pager creates a new
  3728. ** journal and acquires a RESERVED lock on the database. If the RESERVED
  3729. ** lock could not be acquired, this routine returns SQLITE_BUSY. The
  3730. ** calling routine must check for that return value and be careful not to
  3731. ** change any page data until this routine returns SQLITE_OK.
  3732. **
  3733. ** If the journal file could not be written because the disk is full,
  3734. ** then this routine returns SQLITE_FULL and does an immediate rollback.
  3735. ** All subsequent write attempts also return SQLITE_FULL until there
  3736. ** is a call to sqlite3PagerCommit() or sqlite3PagerRollback() to
  3737. ** reset.
  3738. */
  3739. static int pager_write(PgHdr *pPg){
  3740. void *pData = PGHDR_TO_DATA(pPg);
  3741. Pager *pPager = pPg->pPager;
  3742. int rc = SQLITE_OK;
  3743. /* Check for errors
  3744. */
  3745. if( pPager->errCode ){
  3746. return pPager->errCode;
  3747. }
  3748. if( pPager->readOnly ){
  3749. return SQLITE_PERM;
  3750. }
  3751. assert( !pPager->setMaster );
  3752. CHECK_PAGE(pPg);
  3753. /* If this page was previously acquired with noContent==1, that means
  3754. ** we didn't really read in the content of the page. This can happen
  3755. ** (for example) when the page is being moved to the freelist. But
  3756. ** now we are (perhaps) moving the page off of the freelist for
  3757. ** reuse and we need to know its original content so that content
  3758. ** can be stored in the rollback journal. So do the read at this
  3759. ** time.
  3760. */
  3761. rc = pager_get_content(pPg);
  3762. if( rc ){
  3763. return rc;
  3764. }
  3765. /* Mark the page as dirty. If the page has already been written
  3766. ** to the journal then we can return right away.
  3767. */
  3768. makeDirty(pPg);
  3769. if( pPg->inJournal && (pageInStatement(pPg) || pPager->stmtInUse==0) ){
  3770. pPager->dirtyCache = 1;
  3771. }else{
  3772. /* If we get this far, it means that the page needs to be
  3773. ** written to the transaction journal or the ckeckpoint journal
  3774. ** or both.
  3775. **
  3776. ** First check to see that the transaction journal exists and
  3777. ** create it if it does not.
  3778. */
  3779. assert( pPager->state!=PAGER_UNLOCK );
  3780. rc = sqlite3PagerBegin(pPg, 0);
  3781. if( rc!=SQLITE_OK ){
  3782. return rc;
  3783. }
  3784. assert( pPager->state>=PAGER_RESERVED );
  3785. if( !pPager->journalOpen && pPager->useJournal ){
  3786. rc = pager_open_journal(pPager);
  3787. if( rc!=SQLITE_OK ) return rc;
  3788. }
  3789. assert( pPager->journalOpen || !pPager->useJournal );
  3790. pPager->dirtyCache = 1;
  3791. /* The transaction journal now exists and we have a RESERVED or an
  3792. ** EXCLUSIVE lock on the main database file. Write the current page to
  3793. ** the transaction journal if it is not there already.
  3794. */
  3795. if( !pPg->inJournal && (pPager->useJournal || MEMDB) ){
  3796. if( (int)pPg->pgno <= pPager->origDbSize ){
  3797. if( MEMDB ){
  3798. PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
  3799. PAGERTRACE3("JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
  3800. assert( pHist->pOrig==0 );
  3801. pHist->pOrig = sqlite3_malloc( pPager->pageSize );
  3802. if( !pHist->pOrig ){
  3803. return SQLITE_NOMEM;
  3804. }
  3805. memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize);
  3806. }else{
  3807. u32 cksum;
  3808. char *pData2;
  3809. /* We should never write to the journal file the page that
  3810. ** contains the database locks. The following assert verifies
  3811. ** that we do not. */
  3812. assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
  3813. pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
  3814. cksum = pager_cksum(pPager, (u8*)pData2);
  3815. rc = write32bits(pPager->jfd, pPager->journalOff, pPg->pgno);
  3816. if( rc==SQLITE_OK ){
  3817. rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize,
  3818. pPager->journalOff + 4);
  3819. pPager->journalOff += pPager->pageSize+4;
  3820. }
  3821. if( rc==SQLITE_OK ){
  3822. rc = write32bits(pPager->jfd, pPager->journalOff, cksum);
  3823. pPager->journalOff += 4;
  3824. }
  3825. IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno,
  3826. pPager->journalOff, pPager->pageSize));
  3827. PAGER_INCR(sqlite3_pager_writej_count);
  3828. PAGERTRACE5("JOURNAL %d page %d needSync=%d hash(%08x)\n",
  3829. PAGERID(pPager), pPg->pgno, pPg->needSync, pager_pagehash(pPg));
  3830. /* An error has occured writing to the journal file. The
  3831. ** transaction will be rolled back by the layer above.
  3832. */
  3833. if( rc!=SQLITE_OK ){
  3834. return rc;
  3835. }
  3836. pPager->nRec++;
  3837. assert( pPager->aInJournal!=0 );
  3838. pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
  3839. pPg->needSync = !pPager->noSync;
  3840. if( pPager->stmtInUse ){
  3841. pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
  3842. }
  3843. }
  3844. }else{
  3845. pPg->needSync = !pPager->journalStarted && !pPager->noSync;
  3846. PAGERTRACE4("APPEND %d page %d needSync=%d\n",
  3847. PAGERID(pPager), pPg->pgno, pPg->needSync);
  3848. }
  3849. if( pPg->needSync ){
  3850. pPager->needSync = 1;
  3851. }
  3852. pPg->inJournal = 1;
  3853. }
  3854. /* If the statement journal is open and the page is not in it,
  3855. ** then write the current page to the statement journal. Note that
  3856. ** the statement journal format differs from the standard journal format
  3857. ** in that it omits the checksums and the header.
  3858. */
  3859. if( pPager->stmtInUse
  3860. && !pageInStatement(pPg)
  3861. && (int)pPg->pgno<=pPager->stmtSize
  3862. ){
  3863. assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
  3864. if( MEMDB ){
  3865. PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
  3866. assert( pHist->pStmt==0 );
  3867. pHist->pStmt = sqlite3_malloc( pPager->pageSize );
  3868. if( pHist->pStmt ){
  3869. memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize);
  3870. }
  3871. PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
  3872. page_add_to_stmt_list(pPg);
  3873. }else{
  3874. i64 offset = pPager->stmtNRec*(4+pPager->pageSize);
  3875. char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
  3876. rc = write32bits(pPager->stfd, offset, pPg->pgno);
  3877. if( rc==SQLITE_OK ){
  3878. rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize, offset+4);
  3879. }
  3880. PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
  3881. if( rc!=SQLITE_OK ){
  3882. return rc;
  3883. }
  3884. pPager->stmtNRec++;
  3885. assert( pPager->aInStmt!=0 );
  3886. pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
  3887. }
  3888. }
  3889. }
  3890. /* Update the database size and return.
  3891. */
  3892. assert( pPager->state>=PAGER_SHARED );
  3893. if( pPager->dbSize<(int)pPg->pgno ){
  3894. pPager->dbSize = pPg->pgno;
  3895. if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
  3896. pPager->dbSize++;
  3897. }
  3898. }
  3899. return rc;
  3900. }
  3901. /*
  3902. ** This function is used to mark a data-page as writable. It uses
  3903. ** pager_write() to open a journal file (if it is not already open)
  3904. ** and write the page *pData to the journal.
  3905. **
  3906. ** The difference between this function and pager_write() is that this
  3907. ** function also deals with the special case where 2 or more pages
  3908. ** fit on a single disk sector. In this case all co-resident pages
  3909. ** must have been written to the journal file before returning.
  3910. */
  3911. int sqlite3PagerWrite(DbPage *pDbPage){
  3912. int rc = SQLITE_OK;
  3913. PgHdr *pPg = pDbPage;
  3914. Pager *pPager = pPg->pPager;
  3915. Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
  3916. pagerEnter(pPager);
  3917. if( !MEMDB && nPagePerSector>1 ){
  3918. Pgno nPageCount; /* Total number of pages in database file */
  3919. Pgno pg1; /* First page of the sector pPg is located on. */
  3920. int nPage; /* Number of pages starting at pg1 to journal */
  3921. int ii;
  3922. int needSync = 0;
  3923. /* Set the doNotSync flag to 1. This is because we cannot allow a journal
  3924. ** header to be written between the pages journaled by this function.
  3925. */
  3926. assert( pPager->doNotSync==0 );
  3927. pPager->doNotSync = 1;
  3928. /* This trick assumes that both the page-size and sector-size are
  3929. ** an integer power of 2. It sets variable pg1 to the identifier
  3930. ** of the first page of the sector pPg is located on.
  3931. */
  3932. pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;
  3933. nPageCount = sqlite3PagerPagecount(pPager);
  3934. if( pPg->pgno>nPageCount ){
  3935. nPage = (pPg->pgno - pg1)+1;
  3936. }else if( (pg1+nPagePerSector-1)>nPageCount ){
  3937. nPage = nPageCount+1-pg1;
  3938. }else{
  3939. nPage = nPagePerSector;
  3940. }
  3941. assert(nPage>0);
  3942. assert(pg1<=pPg->pgno);
  3943. assert((pg1+nPage)>pPg->pgno);
  3944. for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){
  3945. Pgno pg = pg1+ii;
  3946. PgHdr *pPage;
  3947. if( !pPager->aInJournal || pg==pPg->pgno ||
  3948. pg>pPager->origDbSize || !(pPager->aInJournal[pg/8]&(1<<(pg&7)))
  3949. ) {
  3950. if( pg!=PAGER_MJ_PGNO(pPager) ){
  3951. rc = sqlite3PagerGet(pPager, pg, &pPage);
  3952. if( rc==SQLITE_OK ){
  3953. rc = pager_write(pPage);
  3954. if( pPage->needSync ){
  3955. needSync = 1;
  3956. }
  3957. sqlite3PagerUnref(pPage);
  3958. }
  3959. }
  3960. }else if( (pPage = pager_lookup(pPager, pg)) ){
  3961. if( pPage->needSync ){
  3962. needSync = 1;
  3963. }
  3964. }
  3965. }
  3966. /* If the PgHdr.needSync flag is set for any of the nPage pages
  3967. ** starting at pg1, then it needs to be set for all of them. Because
  3968. ** writing to any of these nPage pages may damage the others, the
  3969. ** journal file must contain sync()ed copies of all of them
  3970. ** before any of them can be written out to the database file.
  3971. */
  3972. if( needSync ){
  3973. for(ii=0; ii<nPage && needSync; ii++){
  3974. PgHdr *pPage = pager_lookup(pPager, pg1+ii);
  3975. if( pPage ) pPage->needSync = 1;
  3976. }
  3977. assert(pPager->needSync);
  3978. }
  3979. assert( pPager->doNotSync==1 );
  3980. pPager->doNotSync = 0;
  3981. }else{
  3982. rc = pager_write(pDbPage);
  3983. }
  3984. pagerLeave(pPager);
  3985. return rc;
  3986. }
  3987. /*
  3988. ** Return TRUE if the page given in the argument was previously passed
  3989. ** to sqlite3PagerWrite(). In other words, return TRUE if it is ok
  3990. ** to change the content of the page.
  3991. */
  3992. #ifndef NDEBUG
  3993. int sqlite3PagerIswriteable(DbPage *pPg){
  3994. return pPg->dirty;
  3995. }
  3996. #endif
  3997. #ifndef SQLITE_OMIT_VACUUM
  3998. /*
  3999. ** Replace the content of a single page with the information in the third
  4000. ** argument.
  4001. */
  4002. int sqlite3PagerOverwrite(Pager *pPager, Pgno pgno, void *pData){
  4003. PgHdr *pPg;
  4004. int rc;
  4005. pagerEnter(pPager);
  4006. rc = sqlite3PagerGet(pPager, pgno, &pPg);
  4007. if( rc==SQLITE_OK ){
  4008. rc = sqlite3PagerWrite(pPg);
  4009. if( rc==SQLITE_OK ){
  4010. memcpy(sqlite3PagerGetData(pPg), pData, pPager->pageSize);
  4011. }
  4012. sqlite3PagerUnref(pPg);
  4013. }
  4014. pagerLeave(pPager);
  4015. return rc;
  4016. }
  4017. #endif
  4018. /*
  4019. ** A call to this routine tells the pager that it is not necessary to
  4020. ** write the information on page pPg back to the disk, even though
  4021. ** that page might be marked as dirty.
  4022. **
  4023. ** The overlying software layer calls this routine when all of the data
  4024. ** on the given page is unused. The pager marks the page as clean so
  4025. ** that it does not get written to disk.
  4026. **
  4027. ** Tests show that this optimization, together with the
  4028. ** sqlite3PagerDontRollback() below, more than double the speed
  4029. ** of large INSERT operations and quadruple the speed of large DELETEs.
  4030. **
  4031. ** When this routine is called, set the alwaysRollback flag to true.
  4032. ** Subsequent calls to sqlite3PagerDontRollback() for the same page
  4033. ** will thereafter be ignored. This is necessary to avoid a problem
  4034. ** where a page with data is added to the freelist during one part of
  4035. ** a transaction then removed from the freelist during a later part
  4036. ** of the same transaction and reused for some other purpose. When it
  4037. ** is first added to the freelist, this routine is called. When reused,
  4038. ** the sqlite3PagerDontRollback() routine is called. But because the
  4039. ** page contains critical data, we still need to be sure it gets
  4040. ** rolled back in spite of the sqlite3PagerDontRollback() call.
  4041. */
  4042. void sqlite3PagerDontWrite(DbPage *pDbPage){
  4043. PgHdr *pPg = pDbPage;
  4044. Pager *pPager = pPg->pPager;
  4045. if( MEMDB ) return;
  4046. pagerEnter(pPager);
  4047. pPg->alwaysRollback = 1;
  4048. if( pPg->dirty && !pPager->stmtInUse ){
  4049. assert( pPager->state>=PAGER_SHARED );
  4050. if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
  4051. /* If this pages is the last page in the file and the file has grown
  4052. ** during the current transaction, then do NOT mark the page as clean.
  4053. ** When the database file grows, we must make sure that the last page
  4054. ** gets written at least once so that the disk file will be the correct
  4055. ** size. If you do not write this page and the size of the file
  4056. ** on the disk ends up being too small, that can lead to database
  4057. ** corruption during the next transaction.
  4058. */
  4059. }else{
  4060. PAGERTRACE3("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager));
  4061. IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
  4062. makeClean(pPg);
  4063. #ifdef SQLITE_CHECK_PAGES
  4064. pPg->pageHash = pager_pagehash(pPg);
  4065. #endif
  4066. }
  4067. }
  4068. pagerLeave(pPager);
  4069. }
  4070. /*
  4071. ** A call to this routine tells the pager that if a rollback occurs,
  4072. ** it is not necessary to restore the data on the given page. This
  4073. ** means that the pager does not have to record the given page in the
  4074. ** rollback journal.
  4075. **
  4076. ** If we have not yet actually read the content of this page (if
  4077. ** the PgHdr.needRead flag is set) then this routine acts as a promise
  4078. ** that we will never need to read the page content in the future.
  4079. ** so the needRead flag can be cleared at this point.
  4080. */
  4081. void sqlite3PagerDontRollback(DbPage *pPg){
  4082. Pager *pPager = pPg->pPager;
  4083. pagerEnter(pPager);
  4084. assert( pPager->state>=PAGER_RESERVED );
  4085. if( pPager->journalOpen==0 ) return;
  4086. if( pPg->alwaysRollback || pPager->alwaysRollback || MEMDB ) return;
  4087. if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
  4088. assert( pPager->aInJournal!=0 );
  4089. pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
  4090. pPg->inJournal = 1;
  4091. pPg->needRead = 0;
  4092. if( pPager->stmtInUse ){
  4093. pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
  4094. }
  4095. PAGERTRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager));
  4096. IOTRACE(("GARBAGE %p %d\n", pPager, pPg->pgno))
  4097. }
  4098. if( pPager->stmtInUse
  4099. && !pageInStatement(pPg)
  4100. && (int)pPg->pgno<=pPager->stmtSize
  4101. ){
  4102. assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
  4103. assert( pPager->aInStmt!=0 );
  4104. pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
  4105. }
  4106. pagerLeave(pPager);
  4107. }
  4108. /*
  4109. ** This routine is called to increment the database file change-counter,
  4110. ** stored at byte 24 of the pager file.
  4111. */
  4112. static int pager_incr_changecounter(Pager *pPager, int isDirect){
  4113. PgHdr *pPgHdr;
  4114. u32 change_counter;
  4115. int rc = SQLITE_OK;
  4116. if( !pPager->changeCountDone ){
  4117. /* Open page 1 of the file for writing. */
  4118. rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
  4119. if( rc!=SQLITE_OK ) return rc;
  4120. if( !isDirect ){
  4121. rc = sqlite3PagerWrite(pPgHdr);
  4122. if( rc!=SQLITE_OK ){
  4123. sqlite3PagerUnref(pPgHdr);
  4124. return rc;
  4125. }
  4126. }
  4127. /* Increment the value just read and write it back to byte 24. */
  4128. change_counter = sqlite3Get4byte((u8*)pPager->dbFileVers);
  4129. change_counter++;
  4130. put32bits(((char*)PGHDR_TO_DATA(pPgHdr))+24, change_counter);
  4131. if( isDirect && pPager->fd->pMethods ){
  4132. const void *zBuf = PGHDR_TO_DATA(pPgHdr);
  4133. rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0);
  4134. }
  4135. /* Release the page reference. */
  4136. sqlite3PagerUnref(pPgHdr);
  4137. pPager->changeCountDone = 1;
  4138. }
  4139. return rc;
  4140. }
  4141. /*
  4142. ** Sync the database file for the pager pPager. zMaster points to the name
  4143. ** of a master journal file that should be written into the individual
  4144. ** journal file. zMaster may be NULL, which is interpreted as no master
  4145. ** journal (a single database transaction).
  4146. **
  4147. ** This routine ensures that the journal is synced, all dirty pages written
  4148. ** to the database file and the database file synced. The only thing that
  4149. ** remains to commit the transaction is to delete the journal file (or
  4150. ** master journal file if specified).
  4151. **
  4152. ** Note that if zMaster==NULL, this does not overwrite a previous value
  4153. ** passed to an sqlite3PagerCommitPhaseOne() call.
  4154. **
  4155. ** If parameter nTrunc is non-zero, then the pager file is truncated to
  4156. ** nTrunc pages (this is used by auto-vacuum databases).
  4157. */
  4158. int sqlite3PagerCommitPhaseOne(Pager *pPager, const char *zMaster, Pgno nTrunc){
  4159. int rc = SQLITE_OK;
  4160. PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n",
  4161. pPager->zFilename, zMaster, nTrunc);
  4162. pagerEnter(pPager);
  4163. /* If this is an in-memory db, or no pages have been written to, or this
  4164. ** function has already been called, it is a no-op.
  4165. */
  4166. if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
  4167. PgHdr *pPg;
  4168. #ifdef SQLITE_ENABLE_ATOMIC_WRITE
  4169. /* The atomic-write optimization can be used if all of the
  4170. ** following are true:
  4171. **
  4172. ** + The file-system supports the atomic-write property for
  4173. ** blocks of size page-size, and
  4174. ** + This commit is not part of a multi-file transaction, and
  4175. ** + Exactly one page has been modified and store in the journal file.
  4176. **
  4177. ** If the optimization can be used, then the journal file will never
  4178. ** be created for this transaction.
  4179. */
  4180. int useAtomicWrite = (
  4181. !zMaster &&
  4182. pPager->journalOff==jrnlBufferSize(pPager) &&
  4183. nTrunc==0 &&
  4184. (0==pPager->pDirty || 0==pPager->pDirty->pDirty)
  4185. );
  4186. if( useAtomicWrite ){
  4187. /* Update the nRec field in the journal file. */
  4188. int offset = pPager->journalHdr + sizeof(aJournalMagic);
  4189. assert(pPager->nRec==1);
  4190. rc = write32bits(pPager->jfd, offset, pPager->nRec);
  4191. /* Update the db file change counter. The following call will modify
  4192. ** the in-memory representation of page 1 to include the updated
  4193. ** change counter and then write page 1 directly to the database
  4194. ** file. Because of the atomic-write property of the host file-system,
  4195. ** this is safe.
  4196. */
  4197. if( rc==SQLITE_OK ){
  4198. rc = pager_incr_changecounter(pPager, 1);
  4199. }
  4200. }else{
  4201. rc = sqlite3JournalCreate(pPager->jfd);
  4202. }
  4203. if( !useAtomicWrite && rc==SQLITE_OK )
  4204. #endif
  4205. /* If a master journal file name has already been written to the
  4206. ** journal file, then no sync is required. This happens when it is
  4207. ** written, then the process fails to upgrade from a RESERVED to an
  4208. ** EXCLUSIVE lock. The next time the process tries to commit the
  4209. ** transaction the m-j name will have already been written.
  4210. */
  4211. if( !pPager->setMaster ){
  4212. assert( pPager->journalOpen );
  4213. rc = pager_incr_changecounter(pPager, 0);
  4214. if( rc!=SQLITE_OK ) goto sync_exit;
  4215. #ifndef SQLITE_OMIT_AUTOVACUUM
  4216. if( nTrunc!=0 ){
  4217. /* If this transaction has made the database smaller, then all pages
  4218. ** being discarded by the truncation must be written to the journal
  4219. ** file.
  4220. */
  4221. Pgno i;
  4222. int iSkip = PAGER_MJ_PGNO(pPager);
  4223. for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
  4224. if( !(pPager->aInJournal[i/8] & (1<<(i&7))) && i!=iSkip ){
  4225. rc = sqlite3PagerGet(pPager, i, &pPg);
  4226. if( rc!=SQLITE_OK ) goto sync_exit;
  4227. rc = sqlite3PagerWrite(pPg);
  4228. sqlite3PagerUnref(pPg);
  4229. if( rc!=SQLITE_OK ) goto sync_exit;
  4230. }
  4231. }
  4232. }
  4233. #endif
  4234. rc = writeMasterJournal(pPager, zMaster);
  4235. if( rc!=SQLITE_OK ) goto sync_exit;
  4236. rc = syncJournal(pPager);
  4237. }
  4238. if( rc!=SQLITE_OK ) goto sync_exit;
  4239. #ifndef SQLITE_OMIT_AUTOVACUUM
  4240. if( nTrunc!=0 ){
  4241. rc = sqlite3PagerTruncate(pPager, nTrunc);
  4242. if( rc!=SQLITE_OK ) goto sync_exit;
  4243. }
  4244. #endif
  4245. /* Write all dirty pages to the database file */
  4246. pPg = pager_get_all_dirty_pages(pPager);
  4247. rc = pager_write_pagelist(pPg);
  4248. if( rc!=SQLITE_OK ){
  4249. while( pPg && !pPg->dirty ){ pPg = pPg->pDirty; }
  4250. pPager->pDirty = pPg;
  4251. goto sync_exit;
  4252. }
  4253. pPager->pDirty = 0;
  4254. /* Sync the database file. */
  4255. if( !pPager->noSync ){
  4256. rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
  4257. }
  4258. IOTRACE(("DBSYNC %p\n", pPager))
  4259. pPager->state = PAGER_SYNCED;
  4260. }else if( MEMDB && nTrunc!=0 ){
  4261. rc = sqlite3PagerTruncate(pPager, nTrunc);
  4262. }
  4263. sync_exit:
  4264. if( rc==SQLITE_IOERR_BLOCKED ){
  4265. /* pager_incr_changecounter() may attempt to obtain an exclusive
  4266. * lock to spill the cache and return IOERR_BLOCKED. But since
  4267. * there is no chance the cache is inconsistent, it is
  4268. * better to return SQLITE_BUSY.
  4269. */
  4270. rc = SQLITE_BUSY;
  4271. }
  4272. pagerLeave(pPager);
  4273. return rc;
  4274. }
  4275. /*
  4276. ** Commit all changes to the database and release the write lock.
  4277. **
  4278. ** If the commit fails for any reason, a rollback attempt is made
  4279. ** and an error code is returned. If the commit worked, SQLITE_OK
  4280. ** is returned.
  4281. */
  4282. int sqlite3PagerCommitPhaseTwo(Pager *pPager){
  4283. int rc;
  4284. PgHdr *pPg;
  4285. if( pPager->errCode ){
  4286. return pPager->errCode;
  4287. }
  4288. if( pPager->state<PAGER_RESERVED ){
  4289. return SQLITE_ERROR;
  4290. }
  4291. pagerEnter(pPager);
  4292. PAGERTRACE2("COMMIT %d\n", PAGERID(pPager));
  4293. if( MEMDB ){
  4294. pPg = pager_get_all_dirty_pages(pPager);
  4295. while( pPg ){
  4296. PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
  4297. clearHistory(pHist);
  4298. pPg->dirty = 0;
  4299. pPg->inJournal = 0;
  4300. pHist->inStmt = 0;
  4301. pPg->needSync = 0;
  4302. pHist->pPrevStmt = pHist->pNextStmt = 0;
  4303. pPg = pPg->pDirty;
  4304. }
  4305. pPager->pDirty = 0;
  4306. #ifndef NDEBUG
  4307. for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
  4308. PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
  4309. assert( !pPg->alwaysRollback );
  4310. assert( !pHist->pOrig );
  4311. assert( !pHist->pStmt );
  4312. }
  4313. #endif
  4314. pPager->pStmt = 0;
  4315. pPager->state = PAGER_SHARED;
  4316. return SQLITE_OK;
  4317. }
  4318. assert( pPager->journalOpen || !pPager->dirtyCache );
  4319. assert( pPager->state==PAGER_SYNCED || !pPager->dirtyCache );
  4320. rc = pager_end_transaction(pPager);
  4321. rc = pager_error(pPager, rc);
  4322. pagerLeave(pPager);
  4323. return rc;
  4324. }
  4325. /*
  4326. ** Rollback all changes. The database falls back to PAGER_SHARED mode.
  4327. ** All in-memory cache pages revert to their original data contents.
  4328. ** The journal is deleted.
  4329. **
  4330. ** This routine cannot fail unless some other process is not following
  4331. ** the correct locking protocol or unless some other
  4332. ** process is writing trash into the journal file (SQLITE_CORRUPT) or
  4333. ** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
  4334. ** codes are returned for all these occasions. Otherwise,
  4335. ** SQLITE_OK is returned.
  4336. */
  4337. int sqlite3PagerRollback(Pager *pPager){
  4338. int rc;
  4339. PAGERTRACE2("ROLLBACK %d\n", PAGERID(pPager));
  4340. if( MEMDB ){
  4341. PgHdr *p;
  4342. for(p=pPager->pAll; p; p=p->pNextAll){
  4343. PgHistory *pHist;
  4344. assert( !p->alwaysRollback );
  4345. if( !p->dirty ){
  4346. assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pOrig );
  4347. assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pStmt );
  4348. continue;
  4349. }
  4350. pHist = PGHDR_TO_HIST(p, pPager);
  4351. if( pHist->pOrig ){
  4352. memcpy(PGHDR_TO_DATA(p), pHist->pOrig, pPager->pageSize);
  4353. PAGERTRACE3("ROLLBACK-PAGE %d of %d\n", p->pgno, PAGERID(pPager));
  4354. }else{
  4355. PAGERTRACE3("PAGE %d is clean on %d\n", p->pgno, PAGERID(pPager));
  4356. }
  4357. clearHistory(pHist);
  4358. p->dirty = 0;
  4359. p->inJournal = 0;
  4360. pHist->inStmt = 0;
  4361. pHist->pPrevStmt = pHist->pNextStmt = 0;
  4362. if( pPager->xReiniter ){
  4363. pPager->xReiniter(p, pPager->pageSize);
  4364. }
  4365. }
  4366. pPager->pDirty = 0;
  4367. pPager->pStmt = 0;
  4368. pPager->dbSize = pPager->origDbSize;
  4369. pager_truncate_cache(pPager);
  4370. pPager->stmtInUse = 0;
  4371. pPager->state = PAGER_SHARED;
  4372. return SQLITE_OK;
  4373. }
  4374. pagerEnter(pPager);
  4375. if( !pPager->dirtyCache || !pPager->journalOpen ){
  4376. rc = pager_end_transaction(pPager);
  4377. pagerLeave(pPager);
  4378. return rc;
  4379. }
  4380. if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
  4381. if( pPager->state>=PAGER_EXCLUSIVE ){
  4382. pager_playback(pPager, 0);
  4383. }
  4384. pagerLeave(pPager);
  4385. return pPager->errCode;
  4386. }
  4387. if( pPager->state==PAGER_RESERVED ){
  4388. int rc2;
  4389. rc = pager_playback(pPager, 0);
  4390. rc2 = pager_end_transaction(pPager);
  4391. if( rc==SQLITE_OK ){
  4392. rc = rc2;
  4393. }
  4394. }else{
  4395. rc = pager_playback(pPager, 0);
  4396. }
  4397. /* pager_reset(pPager); */
  4398. pPager->dbSize = -1;
  4399. /* If an error occurs during a ROLLBACK, we can no longer trust the pager
  4400. ** cache. So call pager_error() on the way out to make any error
  4401. ** persistent.
  4402. */
  4403. rc = pager_error(pPager, rc);
  4404. pagerLeave(pPager);
  4405. return rc;
  4406. }
  4407. /*
  4408. ** Return TRUE if the database file is opened read-only. Return FALSE
  4409. ** if the database is (in theory) writable.
  4410. */
  4411. int sqlite3PagerIsreadonly(Pager *pPager){
  4412. return pPager->readOnly;
  4413. }
  4414. /*
  4415. ** Return the number of references to the pager.
  4416. */
  4417. int sqlite3PagerRefcount(Pager *pPager){
  4418. return pPager->nRef;
  4419. }
  4420. #ifdef SQLITE_TEST
  4421. /*
  4422. ** This routine is used for testing and analysis only.
  4423. */
  4424. int *sqlite3PagerStats(Pager *pPager){
  4425. static int a[11];
  4426. a[0] = pPager->nRef;
  4427. a[1] = pPager->nPage;
  4428. a[2] = pPager->mxPage;
  4429. a[3] = pPager->dbSize;
  4430. a[4] = pPager->state;
  4431. a[5] = pPager->errCode;
  4432. a[6] = pPager->nHit;
  4433. a[7] = pPager->nMiss;
  4434. a[8] = 0; /* Used to be pPager->nOvfl */
  4435. a[9] = pPager->nRead;
  4436. a[10] = pPager->nWrite;
  4437. return a;
  4438. }
  4439. #endif
  4440. /*
  4441. ** Set the statement rollback point.
  4442. **
  4443. ** This routine should be called with the transaction journal already
  4444. ** open. A new statement journal is created that can be used to rollback
  4445. ** changes of a single SQL command within a larger transaction.
  4446. */
  4447. static int pagerStmtBegin(Pager *pPager){
  4448. int rc;
  4449. assert( !pPager->stmtInUse );
  4450. assert( pPager->state>=PAGER_SHARED );
  4451. assert( pPager->dbSize>=0 );
  4452. PAGERTRACE2("STMT-BEGIN %d\n", PAGERID(pPager));
  4453. if( MEMDB ){
  4454. pPager->stmtInUse = 1;
  4455. pPager->stmtSize = pPager->dbSize;
  4456. return SQLITE_OK;
  4457. }
  4458. if( !pPager->journalOpen ){
  4459. pPager->stmtAutoopen = 1;
  4460. return SQLITE_OK;
  4461. }
  4462. assert( pPager->journalOpen );
  4463. pagerLeave(pPager);
  4464. assert( pPager->aInStmt==0 );
  4465. pPager->aInStmt = sqlite3MallocZero( pPager->dbSize/8 + 1 );
  4466. pagerEnter(pPager);
  4467. if( pPager->aInStmt==0 ){
  4468. /* sqlite3OsLock(pPager->fd, SHARED_LOCK); */
  4469. return SQLITE_NOMEM;
  4470. }
  4471. #ifndef NDEBUG
  4472. rc = sqlite3OsFileSize(pPager->jfd, &pPager->stmtJSize);
  4473. if( rc ) goto stmt_begin_failed;
  4474. assert( pPager->stmtJSize == pPager->journalOff );
  4475. #endif
  4476. pPager->stmtJSize = pPager->journalOff;
  4477. pPager->stmtSize = pPager->dbSize;
  4478. pPager->stmtHdrOff = 0;
  4479. pPager->stmtCksum = pPager->cksumInit;
  4480. if( !pPager->stmtOpen ){
  4481. rc = sqlite3PagerOpentemp(pPager->pVfs, pPager->stfd, pPager->zStmtJrnl,
  4482. SQLITE_OPEN_SUBJOURNAL);
  4483. if( rc ){
  4484. goto stmt_begin_failed;
  4485. }
  4486. pPager->stmtOpen = 1;
  4487. pPager->stmtNRec = 0;
  4488. }
  4489. pPager->stmtInUse = 1;
  4490. return SQLITE_OK;
  4491. stmt_begin_failed:
  4492. if( pPager->aInStmt ){
  4493. sqlite3_free(pPager->aInStmt);
  4494. pPager->aInStmt = 0;
  4495. }
  4496. return rc;
  4497. }
  4498. int sqlite3PagerStmtBegin(Pager *pPager){
  4499. int rc;
  4500. pagerEnter(pPager);
  4501. rc = pagerStmtBegin(pPager);
  4502. pagerLeave(pPager);
  4503. return rc;
  4504. }
  4505. /*
  4506. ** Commit a statement.
  4507. */
  4508. int sqlite3PagerStmtCommit(Pager *pPager){
  4509. pagerEnter(pPager);
  4510. if( pPager->stmtInUse ){
  4511. PgHdr *pPg, *pNext;
  4512. PAGERTRACE2("STMT-COMMIT %d\n", PAGERID(pPager));
  4513. if( !MEMDB ){
  4514. /* sqlite3OsTruncate(pPager->stfd, 0); */
  4515. sqlite3_free( pPager->aInStmt );
  4516. pPager->aInStmt = 0;
  4517. }else{
  4518. for(pPg=pPager->pStmt; pPg; pPg=pNext){
  4519. PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
  4520. pNext = pHist->pNextStmt;
  4521. assert( pHist->inStmt );
  4522. pHist->inStmt = 0;
  4523. pHist->pPrevStmt = pHist->pNextStmt = 0;
  4524. sqlite3_free(pHist->pStmt);
  4525. pHist->pStmt = 0;
  4526. }
  4527. }
  4528. pPager->stmtNRec = 0;
  4529. pPager->stmtInUse = 0;
  4530. pPager->pStmt = 0;
  4531. }
  4532. pPager->stmtAutoopen = 0;
  4533. pagerLeave(pPager);
  4534. return SQLITE_OK;
  4535. }
  4536. /*
  4537. ** Rollback a statement.
  4538. */
  4539. int sqlite3PagerStmtRollback(Pager *pPager){
  4540. int rc;
  4541. pagerEnter(pPager);
  4542. if( pPager->stmtInUse ){
  4543. PAGERTRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager));
  4544. if( MEMDB ){
  4545. PgHdr *pPg;
  4546. PgHistory *pHist;
  4547. for(pPg=pPager->pStmt; pPg; pPg=pHist->pNextStmt){
  4548. pHist = PGHDR_TO_HIST(pPg, pPager);
  4549. if( pHist->pStmt ){
  4550. memcpy(PGHDR_TO_DATA(pPg), pHist->pStmt, pPager->pageSize);
  4551. sqlite3_free(pHist->pStmt);
  4552. pHist->pStmt = 0;
  4553. }
  4554. }
  4555. pPager->dbSize = pPager->stmtSize;
  4556. pager_truncate_cache(pPager);
  4557. rc = SQLITE_OK;
  4558. }else{
  4559. rc = pager_stmt_playback(pPager);
  4560. }
  4561. sqlite3PagerStmtCommit(pPager);
  4562. }else{
  4563. rc = SQLITE_OK;
  4564. }
  4565. pPager->stmtAutoopen = 0;
  4566. pagerLeave(pPager);
  4567. return rc;
  4568. }
  4569. /*
  4570. ** Return the full pathname of the database file.
  4571. */
  4572. const char *sqlite3PagerFilename(Pager *pPager){
  4573. return pPager->zFilename;
  4574. }
  4575. /*
  4576. ** Return the VFS structure for the pager.
  4577. */
  4578. const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){
  4579. return pPager->pVfs;
  4580. }
  4581. /*
  4582. ** Return the file handle for the database file associated
  4583. ** with the pager. This might return NULL if the file has
  4584. ** not yet been opened.
  4585. */
  4586. sqlite3_file *sqlite3PagerFile(Pager *pPager){
  4587. return pPager->fd;
  4588. }
  4589. /*
  4590. ** Return the directory of the database file.
  4591. */
  4592. const char *sqlite3PagerDirname(Pager *pPager){
  4593. return pPager->zDirectory;
  4594. }
  4595. /*
  4596. ** Return the full pathname of the journal file.
  4597. */
  4598. const char *sqlite3PagerJournalname(Pager *pPager){
  4599. return pPager->zJournal;
  4600. }
  4601. /*
  4602. ** Return true if fsync() calls are disabled for this pager. Return FALSE
  4603. ** if fsync()s are executed normally.
  4604. */
  4605. int sqlite3PagerNosync(Pager *pPager){
  4606. return pPager->noSync;
  4607. }
  4608. #ifdef SQLITE_HAS_CODEC
  4609. /*
  4610. ** Set the codec for this pager
  4611. */
  4612. void sqlite3PagerSetCodec(
  4613. Pager *pPager,
  4614. void *(*xCodec)(void*,void*,Pgno,int),
  4615. void *pCodecArg
  4616. ){
  4617. pPager->xCodec = xCodec;
  4618. pPager->pCodecArg = pCodecArg;
  4619. }
  4620. #endif
  4621. #ifndef SQLITE_OMIT_AUTOVACUUM
  4622. /*
  4623. ** Move the page pPg to location pgno in the file.
  4624. **
  4625. ** There must be no references to the page previously located at
  4626. ** pgno (which we call pPgOld) though that page is allowed to be
  4627. ** in cache. If the page previous located at pgno is not already
  4628. ** in the rollback journal, it is not put there by by this routine.
  4629. **
  4630. ** References to the page pPg remain valid. Updating any
  4631. ** meta-data associated with pPg (i.e. data stored in the nExtra bytes
  4632. ** allocated along with the page) is the responsibility of the caller.
  4633. **
  4634. ** A transaction must be active when this routine is called. It used to be
  4635. ** required that a statement transaction was not active, but this restriction
  4636. ** has been removed (CREATE INDEX needs to move a page when a statement
  4637. ** transaction is active).
  4638. */
  4639. int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno){
  4640. PgHdr *pPgOld; /* The page being overwritten. */
  4641. int h;
  4642. Pgno needSyncPgno = 0;
  4643. pagerEnter(pPager);
  4644. assert( pPg->nRef>0 );
  4645. PAGERTRACE5("MOVE %d page %d (needSync=%d) moves to %d\n",
  4646. PAGERID(pPager), pPg->pgno, pPg->needSync, pgno);
  4647. IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))
  4648. pager_get_content(pPg);
  4649. if( pPg->needSync ){
  4650. needSyncPgno = pPg->pgno;
  4651. assert( pPg->inJournal || (int)pgno>pPager->origDbSize );
  4652. assert( pPg->dirty );
  4653. assert( pPager->needSync );
  4654. }
  4655. /* Unlink pPg from its hash-chain */
  4656. unlinkHashChain(pPager, pPg);
  4657. /* If the cache contains a page with page-number pgno, remove it
  4658. ** from its hash chain. Also, if the PgHdr.needSync was set for
  4659. ** page pgno before the 'move' operation, it needs to be retained
  4660. ** for the page moved there.
  4661. */
  4662. pPg->needSync = 0;
  4663. pPgOld = pager_lookup(pPager, pgno);
  4664. if( pPgOld ){
  4665. assert( pPgOld->nRef==0 );
  4666. unlinkHashChain(pPager, pPgOld);
  4667. makeClean(pPgOld);
  4668. pPg->needSync = pPgOld->needSync;
  4669. }else{
  4670. pPg->needSync = 0;
  4671. }
  4672. if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
  4673. pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
  4674. }else{
  4675. pPg->inJournal = 0;
  4676. assert( pPg->needSync==0 || (int)pgno>pPager->origDbSize );
  4677. }
  4678. /* Change the page number for pPg and insert it into the new hash-chain. */
  4679. assert( pgno!=0 );
  4680. pPg->pgno = pgno;
  4681. h = pgno & (pPager->nHash-1);
  4682. if( pPager->aHash[h] ){
  4683. assert( pPager->aHash[h]->pPrevHash==0 );
  4684. pPager->aHash[h]->pPrevHash = pPg;
  4685. }
  4686. pPg->pNextHash = pPager->aHash[h];
  4687. pPager->aHash[h] = pPg;
  4688. pPg->pPrevHash = 0;
  4689. makeDirty(pPg);
  4690. pPager->dirtyCache = 1;
  4691. if( needSyncPgno ){
  4692. /* If needSyncPgno is non-zero, then the journal file needs to be
  4693. ** sync()ed before any data is written to database file page needSyncPgno.
  4694. ** Currently, no such page exists in the page-cache and the
  4695. ** Pager.aInJournal bit has been set. This needs to be remedied by loading
  4696. ** the page into the pager-cache and setting the PgHdr.needSync flag.
  4697. **
  4698. ** The sqlite3PagerGet() call may cause the journal to sync. So make
  4699. ** sure the Pager.needSync flag is set too.
  4700. */
  4701. int rc;
  4702. PgHdr *pPgHdr;
  4703. assert( pPager->needSync );
  4704. rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
  4705. if( rc!=SQLITE_OK ) return rc;
  4706. pPager->needSync = 1;
  4707. pPgHdr->needSync = 1;
  4708. pPgHdr->inJournal = 1;
  4709. makeDirty(pPgHdr);
  4710. sqlite3PagerUnref(pPgHdr);
  4711. }
  4712. pagerLeave(pPager);
  4713. return SQLITE_OK;
  4714. }
  4715. #endif
  4716. /*
  4717. ** Return a pointer to the data for the specified page.
  4718. */
  4719. void *sqlite3PagerGetData(DbPage *pPg){
  4720. return PGHDR_TO_DATA(pPg);
  4721. }
  4722. /*
  4723. ** Return a pointer to the Pager.nExtra bytes of "extra" space
  4724. ** allocated along with the specified page.
  4725. */
  4726. void *sqlite3PagerGetExtra(DbPage *pPg){
  4727. Pager *pPager = pPg->pPager;
  4728. return (pPager?PGHDR_TO_EXTRA(pPg, pPager):0);
  4729. }
  4730. /*
  4731. ** Get/set the locking-mode for this pager. Parameter eMode must be one
  4732. ** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or
  4733. ** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then
  4734. ** the locking-mode is set to the value specified.
  4735. **
  4736. ** The returned value is either PAGER_LOCKINGMODE_NORMAL or
  4737. ** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated)
  4738. ** locking-mode.
  4739. */
  4740. int sqlite3PagerLockingMode(Pager *pPager, int eMode){
  4741. assert( eMode==PAGER_LOCKINGMODE_QUERY
  4742. || eMode==PAGER_LOCKINGMODE_NORMAL
  4743. || eMode==PAGER_LOCKINGMODE_EXCLUSIVE );
  4744. assert( PAGER_LOCKINGMODE_QUERY<0 );
  4745. assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 );
  4746. if( eMode>=0 && !pPager->tempFile ){
  4747. pPager->exclusiveMode = eMode;
  4748. }
  4749. return (int)pPager->exclusiveMode;
  4750. }
  4751. #ifdef SQLITE_TEST
  4752. /*
  4753. ** Print a listing of all referenced pages and their ref count.
  4754. */
  4755. void sqlite3PagerRefdump(Pager *pPager){
  4756. PgHdr *pPg;
  4757. for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
  4758. if( pPg->nRef<=0 ) continue;
  4759. sqlite3DebugPrintf("PAGE %3d addr=%p nRef=%d\n",
  4760. pPg->pgno, PGHDR_TO_DATA(pPg), pPg->nRef);
  4761. }
  4762. }
  4763. #endif
  4764. #endif /* SQLITE_OMIT_DISKIO */