reiser4-for-5.16.patch 2.4 MB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
66221662316624166251662616627166281662916630166311663216633166341663516636166371663816639166401664116642166431664416645166461664716648166491665016651166521665316654166551665616657166581665916660166611666216663166641666516666166671666816669166701667116672166731667416675166761667716678166791668016681166821668316684166851668616687166881668916690166911669216693166941669516696166971669816699167001670116702167031670416705167061670716708167091671016711167121671316714167151671616717167181671916720167211672216723167241672516726167271672816729167301673116732167331673416735167361673716738167391674016741167421674316744167451674616747167481674916750167511675216753167541675516756167571675816759167601676116762167631676416765167661676716768167691677016771167721677316774167751677616777167781677916780167811678216783167841678516786167871678816789167901679116792167931679416795167961679716798167991680016801168021680316804168051680616807168081680916810168111681216813168141681516816168171681816819168201682116822168231682416825168261682716828168291683016831168321683316834168351683616837168381683916840168411684216843168441684516846168471684816849168501685116852168531685416855168561685716858168591686016861168621686316864168651686616867168681686916870168711687216873168741687516876168771687816879168801688116882168831688416885168861688716888168891689016891168921689316894168951689616897168981689916900169011690216903169041690516906169071690816909169101691116912169131691416915169161691716918169191692016921169221692316924169251692616927169281692916930169311693216933169341693516936169371693816939169401694116942169431694416945169461694716948169491695016951169521695316954169551695616957169581695916960169611696216963169641696516966169671696816969169701697116972169731697416975169761697716978169791698016981169821698316984169851698616987169881698916990169911699216993169941699516996169971699816999170001700117002170031700417005170061700717008170091701017011170121701317014170151701617017170181701917020170211
70221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211
74221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211
78221782317824178251782617827178281782917830178311783217833178341783517836178371783817839178401784117842178431784417845178461784717848178491785017851178521785317854178551785617857178581785917860178611786217863178641786517866178671786817869178701787117872178731787417875178761787717878178791788017881178821788317884178851788617887178881788917890178911789217893178941789517896178971789817899179001790117902179031790417905179061790717908179091791017911179121791317914179151791617917179181791917920179211792217923179241792517926179271792817929179301793117932179331793417935179361793717938179391794017941179421794317944179451794617947179481794917950179511795217953179541795517956179571795817959179601796117962179631796417965179661796717968179691797017971179721797317974179751797617977179781797917980179811798217983179841798517986179871798817989179901799117992179931799417995179961799717998179991800018001180021800318004180051800618007180081800918010180111801218013180141801518016180171801818019180201802118022180231802418025180261802718028180291803018031180321803318034180351803618037180381803918040180411804218043180441804518046180471804818049180501805118052180531805418055180561805718058180591806018061180621806318064180651806618067180681806918070180711807218073180741807518076180771807818079180801808118082180831808418085180861808718088180891809018091180921809318094180951809618097180981809918100181011810218103181041810518106181071810818109181101811118112181131811418115181161811718118181191812018121181221812318124181251812618127181281812918130181311813218133181341813518136181371813818139181401814118142181431814418145181461814718148181491815018151181521815318154181551815618157181581815918160181611816218163181641816518166181671816818169181701817118172181731817418175181761817718178181791818018181181821818318184181851818618187181881818918190181911819218193181941819518196181971819818199182001820118202182031820418205182061820718208182091821018211182121821318214182151821618217182181821918220182211
82221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211
86221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211
90221902319024190251902619027190281902919030190311903219033190341903519036190371903819039190401904119042190431904419045190461904719048190491905019051190521905319054190551905619057190581905919060190611906219063190641906519066190671906819069190701907119072190731907419075190761907719078190791908019081190821908319084190851908619087190881908919090190911909219093190941909519096190971909819099191001910119102191031910419105191061910719108191091911019111191121911319114191151911619117191181911919120191211912219123191241912519126191271912819129191301913119132191331913419135191361913719138191391914019141191421914319144191451914619147191481914919150191511915219153191541915519156191571915819159191601916119162191631916419165191661916719168191691917019171191721917319174191751917619177191781917919180191811918219183191841918519186191871918819189191901919119192191931919419195191961919719198191991920019201192021920319204192051920619207192081920919210192111921219213192141921519216192171921819219192201922119222192231922419225192261922719228192291923019231192321923319234192351923619237192381923919240192411924219243192441924519246192471924819249192501925119252192531925419255192561925719258192591926019261192621926319264192651926619267192681926919270192711927219273192741927519276192771927819279192801928119282192831928419285192861928719288192891929019291192921929319294192951929619297192981929919300193011930219303193041930519306193071930819309193101931119312193131931419315193161931719318193191932019321193221932319324193251932619327193281932919330193311933219333193341933519336193371933819339193401934119342193431934419345193461934719348193491935019351193521935319354193551935619357193581935919360193611936219363193641936519366193671936819369193701937119372193731937419375193761937719378193791938019381193821938319384193851938619387193881938919390193911939219393193941939519396193971939819399194001940119402194031940419405194061940719408194091941019411194121941319414194151941619417194181941919420194211
94221942319424194251942619427194281942919430194311943219433194341943519436194371943819439194401944119442194431944419445194461944719448194491945019451194521945319454194551945619457194581945919460194611946219463194641946519466194671946819469194701947119472194731947419475194761947719478194791948019481194821948319484194851948619487194881948919490194911949219493194941949519496194971949819499195001950119502195031950419505195061950719508195091951019511195121951319514195151951619517195181951919520195211952219523195241952519526195271952819529195301953119532195331953419535195361953719538195391954019541195421954319544195451954619547195481954919550195511955219553195541955519556195571955819559195601956119562195631956419565195661956719568195691957019571195721957319574195751957619577195781957919580195811958219583195841958519586195871958819589195901959119592195931959419595195961959719598195991960019601196021960319604196051960619607196081960919610196111961219613196141961519616196171961819619196201962119622196231962419625196261962719628196291963019631196321963319634196351963619637196381963919640196411964219643196441964519646196471964819649196501965119652196531965419655196561965719658196591966019661196621966319664196651966619667196681966919670196711967219673196741967519676196771967819679196801968119682196831968419685196861968719688196891969019691196921969319694196951969619697196981969919700197011970219703197041970519706197071970819709197101971119712197131971419715197161971719718197191972019721197221972319724197251972619727197281972919730197311973219733197341973519736197371973819739197401974119742197431974419745197461974719748197491975019751197521975319754197551975619757197581975919760197611976219763197641976519766197671976819769197701977119772197731977419775197761977719778197791978019781197821978319784197851978619787197881978919790197911979219793197941979519796197971979819799198001980119802198031980419805198061980719808198091981019811198121981319814198151981619817198181981919820198211
98221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212
02222022320224202252022620227202282022920230202312023220233202342023520236202372023820239202402024120242202432024420245202462024720248202492025020251202522025320254202552025620257202582025920260202612026220263202642026520266202672026820269202702027120272202732027420275202762027720278202792028020281202822028320284202852028620287202882028920290202912029220293202942029520296202972029820299203002030120302203032030420305203062030720308203092031020311203122031320314203152031620317203182031920320203212032220323203242032520326203272032820329203302033120332203332033420335203362033720338203392034020341203422034320344203452034620347203482034920350203512035220353203542035520356203572035820359203602036120362203632036420365203662036720368203692037020371203722037320374203752037620377203782037920380203812038220383203842038520386203872038820389203902039120392203932039420395203962039720398203992040020401204022040320404204052040620407204082040920410204112041220413204142041520416204172041820419204202042120422204232042420425204262042720428204292043020431204322043320434204352043620437204382043920440204412044220443204442044520446204472044820449204502045120452204532045420455204562045720458204592046020461204622046320464204652046620467204682046920470204712047220473204742047520476204772047820479204802048120482204832048420485204862048720488204892049020491204922049320494204952049620497204982049920500205012050220503205042050520506205072050820509205102051120512205132051420515205162051720518205192052020521205222052320524205252052620527205282052920530205312053220533205342053520536205372053820539205402054120542205432054420545205462054720548205492055020551205522055320554205552055620557205582055920560205612056220563205642056520566205672056820569205702057120572205732057420575205762057720578205792058020581205822058320584205852058620587205882058920590205912059220593205942059520596205972059820599206002060120602206032060420605206062060720608206092061020611206122061320614206152061620617206182061920620206212
06222062320624206252062620627206282062920630206312063220633206342063520636206372063820639206402064120642206432064420645206462064720648206492065020651206522065320654206552065620657206582065920660206612066220663206642066520666206672066820669206702067120672206732067420675206762067720678206792068020681206822068320684206852068620687206882068920690206912069220693206942069520696206972069820699207002070120702207032070420705207062070720708207092071020711207122071320714207152071620717207182071920720207212072220723207242072520726207272072820729207302073120732207332073420735207362073720738207392074020741207422074320744207452074620747207482074920750207512075220753207542075520756207572075820759207602076120762207632076420765207662076720768207692077020771207722077320774207752077620777207782077920780207812078220783207842078520786207872078820789207902079120792207932079420795207962079720798207992080020801208022080320804208052080620807208082080920810208112081220813208142081520816208172081820819208202082120822208232082420825208262082720828208292083020831208322083320834208352083620837208382083920840208412084220843208442084520846208472084820849208502085120852208532085420855208562085720858208592086020861208622086320864208652086620867208682086920870208712087220873208742087520876208772087820879208802088120882208832088420885208862088720888208892089020891208922089320894208952089620897208982089920900209012090220903209042090520906209072090820909209102091120912209132091420915209162091720918209192092020921209222092320924209252092620927209282092920930209312093220933209342093520936209372093820939209402094120942209432094420945209462094720948209492095020951209522095320954209552095620957209582095920960209612096220963209642096520966209672096820969209702097120972209732097420975209762097720978209792098020981209822098320984209852098620987209882098920990209912099220993209942099520996209972099820999210002100121002210032100421005210062100721008210092101021011210122101321014210152101621017210182101921020210212
10222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182141921420214212
14222142321424214252142621427214282142921430214312143221433214342143521436214372143821439214402144121442214432144421445214462144721448214492145021451214522145321454214552145621457214582145921460214612146221463214642146521466214672146821469214702147121472214732147421475214762147721478214792148021481214822148321484214852148621487214882148921490214912149221493214942149521496214972149821499215002150121502215032150421505215062150721508215092151021511215122151321514215152151621517215182151921520215212152221523215242152521526215272152821529215302153121532215332153421535215362153721538215392154021541215422154321544215452154621547215482154921550215512155221553215542155521556215572155821559215602156121562215632156421565215662156721568215692157021571215722157321574215752157621577215782157921580215812158221583215842158521586215872158821589215902159121592215932159421595215962159721598215992160021601216022160321604216052160621607216082160921610216112161221613216142161521616216172161821619216202162121622216232162421625216262162721628216292163021631216322163321634216352163621637216382163921640216412164221643216442164521646216472164821649216502165121652216532165421655216562165721658216592166021661216622166321664216652166621667216682166921670216712167221673216742167521676216772167821679216802168121682216832168421685216862168721688216892169021691216922169321694216952169621697216982169921700217012170221703217042170521706217072170821709217102171121712217132171421715217162171721718217192172021721217222172321724217252172621727217282172921730217312173221733217342173521736217372173821739217402174121742217432174421745217462174721748217492175021751217522175321754217552175621757217582175921760217612176221763217642176521766217672176821769217702177121772217732177421775217762177721778217792178021781217822178321784217852178621787217882178921790217912179221793217942179521796217972179821799218002180121802218032180421805218062180721808218092181021811218122181321814218152181621817218182181921820218212
18222182321824218252182621827218282182921830218312183221833218342183521836218372183821839218402184121842218432184421845218462184721848218492185021851218522185321854218552185621857218582185921860218612186221863218642186521866218672186821869218702187121872218732187421875218762187721878218792188021881218822188321884218852188621887218882188921890218912189221893218942189521896218972189821899219002190121902219032190421905219062190721908219092191021911219122191321914219152191621917219182191921920219212192221923219242192521926219272192821929219302193121932219332193421935219362193721938219392194021941219422194321944219452194621947219482194921950219512195221953219542195521956219572195821959219602196121962219632196421965219662196721968219692197021971219722197321974219752197621977219782197921980219812198221983219842198521986219872198821989219902199121992219932199421995219962199721998219992200022001220022200322004220052200622007220082200922010220112201222013220142201522016220172201822019220202202122022220232202422025220262202722028220292203022031220322203322034220352203622037220382203922040220412204222043220442204522046220472204822049220502205122052220532205422055220562205722058220592206022061220622206322064220652206622067220682206922070220712207222073220742207522076220772207822079220802208122082220832208422085220862208722088220892209022091220922209322094220952209622097220982209922100221012210222103221042210522106221072210822109221102211122112221132211422115221162211722118221192212022121221222212322124221252212622127221282212922130221312213222133221342213522136221372213822139221402214122142221432214422145221462214722148221492215022151221522215322154221552215622157221582215922160221612216222163221642216522166221672216822169221702217122172221732217422175221762217722178221792218022181221822218322184221852218622187221882218922190221912219222193221942219522196221972219822199222002220122202222032220422205222062220722208222092221022211222122221322214222152221622217222182221922220222212
22222222322224222252222622227222282222922230222312223222233222342223522236222372223822239222402224122242222432224422245222462224722248222492225022251222522225322254222552225622257222582225922260222612226222263222642226522266222672226822269222702227122272222732227422275222762227722278222792228022281222822228322284222852228622287222882228922290222912229222293222942229522296222972229822299223002230122302223032230422305223062230722308223092231022311223122231322314223152231622317223182231922320223212232222323223242232522326223272232822329223302233122332223332233422335223362233722338223392234022341223422234322344223452234622347223482234922350223512235222353223542235522356223572235822359223602236122362223632236422365223662236722368223692237022371223722237322374223752237622377223782237922380223812238222383223842238522386223872238822389223902239122392223932239422395223962239722398223992240022401224022240322404224052240622407224082240922410224112241222413224142241522416224172241822419224202242122422224232242422425224262242722428224292243022431224322243322434224352243622437224382243922440224412244222443224442244522446224472244822449224502245122452224532245422455224562245722458224592246022461224622246322464224652246622467224682246922470224712247222473224742247522476224772247822479224802248122482224832248422485224862248722488224892249022491224922249322494224952249622497224982249922500225012250222503225042250522506225072250822509225102251122512225132251422515225162251722518225192252022521225222252322524225252252622527225282252922530225312253222533225342253522536225372253822539225402254122542225432254422545225462254722548225492255022551225522255322554225552255622557225582255922560225612256222563225642256522566225672256822569225702257122572225732257422575225762257722578225792258022581225822258322584225852258622587225882258922590225912259222593225942259522596225972259822599226002260122602226032260422605226062260722608226092261022611226122261322614226152261622617226182261922620226212
26222262322624226252262622627226282262922630226312263222633226342263522636226372263822639226402264122642226432264422645226462264722648226492265022651226522265322654226552265622657226582265922660226612266222663226642266522666226672266822669226702267122672226732267422675226762267722678226792268022681226822268322684226852268622687226882268922690226912269222693226942269522696226972269822699227002270122702227032270422705227062270722708227092271022711227122271322714227152271622717227182271922720227212272222723227242272522726227272272822729227302273122732227332273422735227362273722738227392274022741227422274322744227452274622747227482274922750227512275222753227542275522756227572275822759227602276122762227632276422765227662276722768227692277022771227722277322774227752277622777227782277922780227812278222783227842278522786227872278822789227902279122792227932279422795227962279722798227992280022801228022280322804228052280622807228082280922810228112281222813228142281522816228172281822819228202282122822228232282422825228262282722828228292283022831228322283322834228352283622837228382283922840228412284222843228442284522846228472284822849228502285122852228532285422855228562285722858228592286022861228622286322864228652286622867228682286922870228712287222873228742287522876228772287822879228802288122882228832288422885228862288722888228892289022891228922289322894228952289622897228982289922900229012290222903229042290522906229072290822909229102291122912229132291422915229162291722918229192292022921229222292322924229252292622927229282292922930229312293222933229342293522936229372293822939229402294122942229432294422945229462294722948229492295022951229522295322954229552295622957229582295922960229612296222963229642296522966229672296822969229702297122972229732297422975229762297722978229792298022981229822298322984229852298622987229882298922990229912299222993229942299522996229972299822999230002300123002230032300423005230062300723008230092301023011230122301323014230152301623017230182301923020230212
30222302323024230252302623027230282302923030230312303223033230342303523036230372303823039230402304123042230432304423045230462304723048230492305023051230522305323054230552305623057230582305923060230612306223063230642306523066230672306823069230702307123072230732307423075230762307723078230792308023081230822308323084230852308623087230882308923090230912309223093230942309523096230972309823099231002310123102231032310423105231062310723108231092311023111231122311323114231152311623117231182311923120231212312223123231242312523126231272312823129231302313123132231332313423135231362313723138231392314023141231422314323144231452314623147231482314923150231512315223153231542315523156231572315823159231602316123162231632316423165231662316723168231692317023171231722317323174231752317623177231782317923180231812318223183231842318523186231872318823189231902319123192231932319423195231962319723198231992320023201232022320323204232052320623207232082320923210232112321223213232142321523216232172321823219232202322123222232232322423225232262322723228232292323023231232322323323234232352323623237232382323923240232412324223243232442324523246232472324823249232502325123252232532325423255232562325723258232592326023261232622326323264232652326623267232682326923270232712327223273232742327523276232772327823279232802328123282232832328423285232862328723288232892329023291232922329323294232952329623297232982329923300233012330223303233042330523306233072330823309233102331123312233132331423315233162331723318233192332023321233222332323324233252332623327233282332923330233312333223333233342333523336233372333823339233402334123342233432334423345233462334723348233492335023351233522335323354233552335623357233582335923360233612336223363233642336523366233672336823369233702337123372233732337423375233762337723378233792338023381233822338323384233852338623387233882338923390233912339223393233942339523396233972339823399234002340123402234032340423405234062340723408234092341023411234122341323414234152341623417234182341923420234212
34222342323424234252342623427234282342923430234312343223433234342343523436234372343823439234402344123442234432344423445234462344723448234492345023451234522345323454234552345623457234582345923460234612346223463234642346523466234672346823469234702347123472234732347423475234762347723478234792348023481234822348323484234852348623487234882348923490234912349223493234942349523496234972349823499235002350123502235032350423505235062350723508235092351023511235122351323514235152351623517235182351923520235212352223523235242352523526235272352823529235302353123532235332353423535235362353723538235392354023541235422354323544235452354623547235482354923550235512355223553235542355523556235572355823559235602356123562235632356423565235662356723568235692357023571235722357323574235752357623577235782357923580235812358223583235842358523586235872358823589235902359123592235932359423595235962359723598235992360023601236022360323604236052360623607236082360923610236112361223613236142361523616236172361823619236202362123622236232362423625236262362723628236292363023631236322363323634236352363623637236382363923640236412364223643236442364523646236472364823649236502365123652236532365423655236562365723658236592366023661236622366323664236652366623667236682366923670236712367223673236742367523676236772367823679236802368123682236832368423685236862368723688236892369023691236922369323694236952369623697236982369923700237012370223703237042370523706237072370823709237102371123712237132371423715237162371723718237192372023721237222372323724237252372623727237282372923730237312373223733237342373523736237372373823739237402374123742237432374423745237462374723748237492375023751237522375323754237552375623757237582375923760237612376223763237642376523766237672376823769237702377123772237732377423775237762377723778237792378023781237822378323784237852378623787237882378923790237912379223793237942379523796237972379823799238002380123802238032380423805238062380723808238092381023811238122381323814238152381623817238182381923820238212
38222382323824238252382623827238282382923830238312383223833238342383523836238372383823839238402384123842238432384423845238462384723848238492385023851238522385323854238552385623857238582385923860238612386223863238642386523866238672386823869238702387123872238732387423875238762387723878238792388023881238822388323884238852388623887238882388923890238912389223893238942389523896238972389823899239002390123902239032390423905239062390723908239092391023911239122391323914239152391623917239182391923920239212392223923239242392523926239272392823929239302393123932239332393423935239362393723938239392394023941239422394323944239452394623947239482394923950239512395223953239542395523956239572395823959239602396123962239632396423965239662396723968239692397023971239722397323974239752397623977239782397923980239812398223983239842398523986239872398823989239902399123992239932399423995239962399723998239992400024001240022400324004240052400624007240082400924010240112401224013240142401524016240172401824019240202402124022240232402424025240262402724028240292403024031240322403324034240352403624037240382403924040240412404224043240442404524046240472404824049240502405124052240532405424055240562405724058240592406024061240622406324064240652406624067240682406924070240712407224073240742407524076240772407824079240802408124082240832408424085240862408724088240892409024091240922409324094240952409624097240982409924100241012410224103241042410524106241072410824109241102411124112241132411424115241162411724118241192412024121241222412324124241252412624127241282412924130241312413224133241342413524136241372413824139241402414124142241432414424145241462414724148241492415024151241522415324154241552415624157241582415924160241612416224163241642416524166241672416824169241702417124172241732417424175241762417724178241792418024181241822418324184241852418624187241882418924190241912419224193241942419524196241972419824199242002420124202242032420424205242062420724208242092421024211242122421324214242152421624217242182421924220242212
42222422324224242252422624227242282422924230242312423224233242342423524236242372423824239242402424124242242432424424245242462424724248242492425024251242522425324254242552425624257242582425924260242612426224263242642426524266242672426824269242702427124272242732427424275242762427724278242792428024281242822428324284242852428624287242882428924290242912429224293242942429524296242972429824299243002430124302243032430424305243062430724308243092431024311243122431324314243152431624317243182431924320243212432224323243242432524326243272432824329243302433124332243332433424335243362433724338243392434024341243422434324344243452434624347243482434924350243512435224353243542435524356243572435824359243602436124362243632436424365243662436724368243692437024371243722437324374243752437624377243782437924380243812438224383243842438524386243872438824389243902439124392243932439424395243962439724398243992440024401244022440324404244052440624407244082440924410244112441224413244142441524416244172441824419244202442124422244232442424425244262442724428244292443024431244322443324434244352443624437244382443924440244412444224443244442444524446244472444824449244502445124452244532445424455244562445724458244592446024461244622446324464244652446624467244682446924470244712447224473244742447524476244772447824479244802448124482244832448424485244862448724488244892449024491244922449324494244952449624497244982449924500245012450224503245042450524506245072450824509245102451124512245132451424515245162451724518245192452024521245222452324524245252452624527245282452924530245312453224533245342453524536245372453824539245402454124542245432454424545245462454724548245492455024551245522455324554245552455624557245582455924560245612456224563245642456524566245672456824569245702457124572245732457424575245762457724578245792458024581245822458324584245852458624587245882458924590245912459224593245942459524596245972459824599246002460124602246032460424605246062460724608246092461024611246122461324614246152461624617246182461924620246212
46222462324624246252462624627246282462924630246312463224633246342463524636246372463824639246402464124642246432464424645246462464724648246492465024651246522465324654246552465624657246582465924660246612466224663246642466524666246672466824669246702467124672246732467424675246762467724678246792468024681246822468324684246852468624687246882468924690246912469224693246942469524696246972469824699247002470124702247032470424705247062470724708247092471024711247122471324714247152471624717247182471924720247212472224723247242472524726247272472824729247302473124732247332473424735247362473724738247392474024741247422474324744247452474624747247482474924750247512475224753247542475524756247572475824759247602476124762247632476424765247662476724768247692477024771247722477324774247752477624777247782477924780247812478224783247842478524786247872478824789247902479124792247932479424795247962479724798247992480024801248022480324804248052480624807248082480924810248112481224813248142481524816248172481824819248202482124822248232482424825248262482724828248292483024831248322483324834248352483624837248382483924840248412484224843248442484524846248472484824849248502485124852248532485424855248562485724858248592486024861248622486324864248652486624867248682486924870248712487224873248742487524876248772487824879248802488124882248832488424885248862488724888248892489024891248922489324894248952489624897248982489924900249012490224903249042490524906249072490824909249102491124912249132491424915249162491724918249192492024921249222492324924249252492624927249282492924930249312493224933249342493524936249372493824939249402494124942249432494424945249462494724948249492495024951249522495324954249552495624957249582495924960249612496224963249642496524966249672496824969249702497124972249732497424975249762497724978249792498024981249822498324984249852498624987249882498924990249912499224993249942499524996249972499824999250002500125002250032500425005250062500725008250092501025011250122501325014250152501625017250182501925020250212
50222502325024250252502625027250282502925030250312503225033250342503525036250372503825039250402504125042250432504425045250462504725048250492505025051250522505325054250552505625057250582505925060250612506225063250642506525066250672506825069250702507125072250732507425075250762507725078250792508025081250822508325084250852508625087250882508925090250912509225093250942509525096250972509825099251002510125102251032510425105251062510725108251092511025111251122511325114251152511625117251182511925120251212512225123251242512525126251272512825129251302513125132251332513425135251362513725138251392514025141251422514325144251452514625147251482514925150251512515225153251542515525156251572515825159251602516125162251632516425165251662516725168251692517025171251722517325174251752517625177251782517925180251812518225183251842518525186251872518825189251902519125192251932519425195251962519725198251992520025201252022520325204252052520625207252082520925210252112521225213252142521525216252172521825219252202522125222252232522425225252262522725228252292523025231252322523325234252352523625237252382523925240252412524225243252442524525246252472524825249252502525125252252532525425255252562525725258252592526025261252622526325264252652526625267252682526925270252712527225273252742527525276252772527825279252802528125282252832528425285252862528725288252892529025291252922529325294252952529625297252982529925300253012530225303253042530525306253072530825309253102531125312253132531425315253162531725318253192532025321253222532325324253252532625327253282532925330253312533225333253342533525336253372533825339253402534125342253432534425345253462534725348253492535025351253522535325354253552535625357253582535925360253612536225363253642536525366253672536825369253702537125372253732537425375253762537725378253792538025381253822538325384253852538625387253882538925390253912539225393253942539525396253972539825399254002540125402254032540425405254062540725408254092541025411254122541325414254152541625417254182541925420254212
54222542325424254252542625427254282542925430254312543225433254342543525436254372543825439254402544125442254432544425445254462544725448254492545025451254522545325454254552545625457254582545925460254612546225463254642546525466254672546825469254702547125472254732547425475254762547725478254792548025481254822548325484254852548625487254882548925490254912549225493254942549525496254972549825499255002550125502255032550425505255062550725508255092551025511255122551325514255152551625517255182551925520255212552225523255242552525526255272552825529255302553125532255332553425535255362553725538255392554025541255422554325544255452554625547255482554925550255512555225553255542555525556255572555825559255602556125562255632556425565255662556725568255692557025571255722557325574255752557625577255782557925580255812558225583255842558525586255872558825589255902559125592255932559425595255962559725598255992560025601256022560325604256052560625607256082560925610256112561225613256142561525616256172561825619256202562125622256232562425625256262562725628256292563025631256322563325634256352563625637256382563925640256412564225643256442564525646256472564825649256502565125652256532565425655256562565725658256592566025661256622566325664256652566625667256682566925670256712567225673256742567525676256772567825679256802568125682256832568425685256862568725688256892569025691256922569325694256952569625697256982569925700257012570225703257042570525706257072570825709257102571125712257132571425715257162571725718257192572025721257222572325724257252572625727257282572925730257312573225733257342573525736257372573825739257402574125742257432574425745257462574725748257492575025751257522575325754257552575625757257582575925760257612576225763257642576525766257672576825769257702577125772257732577425775257762577725778257792578025781257822578325784257852578625787257882578925790257912579225793257942579525796257972579825799258002580125802258032580425805258062580725808258092581025811258122581325814258152581625817258182581925820258212
58222582325824258252582625827258282582925830258312583225833258342583525836258372583825839258402584125842258432584425845258462584725848258492585025851258522585325854258552585625857258582585925860258612586225863258642586525866258672586825869258702587125872258732587425875258762587725878258792588025881258822588325884258852588625887258882588925890258912589225893258942589525896258972589825899259002590125902259032590425905259062590725908259092591025911259122591325914259152591625917259182591925920259212592225923259242592525926259272592825929259302593125932259332593425935259362593725938259392594025941259422594325944259452594625947259482594925950259512595225953259542595525956259572595825959259602596125962259632596425965259662596725968259692597025971259722597325974259752597625977259782597925980259812598225983259842598525986259872598825989259902599125992259932599425995259962599725998259992600026001260022600326004260052600626007260082600926010260112601226013260142601526016260172601826019260202602126022260232602426025260262602726028260292603026031260322603326034260352603626037260382603926040260412604226043260442604526046260472604826049260502605126052260532605426055260562605726058260592606026061260622606326064260652606626067260682606926070260712607226073260742607526076260772607826079260802608126082260832608426085260862608726088260892609026091260922609326094260952609626097260982609926100261012610226103261042610526106261072610826109261102611126112261132611426115261162611726118261192612026121261222612326124261252612626127261282612926130261312613226133261342613526136261372613826139261402614126142261432614426145261462614726148261492615026151261522615326154261552615626157261582615926160261612616226163261642616526166261672616826169261702617126172261732617426175261762617726178261792618026181261822618326184261852618626187261882618926190261912619226193261942619526196261972619826199262002620126202262032620426205262062620726208262092621026211262122621326214262152621626217262182621926220262212
62222622326224262252622626227262282622926230262312623226233262342623526236262372623826239262402624126242262432624426245262462624726248262492625026251262522625326254262552625626257262582625926260262612626226263262642626526266262672626826269262702627126272262732627426275262762627726278262792628026281262822628326284262852628626287262882628926290262912629226293262942629526296262972629826299263002630126302263032630426305263062630726308263092631026311263122631326314263152631626317263182631926320263212632226323263242632526326263272632826329263302633126332263332633426335263362633726338263392634026341263422634326344263452634626347263482634926350263512635226353263542635526356263572635826359263602636126362263632636426365263662636726368263692637026371263722637326374263752637626377263782637926380263812638226383263842638526386263872638826389263902639126392263932639426395263962639726398263992640026401264022640326404264052640626407264082640926410264112641226413264142641526416264172641826419264202642126422264232642426425264262642726428264292643026431264322643326434264352643626437264382643926440264412644226443264442644526446264472644826449264502645126452264532645426455264562645726458264592646026461264622646326464264652646626467264682646926470264712647226473264742647526476264772647826479264802648126482264832648426485264862648726488264892649026491264922649326494264952649626497264982649926500265012650226503265042650526506265072650826509265102651126512265132651426515265162651726518265192652026521265222652326524265252652626527265282652926530265312653226533265342653526536265372653826539265402654126542265432654426545265462654726548265492655026551265522655326554265552655626557265582655926560265612656226563265642656526566265672656826569265702657126572265732657426575265762657726578265792658026581265822658326584265852658626587265882658926590265912659226593265942659526596265972659826599266002660126602266032660426605266062660726608266092661026611266122661326614266152661626617266182661926620266212
66222662326624266252662626627266282662926630266312663226633266342663526636266372663826639266402664126642266432664426645266462664726648266492665026651266522665326654266552665626657266582665926660266612666226663266642666526666266672666826669266702667126672266732667426675266762667726678266792668026681266822668326684266852668626687266882668926690266912669226693266942669526696266972669826699267002670126702267032670426705267062670726708267092671026711267122671326714267152671626717267182671926720267212672226723267242672526726267272672826729267302673126732267332673426735267362673726738267392674026741267422674326744267452674626747267482674926750267512675226753267542675526756267572675826759267602676126762267632676426765267662676726768267692677026771267722677326774267752677626777267782677926780267812678226783267842678526786267872678826789267902679126792267932679426795267962679726798267992680026801268022680326804268052680626807268082680926810268112681226813268142681526816268172681826819268202682126822268232682426825268262682726828268292683026831268322683326834268352683626837268382683926840268412684226843268442684526846268472684826849268502685126852268532685426855268562685726858268592686026861268622686326864268652686626867268682686926870268712687226873268742687526876268772687826879268802688126882268832688426885268862688726888268892689026891268922689326894268952689626897268982689926900269012690226903269042690526906269072690826909269102691126912269132691426915269162691726918269192692026921269222692326924269252692626927269282692926930269312693226933269342693526936269372693826939269402694126942269432694426945269462694726948269492695026951269522695326954269552695626957269582695926960269612696226963269642696526966269672696826969269702697126972269732697426975269762697726978269792698026981269822698326984269852698626987269882698926990269912699226993269942699526996269972699826999270002700127002270032700427005270062700727008270092701027011270122701327014270152701627017270182701927020270212
70222702327024270252702627027270282702927030270312703227033270342703527036270372703827039270402704127042270432704427045270462704727048270492705027051270522705327054270552705627057270582705927060270612706227063270642706527066270672706827069270702707127072270732707427075270762707727078270792708027081270822708327084270852708627087270882708927090270912709227093270942709527096270972709827099271002710127102271032710427105271062710727108271092711027111271122711327114271152711627117271182711927120271212712227123271242712527126271272712827129271302713127132271332713427135271362713727138271392714027141271422714327144271452714627147271482714927150271512715227153271542715527156271572715827159271602716127162271632716427165271662716727168271692717027171271722717327174271752717627177271782717927180271812718227183271842718527186271872718827189271902719127192271932719427195271962719727198271992720027201272022720327204272052720627207272082720927210272112721227213272142721527216272172721827219272202722127222272232722427225272262722727228272292723027231272322723327234272352723627237272382723927240272412724227243272442724527246272472724827249272502725127252272532725427255272562725727258272592726027261272622726327264272652726627267272682726927270272712727227273272742727527276272772727827279272802728127282272832728427285272862728727288272892729027291272922729327294272952729627297272982729927300273012730227303273042730527306273072730827309273102731127312273132731427315273162731727318273192732027321273222732327324273252732627327273282732927330273312733227333273342733527336273372733827339273402734127342273432734427345273462734727348273492735027351273522735327354273552735627357273582735927360273612736227363273642736527366273672736827369273702737127372273732737427375273762737727378273792738027381273822738327384273852738627387273882738927390273912739227393273942739527396273972739827399274002740127402274032740427405274062740727408274092741027411274122741327414274152741627417274182741927420274212
74222742327424274252742627427274282742927430274312743227433274342743527436274372743827439274402744127442274432744427445274462744727448274492745027451274522745327454274552745627457274582745927460274612746227463274642746527466274672746827469274702747127472274732747427475274762747727478274792748027481274822748327484274852748627487274882748927490274912749227493274942749527496274972749827499275002750127502275032750427505275062750727508275092751027511275122751327514275152751627517275182751927520275212752227523275242752527526275272752827529275302753127532275332753427535275362753727538275392754027541275422754327544275452754627547275482754927550275512755227553275542755527556275572755827559275602756127562275632756427565275662756727568275692757027571275722757327574275752757627577275782757927580275812758227583275842758527586275872758827589275902759127592275932759427595275962759727598275992760027601276022760327604276052760627607276082760927610276112761227613276142761527616276172761827619276202762127622276232762427625276262762727628276292763027631276322763327634276352763627637276382763927640276412764227643276442764527646276472764827649276502765127652276532765427655276562765727658276592766027661276622766327664276652766627667276682766927670276712767227673276742767527676276772767827679276802768127682276832768427685276862768727688276892769027691276922769327694276952769627697276982769927700277012770227703277042770527706277072770827709277102771127712277132771427715277162771727718277192772027721277222772327724277252772627727277282772927730277312773227733277342773527736277372773827739277402774127742277432774427745277462774727748277492775027751277522775327754277552775627757277582775927760277612776227763277642776527766277672776827769277702777127772277732777427775277762777727778277792778027781277822778327784277852778627787277882778927790277912779227793277942779527796277972779827799278002780127802278032780427805278062780727808278092781027811278122781327814278152781627817278182781927820278212
78222782327824278252782627827278282782927830278312783227833278342783527836278372783827839278402784127842278432784427845278462784727848278492785027851278522785327854278552785627857278582785927860278612786227863278642786527866278672786827869278702787127872278732787427875278762787727878278792788027881278822788327884278852788627887278882788927890278912789227893278942789527896278972789827899279002790127902279032790427905279062790727908279092791027911279122791327914279152791627917279182791927920279212792227923279242792527926279272792827929279302793127932279332793427935279362793727938279392794027941279422794327944279452794627947279482794927950279512795227953279542795527956279572795827959279602796127962279632796427965279662796727968279692797027971279722797327974279752797627977279782797927980279812798227983279842798527986279872798827989279902799127992279932799427995279962799727998279992800028001280022800328004280052800628007280082800928010280112801228013280142801528016280172801828019280202802128022280232802428025280262802728028280292803028031280322803328034280352803628037280382803928040280412804228043280442804528046280472804828049280502805128052280532805428055280562805728058280592806028061280622806328064280652806628067280682806928070280712807228073280742807528076280772807828079280802808128082280832808428085280862808728088280892809028091280922809328094280952809628097280982809928100281012810228103281042810528106281072810828109281102811128112281132811428115281162811728118281192812028121281222812328124281252812628127281282812928130281312813228133281342813528136281372813828139281402814128142281432814428145281462814728148281492815028151281522815328154281552815628157281582815928160281612816228163281642816528166281672816828169281702817128172281732817428175281762817728178281792818028181281822818328184281852818628187281882818928190281912819228193281942819528196281972819828199282002820128202282032820428205282062820728208282092821028211282122821328214282152821628217282182821928220282212
82222822328224282252822628227282282822928230282312823228233282342823528236282372823828239282402824128242282432824428245282462824728248282492825028251282522825328254282552825628257282582825928260282612826228263282642826528266282672826828269282702827128272282732827428275282762827728278282792828028281282822828328284282852828628287282882828928290282912829228293282942829528296282972829828299283002830128302283032830428305283062830728308283092831028311283122831328314283152831628317283182831928320283212832228323283242832528326283272832828329283302833128332283332833428335283362833728338283392834028341283422834328344283452834628347283482834928350283512835228353283542835528356283572835828359283602836128362283632836428365283662836728368283692837028371283722837328374283752837628377283782837928380283812838228383283842838528386283872838828389283902839128392283932839428395283962839728398283992840028401284022840328404284052840628407284082840928410284112841228413284142841528416284172841828419284202842128422284232842428425284262842728428284292843028431284322843328434284352843628437284382843928440284412844228443284442844528446284472844828449284502845128452284532845428455284562845728458284592846028461284622846328464284652846628467284682846928470284712847228473284742847528476284772847828479284802848128482284832848428485284862848728488284892849028491284922849328494284952849628497284982849928500285012850228503285042850528506285072850828509285102851128512285132851428515285162851728518285192852028521285222852328524285252852628527285282852928530285312853228533285342853528536285372853828539285402854128542285432854428545285462854728548285492855028551285522855328554285552855628557285582855928560285612856228563285642856528566285672856828569285702857128572285732857428575285762857728578285792858028581285822858328584285852858628587285882858928590285912859228593285942859528596285972859828599286002860128602286032860428605286062860728608286092861028611286122861328614286152861628617286182861928620286212
86222862328624286252862628627286282862928630286312863228633286342863528636286372863828639286402864128642286432864428645286462864728648286492865028651286522865328654286552865628657286582865928660286612866228663286642866528666286672866828669286702867128672286732867428675286762867728678286792868028681286822868328684286852868628687286882868928690286912869228693286942869528696286972869828699287002870128702287032870428705287062870728708287092871028711287122871328714287152871628717287182871928720287212872228723287242872528726287272872828729287302873128732287332873428735287362873728738287392874028741287422874328744287452874628747287482874928750287512875228753287542875528756287572875828759287602876128762287632876428765287662876728768287692877028771287722877328774287752877628777287782877928780287812878228783287842878528786287872878828789287902879128792287932879428795287962879728798287992880028801288022880328804288052880628807288082880928810288112881228813288142881528816288172881828819288202882128822288232882428825288262882728828288292883028831288322883328834288352883628837288382883928840288412884228843288442884528846288472884828849288502885128852288532885428855288562885728858288592886028861288622886328864288652886628867288682886928870288712887228873288742887528876288772887828879288802888128882288832888428885288862888728888288892889028891288922889328894288952889628897288982889928900289012890228903289042890528906289072890828909289102891128912289132891428915289162891728918289192892028921289222892328924289252892628927289282892928930289312893228933289342893528936289372893828939289402894128942289432894428945289462894728948289492895028951289522895328954289552895628957289582895928960289612896228963289642896528966289672896828969289702897128972289732897428975289762897728978289792898028981289822898328984289852898628987289882898928990289912899228993289942899528996289972899828999290002900129002290032900429005290062900729008290092901029011290122901329014290152901629017290182901929020290212
90222902329024290252902629027290282902929030290312903229033290342903529036290372903829039290402904129042290432904429045290462904729048290492905029051290522905329054290552905629057290582905929060290612906229063290642906529066290672906829069290702907129072290732907429075290762907729078290792908029081290822908329084290852908629087290882908929090290912909229093290942909529096290972909829099291002910129102291032910429105291062910729108291092911029111291122911329114291152911629117291182911929120291212912229123291242912529126291272912829129291302913129132291332913429135291362913729138291392914029141291422914329144291452914629147291482914929150291512915229153291542915529156291572915829159291602916129162291632916429165291662916729168291692917029171291722917329174291752917629177291782917929180291812918229183291842918529186291872918829189291902919129192291932919429195291962919729198291992920029201292022920329204292052920629207292082920929210292112921229213292142921529216292172921829219292202922129222292232922429225292262922729228292292923029231292322923329234292352923629237292382923929240292412924229243292442924529246292472924829249292502925129252292532925429255292562925729258292592926029261292622926329264292652926629267292682926929270292712927229273292742927529276292772927829279292802928129282292832928429285292862928729288292892929029291292922929329294292952929629297292982929929300293012930229303293042930529306293072930829309293102931129312293132931429315293162931729318293192932029321293222932329324293252932629327293282932929330293312933229333293342933529336293372933829339293402934129342293432934429345293462934729348293492935029351293522935329354293552935629357293582935929360293612936229363293642936529366293672936829369293702937129372293732937429375293762937729378293792938029381293822938329384293852938629387293882938929390293912939229393293942939529396293972939829399294002940129402294032940429405294062940729408294092941029411294122941329414294152941629417294182941929420294212
94222942329424294252942629427294282942929430294312943229433294342943529436294372943829439294402944129442294432944429445294462944729448294492945029451294522945329454294552945629457294582945929460294612946229463294642946529466294672946829469294702947129472294732947429475294762947729478294792948029481294822948329484294852948629487294882948929490294912949229493294942949529496294972949829499295002950129502295032950429505295062950729508295092951029511295122951329514295152951629517295182951929520295212952229523295242952529526295272952829529295302953129532295332953429535295362953729538295392954029541295422954329544295452954629547295482954929550295512955229553295542955529556295572955829559295602956129562295632956429565295662956729568295692957029571295722957329574295752957629577295782957929580295812958229583295842958529586295872958829589295902959129592295932959429595295962959729598295992960029601296022960329604296052960629607296082960929610296112961229613296142961529616296172961829619296202962129622296232962429625296262962729628296292963029631296322963329634296352963629637296382963929640296412964229643296442964529646296472964829649296502965129652296532965429655296562965729658296592966029661296622966329664296652966629667296682966929670296712967229673296742967529676296772967829679296802968129682296832968429685296862968729688296892969029691296922969329694296952969629697296982969929700297012970229703297042970529706297072970829709297102971129712297132971429715297162971729718297192972029721297222972329724297252972629727297282972929730297312973229733297342973529736297372973829739297402974129742297432974429745297462974729748297492975029751297522975329754297552975629757297582975929760297612976229763297642976529766297672976829769297702977129772297732977429775297762977729778297792978029781297822978329784297852978629787297882978929790297912979229793297942979529796297972979829799298002980129802298032980429805298062980729808298092981029811298122981329814298152981629817298182981929820298212
98222982329824298252982629827298282982929830298312983229833298342983529836298372983829839298402984129842298432984429845298462984729848298492985029851298522985329854298552985629857298582985929860298612986229863298642986529866298672986829869298702987129872298732987429875298762987729878298792988029881298822988329884298852988629887298882988929890298912989229893298942989529896298972989829899299002990129902299032990429905299062990729908299092991029911299122991329914299152991629917299182991929920299212992229923299242992529926299272992829929299302993129932299332993429935299362993729938299392994029941299422994329944299452994629947299482994929950299512995229953299542995529956299572995829959299602996129962299632996429965299662996729968299692997029971299722997329974299752997629977299782997929980299812998229983299842998529986299872998829989299902999129992299932999429995299962999729998299993000030001300023000330004300053000630007300083000930010300113001230013300143001530016300173001830019300203002130022300233002430025300263002730028300293003030031300323003330034300353003630037300383003930040300413004230043300443004530046300473004830049300503005130052300533005430055300563005730058300593006030061300623006330064300653006630067300683006930070300713007230073300743007530076300773007830079300803008130082300833008430085300863008730088300893009030091300923009330094300953009630097300983009930100301013010230103301043010530106301073010830109301103011130112301133011430115301163011730118301193012030121301223012330124301253012630127301283012930130301313013230133301343013530136301373013830139301403014130142301433014430145301463014730148301493015030151301523015330154301553015630157301583015930160301613016230163301643016530166301673016830169301703017130172301733017430175301763017730178301793018030181301823018330184301853018630187301883018930190301913019230193301943019530196301973019830199302003020130202302033020430205302063020730208302093021030211302123021330214302153021630217302183021930220302213
02223022330224302253022630227302283022930230302313023230233302343023530236302373023830239302403024130242302433024430245302463024730248302493025030251302523025330254302553025630257302583025930260302613026230263302643026530266302673026830269302703027130272302733027430275302763027730278302793028030281302823028330284302853028630287302883028930290302913029230293302943029530296302973029830299303003030130302303033030430305303063030730308303093031030311303123031330314303153031630317303183031930320303213032230323303243032530326303273032830329303303033130332303333033430335303363033730338303393034030341303423034330344303453034630347303483034930350303513035230353303543035530356303573035830359303603036130362303633036430365303663036730368303693037030371303723037330374303753037630377303783037930380303813038230383303843038530386303873038830389303903039130392303933039430395303963039730398303993040030401304023040330404304053040630407304083040930410304113041230413304143041530416304173041830419304203042130422304233042430425304263042730428304293043030431304323043330434304353043630437304383043930440304413044230443304443044530446304473044830449304503045130452304533045430455304563045730458304593046030461304623046330464304653046630467304683046930470304713047230473304743047530476304773047830479304803048130482304833048430485304863048730488304893049030491304923049330494304953049630497304983049930500305013050230503305043050530506305073050830509305103051130512305133051430515305163051730518305193052030521305223052330524305253052630527305283052930530305313053230533305343053530536305373053830539305403054130542305433054430545305463054730548305493055030551305523055330554305553055630557305583055930560305613056230563305643056530566305673056830569305703057130572305733057430575305763057730578305793058030581305823058330584305853058630587305883058930590305913059230593305943059530596305973059830599306003060130602306033060430605306063060730608306093061030611306123061330614306153061630617306183061930620306213
06223062330624306253062630627306283062930630306313063230633306343063530636306373063830639306403064130642306433064430645306463064730648306493065030651306523065330654306553065630657306583065930660306613066230663306643066530666306673066830669306703067130672306733067430675306763067730678306793068030681306823068330684306853068630687306883068930690306913069230693306943069530696306973069830699307003070130702307033070430705307063070730708307093071030711307123071330714307153071630717307183071930720307213072230723307243072530726307273072830729307303073130732307333073430735307363073730738307393074030741307423074330744307453074630747307483074930750307513075230753307543075530756307573075830759307603076130762307633076430765307663076730768307693077030771307723077330774307753077630777307783077930780307813078230783307843078530786307873078830789307903079130792307933079430795307963079730798307993080030801308023080330804308053080630807308083080930810308113081230813308143081530816308173081830819308203082130822308233082430825308263082730828308293083030831308323083330834308353083630837308383083930840308413084230843308443084530846308473084830849308503085130852308533085430855308563085730858308593086030861308623086330864308653086630867308683086930870308713087230873308743087530876308773087830879308803088130882308833088430885308863088730888308893089030891308923089330894308953089630897308983089930900309013090230903309043090530906309073090830909309103091130912309133091430915309163091730918309193092030921309223092330924309253092630927309283092930930309313093230933309343093530936309373093830939309403094130942309433094430945309463094730948309493095030951309523095330954309553095630957309583095930960309613096230963309643096530966309673096830969309703097130972309733097430975309763097730978309793098030981309823098330984309853098630987309883098930990309913099230993309943099530996309973099830999310003100131002310033100431005310063100731008310093101031011310123101331014310153101631017310183101931020310213
10223102331024310253102631027310283102931030310313103231033310343103531036310373103831039310403104131042310433104431045310463104731048310493105031051310523105331054310553105631057310583105931060310613106231063310643106531066310673106831069310703107131072310733107431075310763107731078310793108031081310823108331084310853108631087310883108931090310913109231093310943109531096310973109831099311003110131102311033110431105311063110731108311093111031111311123111331114311153111631117311183111931120311213112231123311243112531126311273112831129311303113131132311333113431135311363113731138311393114031141311423114331144311453114631147311483114931150311513115231153311543115531156311573115831159311603116131162311633116431165311663116731168311693117031171311723117331174311753117631177311783117931180311813118231183311843118531186311873118831189311903119131192311933119431195311963119731198311993120031201312023120331204312053120631207312083120931210312113121231213312143121531216312173121831219312203122131222312233122431225312263122731228312293123031231312323123331234312353123631237312383123931240312413124231243312443124531246312473124831249312503125131252312533125431255312563125731258312593126031261312623126331264312653126631267312683126931270312713127231273312743127531276312773127831279312803128131282312833128431285312863128731288312893129031291312923129331294312953129631297312983129931300313013130231303313043130531306313073130831309313103131131312313133131431315313163131731318313193132031321313223132331324313253132631327313283132931330313313133231333313343133531336313373133831339313403134131342313433134431345313463134731348313493135031351313523135331354313553135631357313583135931360313613136231363313643136531366313673136831369313703137131372313733137431375313763137731378313793138031381313823138331384313853138631387313883138931390313913139231393313943139531396313973139831399314003140131402314033140431405314063140731408314093141031411314123141331414314153141631417314183141931420314213
14223142331424314253142631427314283142931430314313143231433314343143531436314373143831439314403144131442314433144431445314463144731448314493145031451314523145331454314553145631457314583145931460314613146231463314643146531466314673146831469314703147131472314733147431475314763147731478314793148031481314823148331484314853148631487314883148931490314913149231493314943149531496314973149831499315003150131502315033150431505315063150731508315093151031511315123151331514315153151631517315183151931520315213152231523315243152531526315273152831529315303153131532315333153431535315363153731538315393154031541315423154331544315453154631547315483154931550315513155231553315543155531556315573155831559315603156131562315633156431565315663156731568315693157031571315723157331574315753157631577315783157931580315813158231583315843158531586315873158831589315903159131592315933159431595315963159731598315993160031601316023160331604316053160631607316083160931610316113161231613316143161531616316173161831619316203162131622316233162431625316263162731628316293163031631316323163331634316353163631637316383163931640316413164231643316443164531646316473164831649316503165131652316533165431655316563165731658316593166031661316623166331664316653166631667316683166931670316713167231673316743167531676316773167831679316803168131682316833168431685316863168731688316893169031691316923169331694316953169631697316983169931700317013170231703317043170531706317073170831709317103171131712317133171431715317163171731718317193172031721317223172331724317253172631727317283172931730317313173231733317343173531736317373173831739317403174131742317433174431745317463174731748317493175031751317523175331754317553175631757317583175931760317613176231763317643176531766317673176831769317703177131772317733177431775317763177731778317793178031781317823178331784317853178631787317883178931790317913179231793317943179531796317973179831799318003180131802318033180431805318063180731808318093181031811318123181331814318153181631817318183181931820318213
18223182331824318253182631827318283182931830318313183231833318343183531836318373183831839318403184131842318433184431845318463184731848318493185031851318523185331854318553185631857318583185931860318613186231863318643186531866318673186831869318703187131872318733187431875318763187731878318793188031881318823188331884318853188631887318883188931890318913189231893318943189531896318973189831899319003190131902319033190431905319063190731908319093191031911319123191331914319153191631917319183191931920319213192231923319243192531926319273192831929319303193131932319333193431935319363193731938319393194031941319423194331944319453194631947319483194931950319513195231953319543195531956319573195831959319603196131962319633196431965319663196731968319693197031971319723197331974319753197631977319783197931980319813198231983319843198531986319873198831989319903199131992319933199431995319963199731998319993200032001320023200332004320053200632007320083200932010320113201232013320143201532016320173201832019320203202132022320233202432025320263202732028320293203032031320323203332034320353203632037320383203932040320413204232043320443204532046320473204832049320503205132052320533205432055320563205732058320593206032061320623206332064320653206632067320683206932070320713207232073320743207532076320773207832079320803208132082320833208432085320863208732088320893209032091320923209332094320953209632097320983209932100321013210232103321043210532106321073210832109321103211132112321133211432115321163211732118321193212032121321223212332124321253212632127321283212932130321313213232133321343213532136321373213832139321403214132142321433214432145321463214732148321493215032151321523215332154321553215632157321583215932160321613216232163321643216532166321673216832169321703217132172321733217432175321763217732178321793218032181321823218332184321853218632187321883218932190321913219232193321943219532196321973219832199322003220132202322033220432205322063220732208322093221032211322123221332214322153221632217322183221932220322213
22223222332224322253222632227322283222932230322313223232233322343223532236322373223832239322403224132242322433224432245322463224732248322493225032251322523225332254322553225632257322583225932260322613226232263322643226532266322673226832269322703227132272322733227432275322763227732278322793228032281322823228332284322853228632287322883228932290322913229232293322943229532296322973229832299323003230132302323033230432305323063230732308323093231032311323123231332314323153231632317323183231932320323213232232323323243232532326323273232832329323303233132332323333233432335323363233732338323393234032341323423234332344323453234632347323483234932350323513235232353323543235532356323573235832359323603236132362323633236432365323663236732368323693237032371323723237332374323753237632377323783237932380323813238232383323843238532386323873238832389323903239132392323933239432395323963239732398323993240032401324023240332404324053240632407324083240932410324113241232413324143241532416324173241832419324203242132422324233242432425324263242732428324293243032431324323243332434324353243632437324383243932440324413244232443324443244532446324473244832449324503245132452324533245432455324563245732458324593246032461324623246332464324653246632467324683246932470324713247232473324743247532476324773247832479324803248132482324833248432485324863248732488324893249032491324923249332494324953249632497324983249932500325013250232503325043250532506325073250832509325103251132512325133251432515325163251732518325193252032521325223252332524325253252632527325283252932530325313253232533325343253532536325373253832539325403254132542325433254432545325463254732548325493255032551325523255332554325553255632557325583255932560325613256232563325643256532566325673256832569325703257132572325733257432575325763257732578325793258032581325823258332584325853258632587325883258932590325913259232593325943259532596325973259832599326003260132602326033260432605326063260732608326093261032611326123261332614326153261632617326183261932620326213
26223262332624326253262632627326283262932630326313263232633326343263532636326373263832639326403264132642326433264432645326463264732648326493265032651326523265332654326553265632657326583265932660326613266232663326643266532666326673266832669326703267132672326733267432675326763267732678326793268032681326823268332684326853268632687326883268932690326913269232693326943269532696326973269832699327003270132702327033270432705327063270732708327093271032711327123271332714327153271632717327183271932720327213272232723327243272532726327273272832729327303273132732327333273432735327363273732738327393274032741327423274332744327453274632747327483274932750327513275232753327543275532756327573275832759327603276132762327633276432765327663276732768327693277032771327723277332774327753277632777327783277932780327813278232783327843278532786327873278832789327903279132792327933279432795327963279732798327993280032801328023280332804328053280632807328083280932810328113281232813328143281532816328173281832819328203282132822328233282432825328263282732828328293283032831328323283332834328353283632837328383283932840328413284232843328443284532846328473284832849328503285132852328533285432855328563285732858328593286032861328623286332864328653286632867328683286932870328713287232873328743287532876328773287832879328803288132882328833288432885328863288732888328893289032891328923289332894328953289632897328983289932900329013290232903329043290532906329073290832909329103291132912329133291432915329163291732918329193292032921329223292332924329253292632927329283292932930329313293232933329343293532936329373293832939329403294132942329433294432945329463294732948329493295032951329523295332954329553295632957329583295932960329613296232963329643296532966329673296832969329703297132972329733297432975329763297732978329793298032981329823298332984329853298632987329883298932990329913299232993329943299532996329973299832999330003300133002330033300433005330063300733008330093301033011330123301333014330153301633017330183301933020330213
30223302333024330253302633027330283302933030330313303233033330343303533036330373303833039330403304133042330433304433045330463304733048330493305033051330523305333054330553305633057330583305933060330613306233063330643306533066330673306833069330703307133072330733307433075330763307733078330793308033081330823308333084330853308633087330883308933090330913309233093330943309533096330973309833099331003310133102331033310433105331063310733108331093311033111331123311333114331153311633117331183311933120331213312233123331243312533126331273312833129331303313133132331333313433135331363313733138331393314033141331423314333144331453314633147331483314933150331513315233153331543315533156331573315833159331603316133162331633316433165331663316733168331693317033171331723317333174331753317633177331783317933180331813318233183331843318533186331873318833189331903319133192331933319433195331963319733198331993320033201332023320333204332053320633207332083320933210332113321233213332143321533216332173321833219332203322133222332233322433225332263322733228332293323033231332323323333234332353323633237332383323933240332413324233243332443324533246332473324833249332503325133252332533325433255332563325733258332593326033261332623326333264332653326633267332683326933270332713327233273332743327533276332773327833279332803328133282332833328433285332863328733288332893329033291332923329333294332953329633297332983329933300333013330233303333043330533306333073330833309333103331133312333133331433315333163331733318333193332033321333223332333324333253332633327333283332933330333313333233333333343333533336333373333833339333403334133342333433334433345333463334733348333493335033351333523335333354333553335633357333583335933360333613336233363333643336533366333673336833369333703337133372333733337433375333763337733378333793338033381333823338333384333853338633387333883338933390333913339233393333943339533396333973339833399334003340133402334033340433405334063340733408334093341033411334123341333414334153341633417334183341933420334213
34223342333424334253342633427334283342933430334313343233433334343343533436334373343833439334403344133442334433344433445334463344733448334493345033451334523345333454334553345633457334583345933460334613346233463334643346533466334673346833469334703347133472334733347433475334763347733478334793348033481334823348333484334853348633487334883348933490334913349233493334943349533496334973349833499335003350133502335033350433505335063350733508335093351033511335123351333514335153351633517335183351933520335213352233523335243352533526335273352833529335303353133532335333353433535335363353733538335393354033541335423354333544335453354633547335483354933550335513355233553335543355533556335573355833559335603356133562335633356433565335663356733568335693357033571335723357333574335753357633577335783357933580335813358233583335843358533586335873358833589335903359133592335933359433595335963359733598335993360033601336023360333604336053360633607336083360933610336113361233613336143361533616336173361833619336203362133622336233362433625336263362733628336293363033631336323363333634336353363633637336383363933640336413364233643336443364533646336473364833649336503365133652336533365433655336563365733658336593366033661336623366333664336653366633667336683366933670336713367233673336743367533676336773367833679336803368133682336833368433685336863368733688336893369033691336923369333694336953369633697336983369933700337013370233703337043370533706337073370833709337103371133712337133371433715337163371733718337193372033721337223372333724337253372633727337283372933730337313373233733337343373533736337373373833739337403374133742337433374433745337463374733748337493375033751337523375333754337553375633757337583375933760337613376233763337643376533766337673376833769337703377133772337733377433775337763377733778337793378033781337823378333784337853378633787337883378933790337913379233793337943379533796337973379833799338003380133802338033380433805338063380733808338093381033811338123381333814338153381633817338183381933820338213
38223382333824338253382633827338283382933830338313383233833338343383533836338373383833839338403384133842338433384433845338463384733848338493385033851338523385333854338553385633857338583385933860338613386233863338643386533866338673386833869338703387133872338733387433875338763387733878338793388033881338823388333884338853388633887338883388933890338913389233893338943389533896338973389833899339003390133902339033390433905339063390733908339093391033911339123391333914339153391633917339183391933920339213392233923339243392533926339273392833929339303393133932339333393433935339363393733938339393394033941339423394333944339453394633947339483394933950339513395233953339543395533956339573395833959339603396133962339633396433965339663396733968339693397033971339723397333974339753397633977339783397933980339813398233983339843398533986339873398833989339903399133992339933399433995339963399733998339993400034001340023400334004340053400634007340083400934010340113401234013340143401534016340173401834019340203402134022340233402434025340263402734028340293403034031340323403334034340353403634037340383403934040340413404234043340443404534046340473404834049340503405134052340533405434055340563405734058340593406034061340623406334064340653406634067340683406934070340713407234073340743407534076340773407834079340803408134082340833408434085340863408734088340893409034091340923409334094340953409634097340983409934100341013410234103341043410534106341073410834109341103411134112341133411434115341163411734118341193412034121341223412334124341253412634127341283412934130341313413234133341343413534136341373413834139341403414134142341433414434145341463414734148341493415034151341523415334154341553415634157341583415934160341613416234163341643416534166341673416834169341703417134172341733417434175341763417734178341793418034181341823418334184341853418634187341883418934190341913419234193341943419534196341973419834199342003420134202342033420434205342063420734208342093421034211342123421334214342153421634217342183421934220342213
42223422334224342253422634227342283422934230342313423234233342343423534236342373423834239342403424134242342433424434245342463424734248342493425034251342523425334254342553425634257342583425934260342613426234263342643426534266342673426834269342703427134272342733427434275342763427734278342793428034281342823428334284342853428634287342883428934290342913429234293342943429534296342973429834299343003430134302343033430434305343063430734308343093431034311343123431334314343153431634317343183431934320343213432234323343243432534326343273432834329343303433134332343333433434335343363433734338343393434034341343423434334344343453434634347343483434934350343513435234353343543435534356343573435834359343603436134362343633436434365343663436734368343693437034371343723437334374343753437634377343783437934380343813438234383343843438534386343873438834389343903439134392343933439434395343963439734398343993440034401344023440334404344053440634407344083440934410344113441234413344143441534416344173441834419344203442134422344233442434425344263442734428344293443034431344323443334434344353443634437344383443934440344413444234443344443444534446344473444834449344503445134452344533445434455344563445734458344593446034461344623446334464344653446634467344683446934470344713447234473344743447534476344773447834479344803448134482344833448434485344863448734488344893449034491344923449334494344953449634497344983449934500345013450234503345043450534506345073450834509345103451134512345133451434515345163451734518345193452034521345223452334524345253452634527345283452934530345313453234533345343453534536345373453834539345403454134542345433454434545345463454734548345493455034551345523455334554345553455634557345583455934560345613456234563345643456534566345673456834569345703457134572345733457434575345763457734578345793458034581345823458334584345853458634587345883458934590345913459234593345943459534596345973459834599346003460134602346033460434605346063460734608346093461034611346123461334614346153461634617346183461934620346213
46223462334624346253462634627346283462934630346313463234633346343463534636346373463834639346403464134642346433464434645346463464734648346493465034651346523465334654346553465634657346583465934660346613466234663346643466534666346673466834669346703467134672346733467434675346763467734678346793468034681346823468334684346853468634687346883468934690346913469234693346943469534696346973469834699347003470134702347033470434705347063470734708347093471034711347123471334714347153471634717347183471934720347213472234723347243472534726347273472834729347303473134732347333473434735347363473734738347393474034741347423474334744347453474634747347483474934750347513475234753347543475534756347573475834759347603476134762347633476434765347663476734768347693477034771347723477334774347753477634777347783477934780347813478234783347843478534786347873478834789347903479134792347933479434795347963479734798347993480034801348023480334804348053480634807348083480934810348113481234813348143481534816348173481834819348203482134822348233482434825348263482734828348293483034831348323483334834348353483634837348383483934840348413484234843348443484534846348473484834849348503485134852348533485434855348563485734858348593486034861348623486334864348653486634867348683486934870348713487234873348743487534876348773487834879348803488134882348833488434885348863488734888348893489034891348923489334894348953489634897348983489934900349013490234903349043490534906349073490834909349103491134912349133491434915349163491734918349193492034921349223492334924349253492634927349283492934930349313493234933349343493534936349373493834939349403494134942349433494434945349463494734948349493495034951349523495334954349553495634957349583495934960349613496234963349643496534966349673496834969349703497134972349733497434975349763497734978349793498034981349823498334984349853498634987349883498934990349913499234993349943499534996349973499834999350003500135002350033500435005350063500735008350093501035011350123501335014350153501635017350183501935020350213
50223502335024350253502635027350283502935030350313503235033350343503535036350373503835039350403504135042350433504435045350463504735048350493505035051350523505335054350553505635057350583505935060350613506235063350643506535066350673506835069350703507135072350733507435075350763507735078350793508035081350823508335084350853508635087350883508935090350913509235093350943509535096350973509835099351003510135102351033510435105351063510735108351093511035111351123511335114351153511635117351183511935120351213512235123351243512535126351273512835129351303513135132351333513435135351363513735138351393514035141351423514335144351453514635147351483514935150351513515235153351543515535156351573515835159351603516135162351633516435165351663516735168351693517035171351723517335174351753517635177351783517935180351813518235183351843518535186351873518835189351903519135192351933519435195351963519735198351993520035201352023520335204352053520635207352083520935210352113521235213352143521535216352173521835219352203522135222352233522435225352263522735228352293523035231352323523335234352353523635237352383523935240352413524235243352443524535246352473524835249352503525135252352533525435255352563525735258352593526035261352623526335264352653526635267352683526935270352713527235273352743527535276352773527835279352803528135282352833528435285352863528735288352893529035291352923529335294352953529635297352983529935300353013530235303353043530535306353073530835309353103531135312353133531435315353163531735318353193532035321353223532335324353253532635327353283532935330353313533235333353343533535336353373533835339353403534135342353433534435345353463534735348353493535035351353523535335354353553535635357353583535935360353613536235363353643536535366353673536835369353703537135372353733537435375353763537735378353793538035381353823538335384353853538635387353883538935390353913539235393353943539535396353973539835399354003540135402354033540435405354063540735408354093541035411354123541335414354153541635417354183541935420354213
54223542335424354253542635427354283542935430354313543235433354343543535436354373543835439354403544135442354433544435445354463544735448354493545035451354523545335454354553545635457354583545935460354613546235463354643546535466354673546835469354703547135472354733547435475354763547735478354793548035481354823548335484354853548635487354883548935490354913549235493354943549535496354973549835499355003550135502355033550435505355063550735508355093551035511355123551335514355153551635517355183551935520355213552235523355243552535526355273552835529355303553135532355333553435535355363553735538355393554035541355423554335544355453554635547355483554935550355513555235553355543555535556355573555835559355603556135562355633556435565355663556735568355693557035571355723557335574355753557635577355783557935580355813558235583355843558535586355873558835589355903559135592355933559435595355963559735598355993560035601356023560335604356053560635607356083560935610356113561235613356143561535616356173561835619356203562135622356233562435625356263562735628356293563035631356323563335634356353563635637356383563935640356413564235643356443564535646356473564835649356503565135652356533565435655356563565735658356593566035661356623566335664356653566635667356683566935670356713567235673356743567535676356773567835679356803568135682356833568435685356863568735688356893569035691356923569335694356953569635697356983569935700357013570235703357043570535706357073570835709357103571135712357133571435715357163571735718357193572035721357223572335724357253572635727357283572935730357313573235733357343573535736357373573835739357403574135742357433574435745357463574735748357493575035751357523575335754357553575635757357583575935760357613576235763357643576535766357673576835769357703577135772357733577435775357763577735778357793578035781357823578335784357853578635787357883578935790357913579235793357943579535796357973579835799358003580135802358033580435805358063580735808358093581035811358123581335814358153581635817358183581935820358213
58223582335824358253582635827358283582935830358313583235833358343583535836358373583835839358403584135842358433584435845358463584735848358493585035851358523585335854358553585635857358583585935860358613586235863358643586535866358673586835869358703587135872358733587435875358763587735878358793588035881358823588335884358853588635887358883588935890358913589235893358943589535896358973589835899359003590135902359033590435905359063590735908359093591035911359123591335914359153591635917359183591935920359213592235923359243592535926359273592835929359303593135932359333593435935359363593735938359393594035941359423594335944359453594635947359483594935950359513595235953359543595535956359573595835959359603596135962359633596435965359663596735968359693597035971359723597335974359753597635977359783597935980359813598235983359843598535986359873598835989359903599135992359933599435995359963599735998359993600036001360023600336004360053600636007360083600936010360113601236013360143601536016360173601836019360203602136022360233602436025360263602736028360293603036031360323603336034360353603636037360383603936040360413604236043360443604536046360473604836049360503605136052360533605436055360563605736058360593606036061360623606336064360653606636067360683606936070360713607236073360743607536076360773607836079360803608136082360833608436085360863608736088360893609036091360923609336094360953609636097360983609936100361013610236103361043610536106361073610836109361103611136112361133611436115361163611736118361193612036121361223612336124361253612636127361283612936130361313613236133361343613536136361373613836139361403614136142361433614436145361463614736148361493615036151361523615336154361553615636157361583615936160361613616236163361643616536166361673616836169361703617136172361733617436175361763617736178361793618036181361823618336184361853618636187361883618936190361913619236193361943619536196361973619836199362003620136202362033620436205362063620736208362093621036211362123621336214362153621636217362183621936220362213
62223622336224362253622636227362283622936230362313623236233362343623536236362373623836239362403624136242362433624436245362463624736248362493625036251362523625336254362553625636257362583625936260362613626236263362643626536266362673626836269362703627136272362733627436275362763627736278362793628036281362823628336284362853628636287362883628936290362913629236293362943629536296362973629836299363003630136302363033630436305363063630736308363093631036311363123631336314363153631636317363183631936320363213632236323363243632536326363273632836329363303633136332363333633436335363363633736338363393634036341363423634336344363453634636347363483634936350363513635236353363543635536356363573635836359363603636136362363633636436365363663636736368363693637036371363723637336374363753637636377363783637936380363813638236383363843638536386363873638836389363903639136392363933639436395363963639736398363993640036401364023640336404364053640636407364083640936410364113641236413364143641536416364173641836419364203642136422364233642436425364263642736428364293643036431364323643336434364353643636437364383643936440364413644236443364443644536446364473644836449364503645136452364533645436455364563645736458364593646036461364623646336464364653646636467364683646936470364713647236473364743647536476364773647836479364803648136482364833648436485364863648736488364893649036491364923649336494364953649636497364983649936500365013650236503365043650536506365073650836509365103651136512365133651436515365163651736518365193652036521365223652336524365253652636527365283652936530365313653236533365343653536536365373653836539365403654136542365433654436545365463654736548365493655036551365523655336554365553655636557365583655936560365613656236563365643656536566365673656836569365703657136572365733657436575365763657736578365793658036581365823658336584365853658636587365883658936590365913659236593365943659536596365973659836599366003660136602366033660436605366063660736608366093661036611366123661336614366153661636617366183661936620366213
66223662336624366253662636627366283662936630366313663236633366343663536636366373663836639366403664136642366433664436645366463664736648366493665036651366523665336654366553665636657366583665936660366613666236663366643666536666366673666836669366703667136672366733667436675366763667736678366793668036681366823668336684366853668636687366883668936690366913669236693366943669536696366973669836699367003670136702367033670436705367063670736708367093671036711367123671336714367153671636717367183671936720367213672236723367243672536726367273672836729367303673136732367333673436735367363673736738367393674036741367423674336744367453674636747367483674936750367513675236753367543675536756367573675836759367603676136762367633676436765367663676736768367693677036771367723677336774367753677636777367783677936780367813678236783367843678536786367873678836789367903679136792367933679436795367963679736798367993680036801368023680336804368053680636807368083680936810368113681236813368143681536816368173681836819368203682136822368233682436825368263682736828368293683036831368323683336834368353683636837368383683936840368413684236843368443684536846368473684836849368503685136852368533685436855368563685736858368593686036861368623686336864368653686636867368683686936870368713687236873368743687536876368773687836879368803688136882368833688436885368863688736888368893689036891368923689336894368953689636897368983689936900369013690236903369043690536906369073690836909369103691136912369133691436915369163691736918369193692036921369223692336924369253692636927369283692936930369313693236933369343693536936369373693836939369403694136942369433694436945369463694736948369493695036951369523695336954369553695636957369583695936960369613696236963369643696536966369673696836969369703697136972369733697436975369763697736978369793698036981369823698336984369853698636987369883698936990369913699236993369943699536996369973699836999370003700137002370033700437005370063700737008370093701037011370123701337014370153701637017370183701937020370213
70223702337024370253702637027370283702937030370313703237033370343703537036370373703837039370403704137042370433704437045370463704737048370493705037051370523705337054370553705637057370583705937060370613706237063370643706537066370673706837069370703707137072370733707437075370763707737078370793708037081370823708337084370853708637087370883708937090370913709237093370943709537096370973709837099371003710137102371033710437105371063710737108371093711037111371123711337114371153711637117371183711937120371213712237123371243712537126371273712837129371303713137132371333713437135371363713737138371393714037141371423714337144371453714637147371483714937150371513715237153371543715537156371573715837159371603716137162371633716437165371663716737168371693717037171371723717337174371753717637177371783717937180371813718237183371843718537186371873718837189371903719137192371933719437195371963719737198371993720037201372023720337204372053720637207372083720937210372113721237213372143721537216372173721837219372203722137222372233722437225372263722737228372293723037231372323723337234372353723637237372383723937240372413724237243372443724537246372473724837249372503725137252372533725437255372563725737258372593726037261372623726337264372653726637267372683726937270372713727237273372743727537276372773727837279372803728137282372833728437285372863728737288372893729037291372923729337294372953729637297372983729937300373013730237303373043730537306373073730837309373103731137312373133731437315373163731737318373193732037321373223732337324373253732637327373283732937330373313733237333373343733537336373373733837339373403734137342373433734437345373463734737348373493735037351373523735337354373553735637357373583735937360373613736237363373643736537366373673736837369373703737137372373733737437375373763737737378373793738037381373823738337384373853738637387373883738937390373913739237393373943739537396373973739837399374003740137402374033740437405374063740737408374093741037411374123741337414374153741637417374183741937420374213
74223742337424374253742637427374283742937430374313743237433374343743537436374373743837439374403744137442374433744437445374463744737448374493745037451374523745337454374553745637457374583745937460374613746237463374643746537466374673746837469374703747137472374733747437475374763747737478374793748037481374823748337484374853748637487374883748937490374913749237493374943749537496374973749837499375003750137502375033750437505375063750737508375093751037511375123751337514375153751637517375183751937520375213752237523375243752537526375273752837529375303753137532375333753437535375363753737538375393754037541375423754337544375453754637547375483754937550375513755237553375543755537556375573755837559375603756137562375633756437565375663756737568375693757037571375723757337574375753757637577375783757937580375813758237583375843758537586375873758837589375903759137592375933759437595375963759737598375993760037601376023760337604376053760637607376083760937610376113761237613376143761537616376173761837619376203762137622376233762437625376263762737628376293763037631376323763337634376353763637637376383763937640376413764237643376443764537646376473764837649376503765137652376533765437655376563765737658376593766037661376623766337664376653766637667376683766937670376713767237673376743767537676376773767837679376803768137682376833768437685376863768737688376893769037691376923769337694376953769637697376983769937700377013770237703377043770537706377073770837709377103771137712377133771437715377163771737718377193772037721377223772337724377253772637727377283772937730377313773237733377343773537736377373773837739377403774137742377433774437745377463774737748377493775037751377523775337754377553775637757377583775937760377613776237763377643776537766377673776837769377703777137772377733777437775377763777737778377793778037781377823778337784377853778637787377883778937790377913779237793377943779537796377973779837799378003780137802378033780437805378063780737808378093781037811378123781337814378153781637817378183781937820378213
78223782337824378253782637827378283782937830378313783237833378343783537836378373783837839378403784137842378433784437845378463784737848378493785037851378523785337854378553785637857378583785937860378613786237863378643786537866378673786837869378703787137872378733787437875378763787737878378793788037881378823788337884378853788637887378883788937890378913789237893378943789537896378973789837899379003790137902379033790437905379063790737908379093791037911379123791337914379153791637917379183791937920379213792237923379243792537926379273792837929379303793137932379333793437935379363793737938379393794037941379423794337944379453794637947379483794937950379513795237953379543795537956379573795837959379603796137962379633796437965379663796737968379693797037971379723797337974379753797637977379783797937980379813798237983379843798537986379873798837989379903799137992379933799437995379963799737998379993800038001380023800338004380053800638007380083800938010380113801238013380143801538016380173801838019380203802138022380233802438025380263802738028380293803038031380323803338034380353803638037380383803938040380413804238043380443804538046380473804838049380503805138052380533805438055380563805738058380593806038061380623806338064380653806638067380683806938070380713807238073380743807538076380773807838079380803808138082380833808438085380863808738088380893809038091380923809338094380953809638097380983809938100381013810238103381043810538106381073810838109381103811138112381133811438115381163811738118381193812038121381223812338124381253812638127381283812938130381313813238133381343813538136381373813838139381403814138142381433814438145381463814738148381493815038151381523815338154381553815638157381583815938160381613816238163381643816538166381673816838169381703817138172381733817438175381763817738178381793818038181381823818338184381853818638187381883818938190381913819238193381943819538196381973819838199382003820138202382033820438205382063820738208382093821038211382123821338214382153821638217382183821938220382213
82223822338224382253822638227382283822938230382313823238233382343823538236382373823838239382403824138242382433824438245382463824738248382493825038251382523825338254382553825638257382583825938260382613826238263382643826538266382673826838269382703827138272382733827438275382763827738278382793828038281382823828338284382853828638287382883828938290382913829238293382943829538296382973829838299383003830138302383033830438305383063830738308383093831038311383123831338314383153831638317383183831938320383213832238323383243832538326383273832838329383303833138332383333833438335383363833738338383393834038341383423834338344383453834638347383483834938350383513835238353383543835538356383573835838359383603836138362383633836438365383663836738368383693837038371383723837338374383753837638377383783837938380383813838238383383843838538386383873838838389383903839138392383933839438395383963839738398383993840038401384023840338404384053840638407384083840938410384113841238413384143841538416384173841838419384203842138422384233842438425384263842738428384293843038431384323843338434384353843638437384383843938440384413844238443384443844538446384473844838449384503845138452384533845438455384563845738458384593846038461384623846338464384653846638467384683846938470384713847238473384743847538476384773847838479384803848138482384833848438485384863848738488384893849038491384923849338494384953849638497384983849938500385013850238503385043850538506385073850838509385103851138512385133851438515385163851738518385193852038521385223852338524385253852638527385283852938530385313853238533385343853538536385373853838539385403854138542385433854438545385463854738548385493855038551385523855338554385553855638557385583855938560385613856238563385643856538566385673856838569385703857138572385733857438575385763857738578385793858038581385823858338584385853858638587385883858938590385913859238593385943859538596385973859838599386003860138602386033860438605386063860738608386093861038611386123861338614386153861638617386183861938620386213
86223862338624386253862638627386283862938630386313863238633386343863538636386373863838639386403864138642386433864438645386463864738648386493865038651386523865338654386553865638657386583865938660386613866238663386643866538666386673866838669386703867138672386733867438675386763867738678386793868038681386823868338684386853868638687386883868938690386913869238693386943869538696386973869838699387003870138702387033870438705387063870738708387093871038711387123871338714387153871638717387183871938720387213872238723387243872538726387273872838729387303873138732387333873438735387363873738738387393874038741387423874338744387453874638747387483874938750387513875238753387543875538756387573875838759387603876138762387633876438765387663876738768387693877038771387723877338774387753877638777387783877938780387813878238783387843878538786387873878838789387903879138792387933879438795387963879738798387993880038801388023880338804388053880638807388083880938810388113881238813388143881538816388173881838819388203882138822388233882438825388263882738828388293883038831388323883338834388353883638837388383883938840388413884238843388443884538846388473884838849388503885138852388533885438855388563885738858388593886038861388623886338864388653886638867388683886938870388713887238873388743887538876388773887838879388803888138882388833888438885388863888738888388893889038891388923889338894388953889638897388983889938900389013890238903389043890538906389073890838909389103891138912389133891438915389163891738918389193892038921389223892338924389253892638927389283892938930389313893238933389343893538936389373893838939389403894138942389433894438945389463894738948389493895038951389523895338954389553895638957389583895938960389613896238963389643896538966389673896838969389703897138972389733897438975389763897738978389793898038981389823898338984389853898638987389883898938990389913899238993389943899538996389973899838999390003900139002390033900439005390063900739008390093901039011390123901339014390153901639017390183901939020390213
90223902339024390253902639027390283902939030390313903239033390343903539036390373903839039390403904139042390433904439045390463904739048390493905039051390523905339054390553905639057390583905939060390613906239063390643906539066390673906839069390703907139072390733907439075390763907739078390793908039081390823908339084390853908639087390883908939090390913909239093390943909539096390973909839099391003910139102391033910439105391063910739108391093911039111391123911339114391153911639117391183911939120391213912239123391243912539126391273912839129391303913139132391333913439135391363913739138391393914039141391423914339144391453914639147391483914939150391513915239153391543915539156391573915839159391603916139162391633916439165391663916739168391693917039171391723917339174391753917639177391783917939180391813918239183391843918539186391873918839189391903919139192391933919439195391963919739198391993920039201392023920339204392053920639207392083920939210392113921239213392143921539216392173921839219392203922139222392233922439225392263922739228392293923039231392323923339234392353923639237392383923939240392413924239243392443924539246392473924839249392503925139252392533925439255392563925739258392593926039261392623926339264392653926639267392683926939270392713927239273392743927539276392773927839279392803928139282392833928439285392863928739288392893929039291392923929339294392953929639297392983929939300393013930239303393043930539306393073930839309393103931139312393133931439315393163931739318393193932039321393223932339324393253932639327393283932939330393313933239333393343933539336393373933839339393403934139342393433934439345393463934739348393493935039351393523935339354393553935639357393583935939360393613936239363393643936539366393673936839369393703937139372393733937439375393763937739378393793938039381393823938339384393853938639387393883938939390393913939239393393943939539396393973939839399394003940139402394033940439405394063940739408394093941039411394123941339414394153941639417394183941939420394213
94223942339424394253942639427394283942939430394313943239433394343943539436394373943839439394403944139442394433944439445394463944739448394493945039451394523945339454394553945639457394583945939460394613946239463394643946539466394673946839469394703947139472394733947439475394763947739478394793948039481394823948339484394853948639487394883948939490394913949239493394943949539496394973949839499395003950139502395033950439505395063950739508395093951039511395123951339514395153951639517395183951939520395213952239523395243952539526395273952839529395303953139532395333953439535395363953739538395393954039541395423954339544395453954639547395483954939550395513955239553395543955539556395573955839559395603956139562395633956439565395663956739568395693957039571395723957339574395753957639577395783957939580395813958239583395843958539586395873958839589395903959139592395933959439595395963959739598395993960039601396023960339604396053960639607396083960939610396113961239613396143961539616396173961839619396203962139622396233962439625396263962739628396293963039631396323963339634396353963639637396383963939640396413964239643396443964539646396473964839649396503965139652396533965439655396563965739658396593966039661396623966339664396653966639667396683966939670396713967239673396743967539676396773967839679396803968139682396833968439685396863968739688396893969039691396923969339694396953969639697396983969939700397013970239703397043970539706397073970839709397103971139712397133971439715397163971739718397193972039721397223972339724397253972639727397283972939730397313973239733397343973539736397373973839739397403974139742397433974439745397463974739748397493975039751397523975339754397553975639757397583975939760397613976239763397643976539766397673976839769397703977139772397733977439775397763977739778397793978039781397823978339784397853978639787397883978939790397913979239793397943979539796397973979839799398003980139802398033980439805398063980739808398093981039811398123981339814398153981639817398183981939820398213
98223982339824398253982639827398283982939830398313983239833398343983539836398373983839839398403984139842398433984439845398463984739848398493985039851398523985339854398553985639857398583985939860398613986239863398643986539866398673986839869398703987139872398733987439875398763987739878398793988039881398823988339884398853988639887398883988939890398913989239893398943989539896398973989839899399003990139902399033990439905399063990739908399093991039911399123991339914399153991639917399183991939920399213992239923399243992539926399273992839929399303993139932399333993439935399363993739938399393994039941399423994339944399453994639947399483994939950399513995239953399543995539956399573995839959399603996139962399633996439965399663996739968399693997039971399723997339974399753997639977399783997939980399813998239983399843998539986399873998839989399903999139992399933999439995399963999739998399994000040001400024000340004400054000640007400084000940010400114001240013400144001540016400174001840019400204002140022400234002440025400264002740028400294003040031400324003340034400354003640037400384003940040400414004240043400444004540046400474004840049400504005140052400534005440055400564005740058400594006040061400624006340064400654006640067400684006940070400714007240073400744007540076400774007840079400804008140082400834008440085400864008740088400894009040091400924009340094400954009640097400984009940100401014010240103401044010540106401074010840109401104011140112401134011440115401164011740118401194012040121401224012340124401254012640127401284012940130401314013240133401344013540136401374013840139401404014140142401434014440145401464014740148401494015040151401524015340154401554015640157401584015940160401614016240163401644016540166401674016840169401704017140172401734017440175401764017740178401794018040181401824018340184401854018640187401884018940190401914019240193401944019540196401974019840199402004020140202402034020440205402064020740208402094021040211402124021340214402154021640217402184021940220402214
02224022340224402254022640227402284022940230402314023240233402344023540236402374023840239402404024140242402434024440245402464024740248402494025040251402524025340254402554025640257402584025940260402614026240263402644026540266402674026840269402704027140272402734027440275402764027740278402794028040281402824028340284402854028640287402884028940290402914029240293402944029540296402974029840299403004030140302403034030440305403064030740308403094031040311403124031340314403154031640317403184031940320403214032240323403244032540326403274032840329403304033140332403334033440335403364033740338403394034040341403424034340344403454034640347403484034940350403514035240353403544035540356403574035840359403604036140362403634036440365403664036740368403694037040371403724037340374403754037640377403784037940380403814038240383403844038540386403874038840389403904039140392403934039440395403964039740398403994040040401404024040340404404054040640407404084040940410404114041240413404144041540416404174041840419404204042140422404234042440425404264042740428404294043040431404324043340434404354043640437404384043940440404414044240443404444044540446404474044840449404504045140452404534045440455404564045740458404594046040461404624046340464404654046640467404684046940470404714047240473404744047540476404774047840479404804048140482404834048440485404864048740488404894049040491404924049340494404954049640497404984049940500405014050240503405044050540506405074050840509405104051140512405134051440515405164051740518405194052040521405224052340524405254052640527405284052940530405314053240533405344053540536405374053840539405404054140542405434054440545405464054740548405494055040551405524055340554405554055640557405584055940560405614056240563405644056540566405674056840569405704057140572405734057440575405764057740578405794058040581405824058340584405854058640587405884058940590405914059240593405944059540596405974059840599406004060140602406034060440605406064060740608406094061040611406124061340614406154061640617406184061940620406214
06224062340624406254062640627406284062940630406314063240633406344063540636406374063840639406404064140642406434064440645406464064740648406494065040651406524065340654406554065640657406584065940660406614066240663406644066540666406674066840669406704067140672406734067440675406764067740678406794068040681406824068340684406854068640687406884068940690406914069240693406944069540696406974069840699407004070140702407034070440705407064070740708407094071040711407124071340714407154071640717407184071940720407214072240723407244072540726407274072840729407304073140732407334073440735407364073740738407394074040741407424074340744407454074640747407484074940750407514075240753407544075540756407574075840759407604076140762407634076440765407664076740768407694077040771407724077340774407754077640777407784077940780407814078240783407844078540786407874078840789407904079140792407934079440795407964079740798407994080040801408024080340804408054080640807408084080940810408114081240813408144081540816408174081840819408204082140822408234082440825408264082740828408294083040831408324083340834408354083640837408384083940840408414084240843408444084540846408474084840849408504085140852408534085440855408564085740858408594086040861408624086340864408654086640867408684086940870408714087240873408744087540876408774087840879408804088140882408834088440885408864088740888408894089040891408924089340894408954089640897408984089940900409014090240903409044090540906409074090840909409104091140912409134091440915409164091740918409194092040921409224092340924409254092640927409284092940930409314093240933409344093540936409374093840939409404094140942409434094440945409464094740948409494095040951409524095340954409554095640957409584095940960409614096240963409644096540966409674096840969409704097140972409734097440975409764097740978409794098040981409824098340984409854098640987409884098940990409914099240993409944099540996409974099840999410004100141002410034100441005410064100741008410094101041011410124101341014410154101641017410184101941020410214
10224102341024410254102641027410284102941030410314103241033410344103541036410374103841039410404104141042410434104441045410464104741048410494105041051410524105341054410554105641057410584105941060410614106241063410644106541066410674106841069410704107141072410734107441075410764107741078410794108041081410824108341084410854108641087410884108941090410914109241093410944109541096410974109841099411004110141102411034110441105411064110741108411094111041111411124111341114411154111641117411184111941120411214112241123411244112541126411274112841129411304113141132411334113441135411364113741138411394114041141411424114341144411454114641147411484114941150411514115241153411544115541156411574115841159411604116141162411634116441165411664116741168411694117041171411724117341174411754117641177411784117941180411814118241183411844118541186411874118841189411904119141192411934119441195411964119741198411994120041201412024120341204412054120641207412084120941210412114121241213412144121541216412174121841219412204122141222412234122441225412264122741228412294123041231412324123341234412354123641237412384123941240412414124241243412444124541246412474124841249412504125141252412534125441255412564125741258412594126041261412624126341264412654126641267412684126941270412714127241273412744127541276412774127841279412804128141282412834128441285412864128741288412894129041291412924129341294412954129641297412984129941300413014130241303413044130541306413074130841309413104131141312413134131441315413164131741318413194132041321413224132341324413254132641327413284132941330413314133241333413344133541336413374133841339413404134141342413434134441345413464134741348413494135041351413524135341354413554135641357413584135941360413614136241363413644136541366413674136841369413704137141372413734137441375413764137741378413794138041381413824138341384413854138641387413884138941390413914139241393413944139541396413974139841399414004140141402414034140441405414064140741408414094141041411414124141341414414154141641417414184141941420414214
14224142341424414254142641427414284142941430414314143241433414344143541436414374143841439414404144141442414434144441445414464144741448414494145041451414524145341454414554145641457414584145941460414614146241463414644146541466414674146841469414704147141472414734147441475414764147741478414794148041481414824148341484414854148641487414884148941490414914149241493414944149541496414974149841499415004150141502415034150441505415064150741508415094151041511415124151341514415154151641517415184151941520415214152241523415244152541526415274152841529415304153141532415334153441535415364153741538415394154041541415424154341544415454154641547415484154941550415514155241553415544155541556415574155841559415604156141562415634156441565415664156741568415694157041571415724157341574415754157641577415784157941580415814158241583415844158541586415874158841589415904159141592415934159441595415964159741598415994160041601416024160341604416054160641607416084160941610416114161241613416144161541616416174161841619416204162141622416234162441625416264162741628416294163041631416324163341634416354163641637416384163941640416414164241643416444164541646416474164841649416504165141652416534165441655416564165741658416594166041661416624166341664416654166641667416684166941670416714167241673416744167541676416774167841679416804168141682416834168441685416864168741688416894169041691416924169341694416954169641697416984169941700417014170241703417044170541706417074170841709417104171141712417134171441715417164171741718417194172041721417224172341724417254172641727417284172941730417314173241733417344173541736417374173841739417404174141742417434174441745417464174741748417494175041751417524175341754417554175641757417584175941760417614176241763417644176541766417674176841769417704177141772417734177441775417764177741778417794178041781417824178341784417854178641787417884178941790417914179241793417944179541796417974179841799418004180141802418034180441805418064180741808418094181041811418124181341814418154181641817418184181941820418214
18224182341824418254182641827418284182941830418314183241833418344183541836418374183841839418404184141842418434184441845418464184741848418494185041851418524185341854418554185641857418584185941860418614186241863418644186541866418674186841869418704187141872418734187441875418764187741878418794188041881418824188341884418854188641887418884188941890418914189241893418944189541896418974189841899419004190141902419034190441905419064190741908419094191041911419124191341914419154191641917419184191941920419214192241923419244192541926419274192841929419304193141932419334193441935419364193741938419394194041941419424194341944419454194641947419484194941950419514195241953419544195541956419574195841959419604196141962419634196441965419664196741968419694197041971419724197341974419754197641977419784197941980419814198241983419844198541986419874198841989419904199141992419934199441995419964199741998419994200042001420024200342004420054200642007420084200942010420114201242013420144201542016420174201842019420204202142022420234202442025420264202742028420294203042031420324203342034420354203642037420384203942040420414204242043420444204542046420474204842049420504205142052420534205442055420564205742058420594206042061420624206342064420654206642067420684206942070420714207242073420744207542076420774207842079420804208142082420834208442085420864208742088420894209042091420924209342094420954209642097420984209942100421014210242103421044210542106421074210842109421104211142112421134211442115421164211742118421194212042121421224212342124421254212642127421284212942130421314213242133421344213542136421374213842139421404214142142421434214442145421464214742148421494215042151421524215342154421554215642157421584215942160421614216242163421644216542166421674216842169421704217142172421734217442175421764217742178421794218042181421824218342184421854218642187421884218942190421914219242193421944219542196421974219842199422004220142202422034220442205422064220742208422094221042211422124221342214422154221642217422184221942220422214
22224222342224422254222642227422284222942230422314223242233422344223542236422374223842239422404224142242422434224442245422464224742248422494225042251422524225342254422554225642257422584225942260422614226242263422644226542266422674226842269422704227142272422734227442275422764227742278422794228042281422824228342284422854228642287422884228942290422914229242293422944229542296422974229842299423004230142302423034230442305423064230742308423094231042311423124231342314423154231642317423184231942320423214232242323423244232542326423274232842329423304233142332423334233442335423364233742338423394234042341423424234342344423454234642347423484234942350423514235242353423544235542356423574235842359423604236142362423634236442365423664236742368423694237042371423724237342374423754237642377423784237942380423814238242383423844238542386423874238842389423904239142392423934239442395423964239742398423994240042401424024240342404424054240642407424084240942410424114241242413424144241542416424174241842419424204242142422424234242442425424264242742428424294243042431424324243342434424354243642437424384243942440424414244242443424444244542446424474244842449424504245142452424534245442455424564245742458424594246042461424624246342464424654246642467424684246942470424714247242473424744247542476424774247842479424804248142482424834248442485424864248742488424894249042491424924249342494424954249642497424984249942500425014250242503425044250542506425074250842509425104251142512425134251442515425164251742518425194252042521425224252342524425254252642527425284252942530425314253242533425344253542536425374253842539425404254142542425434254442545425464254742548425494255042551425524255342554425554255642557425584255942560425614256242563425644256542566425674256842569425704257142572425734257442575425764257742578425794258042581425824258342584425854258642587425884258942590425914259242593425944259542596425974259842599426004260142602426034260442605426064260742608426094261042611426124261342614426154261642617426184261942620426214
26224262342624426254262642627426284262942630426314263242633426344263542636426374263842639426404264142642426434264442645426464264742648426494265042651426524265342654426554265642657426584265942660426614266242663426644266542666426674266842669426704267142672426734267442675426764267742678426794268042681426824268342684426854268642687426884268942690426914269242693426944269542696426974269842699427004270142702427034270442705427064270742708427094271042711427124271342714427154271642717427184271942720427214272242723427244272542726427274272842729427304273142732427334273442735427364273742738427394274042741427424274342744427454274642747427484274942750427514275242753427544275542756427574275842759427604276142762427634276442765427664276742768427694277042771427724277342774427754277642777427784277942780427814278242783427844278542786427874278842789427904279142792427934279442795427964279742798427994280042801428024280342804428054280642807428084280942810428114281242813428144281542816428174281842819428204282142822428234282442825428264282742828428294283042831428324283342834428354283642837428384283942840428414284242843428444284542846428474284842849428504285142852428534285442855428564285742858428594286042861428624286342864428654286642867428684286942870428714287242873428744287542876428774287842879428804288142882428834288442885428864288742888428894289042891428924289342894428954289642897428984289942900429014290242903429044290542906429074290842909429104291142912429134291442915429164291742918429194292042921429224292342924429254292642927429284292942930429314293242933429344293542936429374293842939429404294142942429434294442945429464294742948429494295042951429524295342954429554295642957429584295942960429614296242963429644296542966429674296842969429704297142972429734297442975429764297742978429794298042981429824298342984429854298642987429884298942990429914299242993429944299542996429974299842999430004300143002430034300443005430064300743008430094301043011430124301343014430154301643017430184301943020430214
30224302343024430254302643027430284302943030430314303243033430344303543036430374303843039430404304143042430434304443045430464304743048430494305043051430524305343054430554305643057430584305943060430614306243063430644306543066430674306843069430704307143072430734307443075430764307743078430794308043081430824308343084430854308643087430884308943090430914309243093430944309543096430974309843099431004310143102431034310443105431064310743108431094311043111431124311343114431154311643117431184311943120431214312243123431244312543126431274312843129431304313143132431334313443135431364313743138431394314043141431424314343144431454314643147431484314943150431514315243153431544315543156431574315843159431604316143162431634316443165431664316743168431694317043171431724317343174431754317643177431784317943180431814318243183431844318543186431874318843189431904319143192431934319443195431964319743198431994320043201432024320343204432054320643207432084320943210432114321243213432144321543216432174321843219432204322143222432234322443225432264322743228432294323043231432324323343234432354323643237432384323943240432414324243243432444324543246432474324843249432504325143252432534325443255432564325743258432594326043261432624326343264432654326643267432684326943270432714327243273432744327543276432774327843279432804328143282432834328443285432864328743288432894329043291432924329343294432954329643297432984329943300433014330243303433044330543306433074330843309433104331143312433134331443315433164331743318433194332043321433224332343324433254332643327433284332943330433314333243333433344333543336433374333843339433404334143342433434334443345433464334743348433494335043351433524335343354433554335643357433584335943360433614336243363433644336543366433674336843369433704337143372433734337443375433764337743378433794338043381433824338343384433854338643387433884338943390433914339243393433944339543396433974339843399434004340143402434034340443405434064340743408434094341043411434124341343414434154341643417434184341943420434214
34224342343424434254342643427434284342943430434314343243433434344343543436434374343843439434404344143442434434344443445434464344743448434494345043451434524345343454434554345643457434584345943460434614346243463434644346543466434674346843469434704347143472434734347443475434764347743478434794348043481434824348343484434854348643487434884348943490434914349243493434944349543496434974349843499435004350143502435034350443505435064350743508435094351043511435124351343514435154351643517435184351943520435214352243523435244352543526435274352843529435304353143532435334353443535435364353743538435394354043541435424354343544435454354643547435484354943550435514355243553435544355543556435574355843559435604356143562435634356443565435664356743568435694357043571435724357343574435754357643577435784357943580435814358243583435844358543586435874358843589435904359143592435934359443595435964359743598435994360043601436024360343604436054360643607436084360943610436114361243613436144361543616436174361843619436204362143622436234362443625436264362743628436294363043631436324363343634436354363643637436384363943640436414364243643436444364543646436474364843649436504365143652436534365443655436564365743658436594366043661436624366343664436654366643667436684366943670436714367243673436744367543676436774367843679436804368143682436834368443685436864368743688436894369043691436924369343694436954369643697436984369943700437014370243703437044370543706437074370843709437104371143712437134371443715437164371743718437194372043721437224372343724437254372643727437284372943730437314373243733437344373543736437374373843739437404374143742437434374443745437464374743748437494375043751437524375343754437554375643757437584375943760437614376243763437644376543766437674376843769437704377143772437734377443775437764377743778437794378043781437824378343784437854378643787437884378943790437914379243793437944379543796437974379843799438004380143802438034380443805438064380743808438094381043811438124381343814438154381643817438184381943820438214
38224382343824438254382643827438284382943830438314383243833438344383543836438374383843839438404384143842438434384443845438464384743848438494385043851438524385343854438554385643857438584385943860438614386243863438644386543866438674386843869438704387143872438734387443875438764387743878438794388043881438824388343884438854388643887438884388943890438914389243893438944389543896438974389843899439004390143902439034390443905439064390743908439094391043911439124391343914439154391643917439184391943920439214392243923439244392543926439274392843929439304393143932439334393443935439364393743938439394394043941439424394343944439454394643947439484394943950439514395243953439544395543956439574395843959439604396143962439634396443965439664396743968439694397043971439724397343974439754397643977439784397943980439814398243983439844398543986439874398843989439904399143992439934399443995439964399743998439994400044001440024400344004440054400644007440084400944010440114401244013440144401544016440174401844019440204402144022440234402444025440264402744028440294403044031440324403344034440354403644037440384403944040440414404244043440444404544046440474404844049440504405144052440534405444055440564405744058440594406044061440624406344064440654406644067440684406944070440714407244073440744407544076440774407844079440804408144082440834408444085440864408744088440894409044091440924409344094440954409644097440984409944100441014410244103441044410544106441074410844109441104411144112441134411444115441164411744118441194412044121441224412344124441254412644127441284412944130441314413244133441344413544136441374413844139441404414144142441434414444145441464414744148441494415044151441524415344154441554415644157441584415944160441614416244163441644416544166441674416844169441704417144172441734417444175441764417744178441794418044181441824418344184441854418644187441884418944190441914419244193441944419544196441974419844199442004420144202442034420444205442064420744208442094421044211442124421344214442154421644217442184421944220442214
42224422344224442254422644227442284422944230442314423244233442344423544236442374423844239442404424144242442434424444245442464424744248442494425044251442524425344254442554425644257442584425944260442614426244263442644426544266442674426844269442704427144272442734427444275442764427744278442794428044281442824428344284442854428644287442884428944290442914429244293442944429544296442974429844299443004430144302443034430444305443064430744308443094431044311443124431344314443154431644317443184431944320443214432244323443244432544326443274432844329443304433144332443334433444335443364433744338443394434044341443424434344344443454434644347443484434944350443514435244353443544435544356443574435844359443604436144362443634436444365443664436744368443694437044371443724437344374443754437644377443784437944380443814438244383443844438544386443874438844389443904439144392443934439444395443964439744398443994440044401444024440344404444054440644407444084440944410444114441244413444144441544416444174441844419444204442144422444234442444425444264442744428444294443044431444324443344434444354443644437444384443944440444414444244443444444444544446444474444844449444504445144452444534445444455444564445744458444594446044461444624446344464444654446644467444684446944470444714447244473444744447544476444774447844479444804448144482444834448444485444864448744488444894449044491444924449344494444954449644497444984449944500445014450244503445044450544506445074450844509445104451144512445134451444515445164451744518445194452044521445224452344524445254452644527445284452944530445314453244533445344453544536445374453844539445404454144542445434454444545445464454744548445494455044551445524455344554445554455644557445584455944560445614456244563445644456544566445674456844569445704457144572445734457444575445764457744578445794458044581445824458344584445854458644587445884458944590445914459244593445944459544596445974459844599446004460144602446034460444605446064460744608446094461044611446124461344614446154461644617446184461944620446214
46224462344624446254462644627446284462944630446314463244633446344463544636446374463844639446404464144642446434464444645446464464744648446494465044651446524465344654446554465644657446584465944660446614466244663446644466544666446674466844669446704467144672446734467444675446764467744678446794468044681446824468344684446854468644687446884468944690446914469244693446944469544696446974469844699447004470144702447034470444705447064470744708447094471044711447124471344714447154471644717447184471944720447214472244723447244472544726447274472844729447304473144732447334473444735447364473744738447394474044741447424474344744447454474644747447484474944750447514475244753447544475544756447574475844759447604476144762447634476444765447664476744768447694477044771447724477344774447754477644777447784477944780447814478244783447844478544786447874478844789447904479144792447934479444795447964479744798447994480044801448024480344804448054480644807448084480944810448114481244813448144481544816448174481844819448204482144822448234482444825448264482744828448294483044831448324483344834448354483644837448384483944840448414484244843448444484544846448474484844849448504485144852448534485444855448564485744858448594486044861448624486344864448654486644867448684486944870448714487244873448744487544876448774487844879448804488144882448834488444885448864488744888448894489044891448924489344894448954489644897448984489944900449014490244903449044490544906449074490844909449104491144912449134491444915449164491744918449194492044921449224492344924449254492644927449284492944930449314493244933449344493544936449374493844939449404494144942449434494444945449464494744948449494495044951449524495344954449554495644957449584495944960449614496244963449644496544966449674496844969449704497144972449734497444975449764497744978449794498044981449824498344984449854498644987449884498944990449914499244993449944499544996449974499844999450004500145002450034500445005450064500745008450094501045011450124501345014450154501645017450184501945020450214
50224502345024450254502645027450284502945030450314503245033450344503545036450374503845039450404504145042450434504445045450464504745048450494505045051450524505345054450554505645057450584505945060450614506245063450644506545066450674506845069450704507145072450734507445075450764507745078450794508045081450824508345084450854508645087450884508945090450914509245093450944509545096450974509845099451004510145102451034510445105451064510745108451094511045111451124511345114451154511645117451184511945120451214512245123451244512545126451274512845129451304513145132451334513445135451364513745138451394514045141451424514345144451454514645147451484514945150451514515245153451544515545156451574515845159451604516145162451634516445165451664516745168451694517045171451724517345174451754517645177451784517945180451814518245183451844518545186451874518845189451904519145192451934519445195451964519745198451994520045201452024520345204452054520645207452084520945210452114521245213452144521545216452174521845219452204522145222452234522445225452264522745228452294523045231452324523345234452354523645237452384523945240452414524245243452444524545246452474524845249452504525145252452534525445255452564525745258452594526045261452624526345264452654526645267452684526945270452714527245273452744527545276452774527845279452804528145282452834528445285452864528745288452894529045291452924529345294452954529645297452984529945300453014530245303453044530545306453074530845309453104531145312453134531445315453164531745318453194532045321453224532345324453254532645327453284532945330453314533245333453344533545336453374533845339453404534145342453434534445345453464534745348453494535045351453524535345354453554535645357453584535945360453614536245363453644536545366453674536845369453704537145372453734537445375453764537745378453794538045381453824538345384453854538645387453884538945390453914539245393453944539545396453974539845399454004540145402454034540445405454064540745408454094541045411454124541345414454154541645417454184541945420454214
54224542345424454254542645427454284542945430454314543245433454344543545436454374543845439454404544145442454434544445445454464544745448454494545045451454524545345454454554545645457454584545945460454614546245463454644546545466454674546845469454704547145472454734547445475454764547745478454794548045481454824548345484454854548645487454884548945490454914549245493454944549545496454974549845499455004550145502455034550445505455064550745508455094551045511455124551345514455154551645517455184551945520455214552245523455244552545526455274552845529455304553145532455334553445535455364553745538455394554045541455424554345544455454554645547455484554945550455514555245553455544555545556455574555845559455604556145562455634556445565455664556745568455694557045571455724557345574455754557645577455784557945580455814558245583455844558545586455874558845589455904559145592455934559445595455964559745598455994560045601456024560345604456054560645607456084560945610456114561245613456144561545616456174561845619456204562145622456234562445625456264562745628456294563045631456324563345634456354563645637456384563945640456414564245643456444564545646456474564845649456504565145652456534565445655456564565745658456594566045661456624566345664456654566645667456684566945670456714567245673456744567545676456774567845679456804568145682456834568445685456864568745688456894569045691456924569345694456954569645697456984569945700457014570245703457044570545706457074570845709457104571145712457134571445715457164571745718457194572045721457224572345724457254572645727457284572945730457314573245733457344573545736457374573845739457404574145742457434574445745457464574745748457494575045751457524575345754457554575645757457584575945760457614576245763457644576545766457674576845769457704577145772457734577445775457764577745778457794578045781457824578345784457854578645787457884578945790457914579245793457944579545796457974579845799458004580145802458034580445805458064580745808458094581045811458124581345814458154581645817458184581945820458214
58224582345824458254582645827458284582945830458314583245833458344583545836458374583845839458404584145842458434584445845458464584745848458494585045851458524585345854458554585645857458584585945860458614586245863458644586545866458674586845869458704587145872458734587445875458764587745878458794588045881458824588345884458854588645887458884588945890458914589245893458944589545896458974589845899459004590145902459034590445905459064590745908459094591045911459124591345914459154591645917459184591945920459214592245923459244592545926459274592845929459304593145932459334593445935459364593745938459394594045941459424594345944459454594645947459484594945950459514595245953459544595545956459574595845959459604596145962459634596445965459664596745968459694597045971459724597345974459754597645977459784597945980459814598245983459844598545986459874598845989459904599145992459934599445995459964599745998459994600046001460024600346004460054600646007460084600946010460114601246013460144601546016460174601846019460204602146022460234602446025460264602746028460294603046031460324603346034460354603646037460384603946040460414604246043460444604546046460474604846049460504605146052460534605446055460564605746058460594606046061460624606346064460654606646067460684606946070460714607246073460744607546076460774607846079460804608146082460834608446085460864608746088460894609046091460924609346094460954609646097460984609946100461014610246103461044610546106461074610846109461104611146112461134611446115461164611746118461194612046121461224612346124461254612646127461284612946130461314613246133461344613546136461374613846139461404614146142461434614446145461464614746148461494615046151461524615346154461554615646157461584615946160461614616246163461644616546166461674616846169461704617146172461734617446175461764617746178461794618046181461824618346184461854618646187461884618946190461914619246193461944619546196461974619846199462004620146202462034620446205462064620746208462094621046211462124621346214462154621646217462184621946220462214
62224622346224462254622646227462284622946230462314623246233462344623546236462374623846239462404624146242462434624446245462464624746248462494625046251462524625346254462554625646257462584625946260462614626246263462644626546266462674626846269462704627146272462734627446275462764627746278462794628046281462824628346284462854628646287462884628946290462914629246293462944629546296462974629846299463004630146302463034630446305463064630746308463094631046311463124631346314463154631646317463184631946320463214632246323463244632546326463274632846329463304633146332463334633446335463364633746338463394634046341463424634346344463454634646347463484634946350463514635246353463544635546356463574635846359463604636146362463634636446365463664636746368463694637046371463724637346374463754637646377463784637946380463814638246383463844638546386463874638846389463904639146392463934639446395463964639746398463994640046401464024640346404464054640646407464084640946410464114641246413464144641546416464174641846419464204642146422464234642446425464264642746428464294643046431464324643346434464354643646437464384643946440464414644246443464444644546446464474644846449464504645146452464534645446455464564645746458464594646046461464624646346464464654646646467464684646946470464714647246473464744647546476464774647846479464804648146482464834648446485464864648746488464894649046491464924649346494464954649646497464984649946500465014650246503465044650546506465074650846509465104651146512465134651446515465164651746518465194652046521465224652346524465254652646527465284652946530465314653246533465344653546536465374653846539465404654146542465434654446545465464654746548465494655046551465524655346554465554655646557465584655946560465614656246563465644656546566465674656846569465704657146572465734657446575465764657746578465794658046581465824658346584465854658646587465884658946590465914659246593465944659546596465974659846599466004660146602466034660446605466064660746608466094661046611466124661346614466154661646617466184661946620466214
66224662346624466254662646627466284662946630466314663246633466344663546636466374663846639466404664146642466434664446645466464664746648466494665046651466524665346654466554665646657466584665946660466614666246663466644666546666466674666846669466704667146672466734667446675466764667746678466794668046681466824668346684466854668646687466884668946690466914669246693466944669546696466974669846699467004670146702467034670446705467064670746708467094671046711467124671346714467154671646717467184671946720467214672246723467244672546726467274672846729467304673146732467334673446735467364673746738467394674046741467424674346744467454674646747467484674946750467514675246753467544675546756467574675846759467604676146762467634676446765467664676746768467694677046771467724677346774467754677646777467784677946780467814678246783467844678546786467874678846789467904679146792467934679446795467964679746798467994680046801468024680346804468054680646807468084680946810468114681246813468144681546816468174681846819468204682146822468234682446825468264682746828468294683046831468324683346834468354683646837468384683946840468414684246843468444684546846468474684846849468504685146852468534685446855468564685746858468594686046861468624686346864468654686646867468684686946870468714687246873468744687546876468774687846879468804688146882468834688446885468864688746888468894689046891468924689346894468954689646897468984689946900469014690246903469044690546906469074690846909469104691146912469134691446915469164691746918469194692046921469224692346924469254692646927469284692946930469314693246933469344693546936469374693846939469404694146942469434694446945469464694746948469494695046951469524695346954469554695646957469584695946960469614696246963469644696546966469674696846969469704697146972469734697446975469764697746978469794698046981469824698346984469854698646987469884698946990469914699246993469944699546996469974699846999470004700147002470034700447005470064700747008470094701047011470124701347014470154701647017470184701947020470214
70224702347024470254702647027470284702947030470314703247033470344703547036470374703847039470404704147042470434704447045470464704747048470494705047051470524705347054470554705647057470584705947060470614706247063470644706547066470674706847069470704707147072470734707447075470764707747078470794708047081470824708347084470854708647087470884708947090470914709247093470944709547096470974709847099471004710147102471034710447105471064710747108471094711047111471124711347114471154711647117471184711947120471214712247123471244712547126471274712847129471304713147132471334713447135471364713747138471394714047141471424714347144471454714647147471484714947150471514715247153471544715547156471574715847159471604716147162471634716447165471664716747168471694717047171471724717347174471754717647177471784717947180471814718247183471844718547186471874718847189471904719147192471934719447195471964719747198471994720047201472024720347204472054720647207472084720947210472114721247213472144721547216472174721847219472204722147222472234722447225472264722747228472294723047231472324723347234472354723647237472384723947240472414724247243472444724547246472474724847249472504725147252472534725447255472564725747258472594726047261472624726347264472654726647267472684726947270472714727247273472744727547276472774727847279472804728147282472834728447285472864728747288472894729047291472924729347294472954729647297472984729947300473014730247303473044730547306473074730847309473104731147312473134731447315473164731747318473194732047321473224732347324473254732647327473284732947330473314733247333473344733547336473374733847339473404734147342473434734447345473464734747348473494735047351473524735347354473554735647357473584735947360473614736247363473644736547366473674736847369473704737147372473734737447375473764737747378473794738047381473824738347384473854738647387473884738947390473914739247393473944739547396473974739847399474004740147402474034740447405474064740747408474094741047411474124741347414474154741647417474184741947420474214
74224742347424474254742647427474284742947430474314743247433474344743547436474374743847439474404744147442474434744447445474464744747448474494745047451474524745347454474554745647457474584745947460474614746247463474644746547466474674746847469474704747147472474734747447475474764747747478474794748047481474824748347484474854748647487474884748947490474914749247493474944749547496474974749847499475004750147502475034750447505475064750747508475094751047511475124751347514475154751647517475184751947520475214752247523475244752547526475274752847529475304753147532475334753447535475364753747538475394754047541475424754347544475454754647547475484754947550475514755247553475544755547556475574755847559475604756147562475634756447565475664756747568475694757047571475724757347574475754757647577475784757947580475814758247583475844758547586475874758847589475904759147592475934759447595475964759747598475994760047601476024760347604476054760647607476084760947610476114761247613476144761547616476174761847619476204762147622476234762447625476264762747628476294763047631476324763347634476354763647637476384763947640476414764247643476444764547646476474764847649476504765147652476534765447655476564765747658476594766047661476624766347664476654766647667476684766947670476714767247673476744767547676476774767847679476804768147682476834768447685476864768747688476894769047691476924769347694476954769647697476984769947700477014770247703477044770547706477074770847709477104771147712477134771447715477164771747718477194772047721477224772347724477254772647727477284772947730477314773247733477344773547736477374773847739477404774147742477434774447745477464774747748477494775047751477524775347754477554775647757477584775947760477614776247763477644776547766477674776847769477704777147772477734777447775477764777747778477794778047781477824778347784477854778647787477884778947790477914779247793477944779547796477974779847799478004780147802478034780447805478064780747808478094781047811478124781347814478154781647817478184781947820478214
78224782347824478254782647827478284782947830478314783247833478344783547836478374783847839478404784147842478434784447845478464784747848478494785047851478524785347854478554785647857478584785947860478614786247863478644786547866478674786847869478704787147872478734787447875478764787747878478794788047881478824788347884478854788647887478884788947890478914789247893478944789547896478974789847899479004790147902479034790447905479064790747908479094791047911479124791347914479154791647917479184791947920479214792247923479244792547926479274792847929479304793147932479334793447935479364793747938479394794047941479424794347944479454794647947479484794947950479514795247953479544795547956479574795847959479604796147962479634796447965479664796747968479694797047971479724797347974479754797647977479784797947980479814798247983479844798547986479874798847989479904799147992479934799447995479964799747998479994800048001480024800348004480054800648007480084800948010480114801248013480144801548016480174801848019480204802148022480234802448025480264802748028480294803048031480324803348034480354803648037480384803948040480414804248043480444804548046480474804848049480504805148052480534805448055480564805748058480594806048061480624806348064480654806648067480684806948070480714807248073480744807548076480774807848079480804808148082480834808448085480864808748088480894809048091480924809348094480954809648097480984809948100481014810248103481044810548106481074810848109481104811148112481134811448115481164811748118481194812048121481224812348124481254812648127481284812948130481314813248133481344813548136481374813848139481404814148142481434814448145481464814748148481494815048151481524815348154481554815648157481584815948160481614816248163481644816548166481674816848169481704817148172481734817448175481764817748178481794818048181481824818348184481854818648187481884818948190481914819248193481944819548196481974819848199482004820148202482034820448205482064820748208482094821048211482124821348214482154821648217482184821948220482214
82224822348224482254822648227482284822948230482314823248233482344823548236482374823848239482404824148242482434824448245482464824748248482494825048251482524825348254482554825648257482584825948260482614826248263482644826548266482674826848269482704827148272482734827448275482764827748278482794828048281482824828348284482854828648287482884828948290482914829248293482944829548296482974829848299483004830148302483034830448305483064830748308483094831048311483124831348314483154831648317483184831948320483214832248323483244832548326483274832848329483304833148332483334833448335483364833748338483394834048341483424834348344483454834648347483484834948350483514835248353483544835548356483574835848359483604836148362483634836448365483664836748368483694837048371483724837348374483754837648377483784837948380483814838248383483844838548386483874838848389483904839148392483934839448395483964839748398483994840048401484024840348404484054840648407484084840948410484114841248413484144841548416484174841848419484204842148422484234842448425484264842748428484294843048431484324843348434484354843648437484384843948440484414844248443484444844548446484474844848449484504845148452484534845448455484564845748458484594846048461484624846348464484654846648467484684846948470484714847248473484744847548476484774847848479484804848148482484834848448485484864848748488484894849048491484924849348494484954849648497484984849948500485014850248503485044850548506485074850848509485104851148512485134851448515485164851748518485194852048521485224852348524485254852648527485284852948530485314853248533485344853548536485374853848539485404854148542485434854448545485464854748548485494855048551485524855348554485554855648557485584855948560485614856248563485644856548566485674856848569485704857148572485734857448575485764857748578485794858048581485824858348584485854858648587485884858948590485914859248593485944859548596485974859848599486004860148602486034860448605486064860748608486094861048611486124861348614486154861648617486184861948620486214
86224862348624486254862648627486284862948630486314863248633486344863548636486374863848639486404864148642486434864448645486464864748648486494865048651486524865348654486554865648657486584865948660486614866248663486644866548666486674866848669486704867148672486734867448675486764867748678486794868048681486824868348684486854868648687486884868948690486914869248693486944869548696486974869848699487004870148702487034870448705487064870748708487094871048711487124871348714487154871648717487184871948720487214872248723487244872548726487274872848729487304873148732487334873448735487364873748738487394874048741487424874348744487454874648747487484874948750487514875248753487544875548756487574875848759487604876148762487634876448765487664876748768487694877048771487724877348774487754877648777487784877948780487814878248783487844878548786487874878848789487904879148792487934879448795487964879748798487994880048801488024880348804488054880648807488084880948810488114881248813488144881548816488174881848819488204882148822488234882448825488264882748828488294883048831488324883348834488354883648837488384883948840488414884248843488444884548846488474884848849488504885148852488534885448855488564885748858488594886048861488624886348864488654886648867488684886948870488714887248873488744887548876488774887848879488804888148882488834888448885488864888748888488894889048891488924889348894488954889648897488984889948900489014890248903489044890548906489074890848909489104891148912489134891448915489164891748918489194892048921489224892348924489254892648927489284892948930489314893248933489344893548936489374893848939489404894148942489434894448945489464894748948489494895048951489524895348954489554895648957489584895948960489614896248963489644896548966489674896848969489704897148972489734897448975489764897748978489794898048981489824898348984489854898648987489884898948990489914899248993489944899548996489974899848999490004900149002490034900449005490064900749008490094901049011490124901349014490154901649017490184901949020490214
90224902349024490254902649027490284902949030490314903249033490344903549036490374903849039490404904149042490434904449045490464904749048490494905049051490524905349054490554905649057490584905949060490614906249063490644906549066490674906849069490704907149072490734907449075490764907749078490794908049081490824908349084490854908649087490884908949090490914909249093490944909549096490974909849099491004910149102491034910449105491064910749108491094911049111491124911349114491154911649117491184911949120491214912249123491244912549126491274912849129491304913149132491334913449135491364913749138491394914049141491424914349144491454914649147491484914949150491514915249153491544915549156491574915849159491604916149162491634916449165491664916749168491694917049171491724917349174491754917649177491784917949180491814918249183491844918549186491874918849189491904919149192491934919449195491964919749198491994920049201492024920349204492054920649207492084920949210492114921249213492144921549216492174921849219492204922149222492234922449225492264922749228492294923049231492324923349234492354923649237492384923949240492414924249243492444924549246492474924849249492504925149252492534925449255492564925749258492594926049261492624926349264492654926649267492684926949270492714927249273492744927549276492774927849279492804928149282492834928449285492864928749288492894929049291492924929349294492954929649297492984929949300493014930249303493044930549306493074930849309493104931149312493134931449315493164931749318493194932049321493224932349324493254932649327493284932949330493314933249333493344933549336493374933849339493404934149342493434934449345493464934749348493494935049351493524935349354493554935649357493584935949360493614936249363493644936549366493674936849369493704937149372493734937449375493764937749378493794938049381493824938349384493854938649387493884938949390493914939249393493944939549396493974939849399494004940149402494034940449405494064940749408494094941049411494124941349414494154941649417494184941949420494214
94224942349424494254942649427494284942949430494314943249433494344943549436494374943849439494404944149442494434944449445494464944749448494494945049451494524945349454494554945649457494584945949460494614946249463494644946549466494674946849469494704947149472494734947449475494764947749478494794948049481494824948349484494854948649487494884948949490494914949249493494944949549496494974949849499495004950149502495034950449505495064950749508495094951049511495124951349514495154951649517495184951949520495214952249523495244952549526495274952849529495304953149532495334953449535495364953749538495394954049541495424954349544495454954649547495484954949550495514955249553495544955549556495574955849559495604956149562495634956449565495664956749568495694957049571495724957349574495754957649577495784957949580495814958249583495844958549586495874958849589495904959149592495934959449595495964959749598495994960049601496024960349604496054960649607496084960949610496114961249613496144961549616496174961849619496204962149622496234962449625496264962749628496294963049631496324963349634496354963649637496384963949640496414964249643496444964549646496474964849649496504965149652496534965449655496564965749658496594966049661496624966349664496654966649667496684966949670496714967249673496744967549676496774967849679496804968149682496834968449685496864968749688496894969049691496924969349694496954969649697496984969949700497014970249703497044970549706497074970849709497104971149712497134971449715497164971749718497194972049721497224972349724497254972649727497284972949730497314973249733497344973549736497374973849739497404974149742497434974449745497464974749748497494975049751497524975349754497554975649757497584975949760497614976249763497644976549766497674976849769497704977149772497734977449775497764977749778497794978049781497824978349784497854978649787497884978949790497914979249793497944979549796497974979849799498004980149802498034980449805498064980749808498094981049811498124981349814498154981649817498184981949820498214
98224982349824498254982649827498284982949830498314983249833498344983549836498374983849839498404984149842498434984449845498464984749848498494985049851498524985349854498554985649857498584985949860498614986249863498644986549866498674986849869498704987149872498734987449875498764987749878498794988049881498824988349884498854988649887498884988949890498914989249893498944989549896498974989849899499004990149902499034990449905499064990749908499094991049911499124991349914499154991649917499184991949920499214992249923499244992549926499274992849929499304993149932499334993449935499364993749938499394994049941499424994349944499454994649947499484994949950499514995249953499544995549956499574995849959499604996149962499634996449965499664996749968499694997049971499724997349974499754997649977499784997949980499814998249983499844998549986499874998849989499904999149992499934999449995499964999749998499995000050001500025000350004500055000650007500085000950010500115001250013500145001550016500175001850019500205002150022500235002450025500265002750028500295003050031500325003350034500355003650037500385003950040500415004250043500445004550046500475004850049500505005150052500535005450055500565005750058500595006050061500625006350064500655006650067500685006950070500715007250073500745007550076500775007850079500805008150082500835008450085500865008750088500895009050091500925009350094500955009650097500985009950100501015010250103501045010550106501075010850109501105011150112501135011450115501165011750118501195012050121501225012350124501255012650127501285012950130501315013250133501345013550136501375013850139501405014150142501435014450145501465014750148501495015050151501525015350154501555015650157501585015950160501615016250163501645016550166501675016850169501705017150172501735017450175501765017750178501795018050181501825018350184501855018650187501885018950190501915019250193501945019550196501975019850199502005020150202502035020450205502065020750208502095021050211502125021350214502155021650217502185021950220502215
02225022350224502255022650227502285022950230502315023250233502345023550236502375023850239502405024150242502435024450245502465024750248502495025050251502525025350254502555025650257502585025950260502615026250263502645026550266502675026850269502705027150272502735027450275502765027750278502795028050281502825028350284502855028650287502885028950290502915029250293502945029550296502975029850299503005030150302503035030450305503065030750308503095031050311503125031350314503155031650317503185031950320503215032250323503245032550326503275032850329503305033150332503335033450335503365033750338503395034050341503425034350344503455034650347503485034950350503515035250353503545035550356503575035850359503605036150362503635036450365503665036750368503695037050371503725037350374503755037650377503785037950380503815038250383503845038550386503875038850389503905039150392503935039450395503965039750398503995040050401504025040350404504055040650407504085040950410504115041250413504145041550416504175041850419504205042150422504235042450425504265042750428504295043050431504325043350434504355043650437504385043950440504415044250443504445044550446504475044850449504505045150452504535045450455504565045750458504595046050461504625046350464504655046650467504685046950470504715047250473504745047550476504775047850479504805048150482504835048450485504865048750488504895049050491504925049350494504955049650497504985049950500505015050250503505045050550506505075050850509505105051150512505135051450515505165051750518505195052050521505225052350524505255052650527505285052950530505315053250533505345053550536505375053850539505405054150542505435054450545505465054750548505495055050551505525055350554505555055650557505585055950560505615056250563505645056550566505675056850569505705057150572505735057450575505765057750578505795058050581505825058350584505855058650587505885058950590505915059250593505945059550596505975059850599506005060150602506035060450605506065060750608506095061050611506125061350614506155061650617506185061950620506215
06225062350624506255062650627506285062950630506315063250633506345063550636506375063850639506405064150642506435064450645506465064750648506495065050651506525065350654506555065650657506585065950660506615066250663506645066550666506675066850669506705067150672506735067450675506765067750678506795068050681506825068350684506855068650687506885068950690506915069250693506945069550696506975069850699507005070150702507035070450705507065070750708507095071050711507125071350714507155071650717507185071950720507215072250723507245072550726507275072850729507305073150732507335073450735507365073750738507395074050741507425074350744507455074650747507485074950750507515075250753507545075550756507575075850759507605076150762507635076450765507665076750768507695077050771507725077350774507755077650777507785077950780507815078250783507845078550786507875078850789507905079150792507935079450795507965079750798507995080050801508025080350804508055080650807508085080950810508115081250813508145081550816508175081850819508205082150822508235082450825508265082750828508295083050831508325083350834508355083650837508385083950840508415084250843508445084550846508475084850849508505085150852508535085450855508565085750858508595086050861508625086350864508655086650867508685086950870508715087250873508745087550876508775087850879508805088150882508835088450885508865088750888508895089050891508925089350894508955089650897508985089950900509015090250903509045090550906509075090850909509105091150912509135091450915509165091750918509195092050921509225092350924509255092650927509285092950930509315093250933509345093550936509375093850939509405094150942509435094450945509465094750948509495095050951509525095350954509555095650957509585095950960509615096250963509645096550966509675096850969509705097150972509735097450975509765097750978509795098050981509825098350984509855098650987509885098950990509915099250993509945099550996509975099850999510005100151002510035100451005510065100751008510095101051011510125101351014510155101651017510185101951020510215
10225102351024510255102651027510285102951030510315103251033510345103551036510375103851039510405104151042510435104451045510465104751048510495105051051510525105351054510555105651057510585105951060510615106251063510645106551066510675106851069510705107151072510735107451075510765107751078510795108051081510825108351084510855108651087510885108951090510915109251093510945109551096510975109851099511005110151102511035110451105511065110751108511095111051111511125111351114511155111651117511185111951120511215112251123511245112551126511275112851129511305113151132511335113451135511365113751138511395114051141511425114351144511455114651147511485114951150511515115251153511545115551156511575115851159511605116151162511635116451165511665116751168511695117051171511725117351174511755117651177511785117951180511815118251183511845118551186511875118851189511905119151192511935119451195511965119751198511995120051201512025120351204512055120651207512085120951210512115121251213512145121551216512175121851219512205122151222512235122451225512265122751228512295123051231512325123351234512355123651237512385123951240512415124251243512445124551246512475124851249512505125151252512535125451255512565125751258512595126051261512625126351264512655126651267512685126951270512715127251273512745127551276512775127851279512805128151282512835128451285512865128751288512895129051291512925129351294512955129651297512985129951300513015130251303513045130551306513075130851309513105131151312513135131451315513165131751318513195132051321513225132351324513255132651327513285132951330513315133251333513345133551336513375133851339513405134151342513435134451345513465134751348513495135051351513525135351354513555135651357513585135951360513615136251363513645136551366513675136851369513705137151372513735137451375513765137751378513795138051381513825138351384513855138651387513885138951390513915139251393513945139551396513975139851399514005140151402514035140451405514065140751408514095141051411514125141351414514155141651417514185141951420514215
14225142351424514255142651427514285142951430514315143251433514345143551436514375143851439514405144151442514435144451445514465144751448514495145051451514525145351454514555145651457514585145951460514615146251463514645146551466514675146851469514705147151472514735147451475514765147751478514795148051481514825148351484514855148651487514885148951490514915149251493514945149551496514975149851499515005150151502515035150451505515065150751508515095151051511515125151351514515155151651517515185151951520515215152251523515245152551526515275152851529515305153151532515335153451535515365153751538515395154051541515425154351544515455154651547515485154951550515515155251553515545155551556515575155851559515605156151562515635156451565515665156751568515695157051571515725157351574515755157651577515785157951580515815158251583515845158551586515875158851589515905159151592515935159451595515965159751598515995160051601516025160351604516055160651607516085160951610516115161251613516145161551616516175161851619516205162151622516235162451625516265162751628516295163051631516325163351634516355163651637516385163951640516415164251643516445164551646516475164851649516505165151652516535165451655516565165751658516595166051661516625166351664516655166651667516685166951670516715167251673516745167551676516775167851679516805168151682516835168451685516865168751688516895169051691516925169351694516955169651697516985169951700517015170251703517045170551706517075170851709517105171151712517135171451715517165171751718517195172051721517225172351724517255172651727517285172951730517315173251733517345173551736517375173851739517405174151742517435174451745517465174751748517495175051751517525175351754517555175651757517585175951760517615176251763517645176551766517675176851769517705177151772517735177451775517765177751778517795178051781517825178351784517855178651787517885178951790517915179251793517945179551796517975179851799518005180151802518035180451805518065180751808518095181051811518125181351814518155181651817518185181951820518215
18225182351824518255182651827518285182951830518315183251833518345183551836518375183851839518405184151842518435184451845518465184751848518495185051851518525185351854518555185651857518585185951860518615186251863518645186551866518675186851869518705187151872518735187451875518765187751878518795188051881518825188351884518855188651887518885188951890518915189251893518945189551896518975189851899519005190151902519035190451905519065190751908519095191051911519125191351914519155191651917519185191951920519215192251923519245192551926519275192851929519305193151932519335193451935519365193751938519395194051941519425194351944519455194651947519485194951950519515195251953519545195551956519575195851959519605196151962519635196451965519665196751968519695197051971519725197351974519755197651977519785197951980519815198251983519845198551986519875198851989519905199151992519935199451995519965199751998519995200052001520025200352004520055200652007520085200952010520115201252013520145201552016520175201852019520205202152022520235202452025520265202752028520295203052031520325203352034520355203652037520385203952040520415204252043520445204552046520475204852049520505205152052520535205452055520565205752058520595206052061520625206352064520655206652067520685206952070520715207252073520745207552076520775207852079520805208152082520835208452085520865208752088520895209052091520925209352094520955209652097520985209952100521015210252103521045210552106521075210852109521105211152112521135211452115521165211752118521195212052121521225212352124521255212652127521285212952130521315213252133521345213552136521375213852139521405214152142521435214452145521465214752148521495215052151521525215352154521555215652157521585215952160521615216252163521645216552166521675216852169521705217152172521735217452175521765217752178521795218052181521825218352184521855218652187521885218952190521915219252193521945219552196521975219852199522005220152202522035220452205522065220752208522095221052211522125221352214522155221652217522185221952220522215
22225222352224522255222652227522285222952230522315223252233522345223552236522375223852239522405224152242522435224452245522465224752248522495225052251522525225352254522555225652257522585225952260522615226252263522645226552266522675226852269522705227152272522735227452275522765227752278522795228052281522825228352284522855228652287522885228952290522915229252293522945229552296522975229852299523005230152302523035230452305523065230752308523095231052311523125231352314523155231652317523185231952320523215232252323523245232552326523275232852329523305233152332523335233452335523365233752338523395234052341523425234352344523455234652347523485234952350523515235252353523545235552356523575235852359523605236152362523635236452365523665236752368523695237052371523725237352374523755237652377523785237952380523815238252383523845238552386523875238852389523905239152392523935239452395523965239752398523995240052401524025240352404524055240652407524085240952410524115241252413524145241552416524175241852419524205242152422524235242452425524265242752428524295243052431524325243352434524355243652437524385243952440524415244252443524445244552446524475244852449524505245152452524535245452455524565245752458524595246052461524625246352464524655246652467524685246952470524715247252473524745247552476524775247852479524805248152482524835248452485524865248752488524895249052491524925249352494524955249652497524985249952500525015250252503525045250552506525075250852509525105251152512525135251452515525165251752518525195252052521525225252352524525255252652527525285252952530525315253252533525345253552536525375253852539525405254152542525435254452545525465254752548525495255052551525525255352554525555255652557525585255952560525615256252563525645256552566525675256852569525705257152572525735257452575525765257752578525795258052581525825258352584525855258652587525885258952590525915259252593525945259552596525975259852599526005260152602526035260452605526065260752608526095261052611526125261352614526155261652617526185261952620526215
26225262352624526255262652627526285262952630526315263252633526345263552636526375263852639526405264152642526435264452645526465264752648526495265052651526525265352654526555265652657526585265952660526615266252663526645266552666526675266852669526705267152672526735267452675526765267752678526795268052681526825268352684526855268652687526885268952690526915269252693526945269552696526975269852699527005270152702527035270452705527065270752708527095271052711527125271352714527155271652717527185271952720527215272252723527245272552726527275272852729527305273152732527335273452735527365273752738527395274052741527425274352744527455274652747527485274952750527515275252753527545275552756527575275852759527605276152762527635276452765527665276752768527695277052771527725277352774527755277652777527785277952780527815278252783527845278552786527875278852789527905279152792527935279452795527965279752798527995280052801528025280352804528055280652807528085280952810528115281252813528145281552816528175281852819528205282152822528235282452825528265282752828528295283052831528325283352834528355283652837528385283952840528415284252843528445284552846528475284852849528505285152852528535285452855528565285752858528595286052861528625286352864528655286652867528685286952870528715287252873528745287552876528775287852879528805288152882528835288452885528865288752888528895289052891528925289352894528955289652897528985289952900529015290252903529045290552906529075290852909529105291152912529135291452915529165291752918529195292052921529225292352924529255292652927529285292952930529315293252933529345293552936529375293852939529405294152942529435294452945529465294752948529495295052951529525295352954529555295652957529585295952960529615296252963529645296552966529675296852969529705297152972529735297452975529765297752978529795298052981529825298352984529855298652987529885298952990529915299252993529945299552996529975299852999530005300153002530035300453005530065300753008530095301053011530125301353014530155301653017530185301953020530215
30225302353024530255302653027530285302953030530315303253033530345303553036530375303853039530405304153042530435304453045530465304753048530495305053051530525305353054530555305653057530585305953060530615306253063530645306553066530675306853069530705307153072530735307453075530765307753078530795308053081530825308353084530855308653087530885308953090530915309253093530945309553096530975309853099531005310153102531035310453105531065310753108531095311053111531125311353114531155311653117531185311953120531215312253123531245312553126531275312853129531305313153132531335313453135531365313753138531395314053141531425314353144531455314653147531485314953150531515315253153531545315553156531575315853159531605316153162531635316453165531665316753168531695317053171531725317353174531755317653177531785317953180531815318253183531845318553186531875318853189531905319153192531935319453195531965319753198531995320053201532025320353204532055320653207532085320953210532115321253213532145321553216532175321853219532205322153222532235322453225532265322753228532295323053231532325323353234532355323653237532385323953240532415324253243532445324553246532475324853249532505325153252532535325453255532565325753258532595326053261532625326353264532655326653267532685326953270532715327253273532745327553276532775327853279532805328153282532835328453285532865328753288532895329053291532925329353294532955329653297532985329953300533015330253303533045330553306533075330853309533105331153312533135331453315533165331753318533195332053321533225332353324533255332653327533285332953330533315333253333533345333553336533375333853339533405334153342533435334453345533465334753348533495335053351533525335353354533555335653357533585335953360533615336253363533645336553366533675336853369533705337153372533735337453375533765337753378533795338053381533825338353384533855338653387533885338953390533915339253393533945339553396533975339853399534005340153402534035340453405534065340753408534095341053411534125341353414534155341653417534185341953420534215
34225342353424534255342653427534285342953430534315343253433534345343553436534375343853439534405344153442534435344453445534465344753448534495345053451534525345353454534555345653457534585345953460534615346253463534645346553466534675346853469534705347153472534735347453475534765347753478534795348053481534825348353484534855348653487534885348953490534915349253493534945349553496534975349853499535005350153502535035350453505535065350753508535095351053511535125351353514535155351653517535185351953520535215352253523535245352553526535275352853529535305353153532535335353453535535365353753538535395354053541535425354353544535455354653547535485354953550535515355253553535545355553556535575355853559535605356153562535635356453565535665356753568535695357053571535725357353574535755357653577535785357953580535815358253583535845358553586535875358853589535905359153592535935359453595535965359753598535995360053601536025360353604536055360653607536085360953610536115361253613536145361553616536175361853619536205362153622536235362453625536265362753628536295363053631536325363353634536355363653637536385363953640536415364253643536445364553646536475364853649536505365153652536535365453655536565365753658536595366053661536625366353664536655366653667536685366953670536715367253673536745367553676536775367853679536805368153682536835368453685536865368753688536895369053691536925369353694536955369653697536985369953700537015370253703537045370553706537075370853709537105371153712537135371453715537165371753718537195372053721537225372353724537255372653727537285372953730537315373253733537345373553736537375373853739537405374153742537435374453745537465374753748537495375053751537525375353754537555375653757537585375953760537615376253763537645376553766537675376853769537705377153772537735377453775537765377753778537795378053781537825378353784537855378653787537885378953790537915379253793537945379553796537975379853799538005380153802538035380453805538065380753808538095381053811538125381353814538155381653817538185381953820538215
38225382353824538255382653827538285382953830538315383253833538345383553836538375383853839538405384153842538435384453845538465384753848538495385053851538525385353854538555385653857538585385953860538615386253863538645386553866538675386853869538705387153872538735387453875538765387753878538795388053881538825388353884538855388653887538885388953890538915389253893538945389553896538975389853899539005390153902539035390453905539065390753908539095391053911539125391353914539155391653917539185391953920539215392253923539245392553926539275392853929539305393153932539335393453935539365393753938539395394053941539425394353944539455394653947539485394953950539515395253953539545395553956539575395853959539605396153962539635396453965539665396753968539695397053971539725397353974539755397653977539785397953980539815398253983539845398553986539875398853989539905399153992539935399453995539965399753998539995400054001540025400354004540055400654007540085400954010540115401254013540145401554016540175401854019540205402154022540235402454025540265402754028540295403054031540325403354034540355403654037540385403954040540415404254043540445404554046540475404854049540505405154052540535405454055540565405754058540595406054061540625406354064540655406654067540685406954070540715407254073540745407554076540775407854079540805408154082540835408454085540865408754088540895409054091540925409354094540955409654097540985409954100541015410254103541045410554106541075410854109541105411154112541135411454115541165411754118541195412054121541225412354124541255412654127541285412954130541315413254133541345413554136541375413854139541405414154142541435414454145541465414754148541495415054151541525415354154541555415654157541585415954160541615416254163541645416554166541675416854169541705417154172541735417454175541765417754178541795418054181541825418354184541855418654187541885418954190541915419254193541945419554196541975419854199542005420154202542035420454205542065420754208542095421054211542125421354214542155421654217542185421954220542215
42225422354224542255422654227542285422954230542315423254233542345423554236542375423854239542405424154242542435424454245542465424754248542495425054251542525425354254542555425654257542585425954260542615426254263542645426554266542675426854269542705427154272542735427454275542765427754278542795428054281542825428354284542855428654287542885428954290542915429254293542945429554296542975429854299543005430154302543035430454305543065430754308543095431054311543125431354314543155431654317543185431954320543215432254323543245432554326543275432854329543305433154332543335433454335543365433754338543395434054341543425434354344543455434654347543485434954350543515435254353543545435554356543575435854359543605436154362543635436454365543665436754368543695437054371543725437354374543755437654377543785437954380543815438254383543845438554386543875438854389543905439154392543935439454395543965439754398543995440054401544025440354404544055440654407544085440954410544115441254413544145441554416544175441854419544205442154422544235442454425544265442754428544295443054431544325443354434544355443654437544385443954440544415444254443544445444554446544475444854449544505445154452544535445454455544565445754458544595446054461544625446354464544655446654467544685446954470544715447254473544745447554476544775447854479544805448154482544835448454485544865448754488544895449054491544925449354494544955449654497544985449954500545015450254503545045450554506545075450854509545105451154512545135451454515545165451754518545195452054521545225452354524545255452654527545285452954530545315453254533545345453554536545375453854539545405454154542545435454454545545465454754548545495455054551545525455354554545555455654557545585455954560545615456254563545645456554566545675456854569545705457154572545735457454575545765457754578545795458054581545825458354584545855458654587545885458954590545915459254593545945459554596545975459854599546005460154602546035460454605546065460754608546095461054611546125461354614546155461654617546185461954620546215
46225462354624546255462654627546285462954630546315463254633546345463554636546375463854639546405464154642546435464454645546465464754648546495465054651546525465354654546555465654657546585465954660546615466254663546645466554666546675466854669546705467154672546735467454675546765467754678546795468054681546825468354684546855468654687546885468954690546915469254693546945469554696546975469854699547005470154702547035470454705547065470754708547095471054711547125471354714547155471654717547185471954720547215472254723547245472554726547275472854729547305473154732547335473454735547365473754738547395474054741547425474354744547455474654747547485474954750547515475254753547545475554756547575475854759547605476154762547635476454765547665476754768547695477054771547725477354774547755477654777547785477954780547815478254783547845478554786547875478854789547905479154792547935479454795547965479754798547995480054801548025480354804548055480654807548085480954810548115481254813548145481554816548175481854819548205482154822548235482454825548265482754828548295483054831548325483354834548355483654837548385483954840548415484254843548445484554846548475484854849548505485154852548535485454855548565485754858548595486054861548625486354864548655486654867548685486954870548715487254873548745487554876548775487854879548805488154882548835488454885548865488754888548895489054891548925489354894548955489654897548985489954900549015490254903549045490554906549075490854909549105491154912549135491454915549165491754918549195492054921549225492354924549255492654927549285492954930549315493254933549345493554936549375493854939549405494154942549435494454945549465494754948549495495054951549525495354954549555495654957549585495954960549615496254963549645496554966549675496854969549705497154972549735497454975549765497754978549795498054981549825498354984549855498654987549885498954990549915499254993549945499554996549975499854999550005500155002550035500455005550065500755008550095501055011550125501355014550155501655017550185501955020550215
50225502355024550255502655027550285502955030550315503255033550345503555036550375503855039550405504155042550435504455045550465504755048550495505055051550525505355054550555505655057550585505955060550615506255063550645506555066550675506855069550705507155072550735507455075550765507755078550795508055081550825508355084550855508655087550885508955090550915509255093550945509555096550975509855099551005510155102551035510455105551065510755108551095511055111551125511355114551155511655117551185511955120551215512255123551245512555126551275512855129551305513155132551335513455135551365513755138551395514055141551425514355144551455514655147551485514955150551515515255153551545515555156551575515855159551605516155162551635516455165551665516755168551695517055171551725517355174551755517655177551785517955180551815518255183551845518555186551875518855189551905519155192551935519455195551965519755198551995520055201552025520355204552055520655207552085520955210552115521255213552145521555216552175521855219552205522155222552235522455225552265522755228552295523055231552325523355234552355523655237552385523955240552415524255243552445524555246552475524855249552505525155252552535525455255552565525755258552595526055261552625526355264552655526655267552685526955270552715527255273552745527555276552775527855279552805528155282552835528455285552865528755288552895529055291552925529355294552955529655297552985529955300553015530255303553045530555306553075530855309553105531155312553135531455315553165531755318553195532055321553225532355324553255532655327553285532955330553315533255333553345533555336553375533855339553405534155342553435534455345553465534755348553495535055351553525535355354553555535655357553585535955360553615536255363553645536555366553675536855369553705537155372553735537455375553765537755378553795538055381553825538355384553855538655387553885538955390553915539255393553945539555396553975539855399554005540155402554035540455405554065540755408554095541055411554125541355414554155541655417554185541955420554215
54225542355424554255542655427554285542955430554315543255433554345543555436554375543855439554405544155442554435544455445554465544755448554495545055451554525545355454554555545655457554585545955460554615546255463554645546555466554675546855469554705547155472554735547455475554765547755478554795548055481554825548355484554855548655487554885548955490554915549255493554945549555496554975549855499555005550155502555035550455505555065550755508555095551055511555125551355514555155551655517555185551955520555215552255523555245552555526555275552855529555305553155532555335553455535555365553755538555395554055541555425554355544555455554655547555485554955550555515555255553555545555555556555575555855559555605556155562555635556455565555665556755568555695557055571555725557355574555755557655577555785557955580555815558255583555845558555586555875558855589555905559155592555935559455595555965559755598555995560055601556025560355604556055560655607556085560955610556115561255613556145561555616556175561855619556205562155622556235562455625556265562755628556295563055631556325563355634556355563655637556385563955640556415564255643556445564555646556475564855649556505565155652556535565455655556565565755658556595566055661556625566355664556655566655667556685566955670556715567255673556745567555676556775567855679556805568155682556835568455685556865568755688556895569055691556925569355694556955569655697556985569955700557015570255703557045570555706557075570855709557105571155712557135571455715557165571755718557195572055721557225572355724557255572655727557285572955730557315573255733557345573555736557375573855739557405574155742557435574455745557465574755748557495575055751557525575355754557555575655757557585575955760557615576255763557645576555766557675576855769557705577155772557735577455775557765577755778557795578055781557825578355784557855578655787557885578955790557915579255793557945579555796557975579855799558005580155802558035580455805558065580755808558095581055811558125581355814558155581655817558185581955820558215
58225582355824558255582655827558285582955830558315583255833558345583555836558375583855839558405584155842558435584455845558465584755848558495585055851558525585355854558555585655857558585585955860558615586255863558645586555866558675586855869558705587155872558735587455875558765587755878558795588055881558825588355884558855588655887558885588955890558915589255893558945589555896558975589855899559005590155902559035590455905559065590755908559095591055911559125591355914559155591655917559185591955920559215592255923559245592555926559275592855929559305593155932559335593455935559365593755938559395594055941559425594355944559455594655947559485594955950559515595255953559545595555956559575595855959559605596155962559635596455965559665596755968559695597055971559725597355974559755597655977559785597955980559815598255983559845598555986559875598855989559905599155992559935599455995559965599755998559995600056001560025600356004560055600656007560085600956010560115601256013560145601556016560175601856019560205602156022560235602456025560265602756028560295603056031560325603356034560355603656037560385603956040560415604256043560445604556046560475604856049560505605156052560535605456055560565605756058560595606056061560625606356064560655606656067560685606956070560715607256073560745607556076560775607856079560805608156082560835608456085560865608756088560895609056091560925609356094560955609656097560985609956100561015610256103561045610556106561075610856109561105611156112561135611456115561165611756118561195612056121561225612356124561255612656127561285612956130561315613256133561345613556136561375613856139561405614156142561435614456145561465614756148561495615056151561525615356154561555615656157561585615956160561615616256163561645616556166561675616856169561705617156172561735617456175561765617756178561795618056181561825618356184561855618656187561885618956190561915619256193561945619556196561975619856199562005620156202562035620456205562065620756208562095621056211562125621356214562155621656217562185621956220562215
62225622356224562255622656227562285622956230562315623256233562345623556236562375623856239562405624156242562435624456245562465624756248562495625056251562525625356254562555625656257562585625956260562615626256263562645626556266562675626856269562705627156272562735627456275562765627756278562795628056281562825628356284562855628656287562885628956290562915629256293562945629556296562975629856299563005630156302563035630456305563065630756308563095631056311563125631356314563155631656317563185631956320563215632256323563245632556326563275632856329563305633156332563335633456335563365633756338563395634056341563425634356344563455634656347563485634956350563515635256353563545635556356563575635856359563605636156362563635636456365563665636756368563695637056371563725637356374563755637656377563785637956380563815638256383563845638556386563875638856389563905639156392563935639456395563965639756398563995640056401564025640356404564055640656407564085640956410564115641256413564145641556416564175641856419564205642156422564235642456425564265642756428564295643056431564325643356434564355643656437564385643956440564415644256443564445644556446564475644856449564505645156452564535645456455564565645756458564595646056461564625646356464564655646656467564685646956470564715647256473564745647556476564775647856479564805648156482564835648456485564865648756488564895649056491564925649356494564955649656497564985649956500565015650256503565045650556506565075650856509565105651156512565135651456515565165651756518565195652056521565225652356524565255652656527565285652956530565315653256533565345653556536565375653856539565405654156542565435654456545565465654756548565495655056551565525655356554565555655656557565585655956560565615656256563565645656556566565675656856569565705657156572565735657456575565765657756578565795658056581565825658356584565855658656587565885658956590565915659256593565945659556596565975659856599566005660156602566035660456605566065660756608566095661056611566125661356614566155661656617566185661956620566215
66225662356624566255662656627566285662956630566315663256633566345663556636566375663856639566405664156642566435664456645566465664756648566495665056651566525665356654566555665656657566585665956660566615666256663566645666556666566675666856669566705667156672566735667456675566765667756678566795668056681566825668356684566855668656687566885668956690566915669256693566945669556696566975669856699567005670156702567035670456705567065670756708567095671056711567125671356714567155671656717567185671956720567215672256723567245672556726567275672856729567305673156732567335673456735567365673756738567395674056741567425674356744567455674656747567485674956750567515675256753567545675556756567575675856759567605676156762567635676456765567665676756768567695677056771567725677356774567755677656777567785677956780567815678256783567845678556786567875678856789567905679156792567935679456795567965679756798567995680056801568025680356804568055680656807568085680956810568115681256813568145681556816568175681856819568205682156822568235682456825568265682756828568295683056831568325683356834568355683656837568385683956840568415684256843568445684556846568475684856849568505685156852568535685456855568565685756858568595686056861568625686356864568655686656867568685686956870568715687256873568745687556876568775687856879568805688156882568835688456885568865688756888568895689056891568925689356894568955689656897568985689956900569015690256903569045690556906569075690856909569105691156912569135691456915569165691756918569195692056921569225692356924569255692656927569285692956930569315693256933569345693556936569375693856939569405694156942569435694456945569465694756948569495695056951569525695356954569555695656957569585695956960569615696256963569645696556966569675696856969569705697156972569735697456975569765697756978569795698056981569825698356984569855698656987569885698956990569915699256993569945699556996569975699856999570005700157002570035700457005570065700757008570095701057011570125701357014570155701657017570185701957020570215
70225702357024570255702657027570285702957030570315703257033570345703557036570375703857039570405704157042570435704457045570465704757048570495705057051570525705357054570555705657057570585705957060570615706257063570645706557066570675706857069570705707157072570735707457075570765707757078570795708057081570825708357084570855708657087570885708957090570915709257093570945709557096570975709857099571005710157102571035710457105571065710757108571095711057111571125711357114571155711657117571185711957120571215712257123571245712557126571275712857129571305713157132571335713457135571365713757138571395714057141571425714357144571455714657147571485714957150571515715257153571545715557156571575715857159571605716157162571635716457165571665716757168571695717057171571725717357174571755717657177571785717957180571815718257183571845718557186571875718857189571905719157192571935719457195571965719757198571995720057201572025720357204572055720657207572085720957210572115721257213572145721557216572175721857219572205722157222572235722457225572265722757228572295723057231572325723357234572355723657237572385723957240572415724257243572445724557246572475724857249572505725157252572535725457255572565725757258572595726057261572625726357264572655726657267572685726957270572715727257273572745727557276572775727857279572805728157282572835728457285572865728757288572895729057291572925729357294572955729657297572985729957300573015730257303573045730557306573075730857309573105731157312573135731457315573165731757318573195732057321573225732357324573255732657327573285732957330573315733257333573345733557336573375733857339573405734157342573435734457345573465734757348573495735057351573525735357354573555735657357573585735957360573615736257363573645736557366573675736857369573705737157372573735737457375573765737757378573795738057381573825738357384573855738657387573885738957390573915739257393573945739557396573975739857399574005740157402574035740457405574065740757408574095741057411574125741357414574155741657417574185741957420574215
74225742357424574255742657427574285742957430574315743257433574345743557436574375743857439574405744157442574435744457445574465744757448574495745057451574525745357454574555745657457574585745957460574615746257463574645746557466574675746857469574705747157472574735747457475574765747757478574795748057481574825748357484574855748657487574885748957490574915749257493574945749557496574975749857499575005750157502575035750457505575065750757508575095751057511575125751357514575155751657517575185751957520575215752257523575245752557526575275752857529575305753157532575335753457535575365753757538575395754057541575425754357544575455754657547575485754957550575515755257553575545755557556575575755857559575605756157562575635756457565575665756757568575695757057571575725757357574575755757657577575785757957580575815758257583575845758557586575875758857589575905759157592575935759457595575965759757598575995760057601576025760357604576055760657607576085760957610576115761257613576145761557616576175761857619576205762157622576235762457625576265762757628576295763057631576325763357634576355763657637576385763957640576415764257643576445764557646576475764857649576505765157652576535765457655576565765757658576595766057661576625766357664576655766657667576685766957670576715767257673576745767557676576775767857679576805768157682576835768457685576865768757688576895769057691576925769357694576955769657697576985769957700577015770257703577045770557706577075770857709577105771157712577135771457715577165771757718577195772057721577225772357724577255772657727577285772957730577315773257733577345773557736577375773857739577405774157742577435774457745577465774757748577495775057751577525775357754577555775657757577585775957760577615776257763577645776557766577675776857769577705777157772577735777457775577765777757778577795778057781577825778357784577855778657787577885778957790577915779257793577945779557796577975779857799578005780157802578035780457805578065780757808578095781057811578125781357814578155781657817578185781957820578215
78225782357824578255782657827578285782957830578315783257833578345783557836578375783857839578405784157842578435784457845578465784757848578495785057851578525785357854578555785657857578585785957860578615786257863578645786557866578675786857869578705787157872578735787457875578765787757878578795788057881578825788357884578855788657887578885788957890578915789257893578945789557896578975789857899579005790157902579035790457905579065790757908579095791057911579125791357914579155791657917579185791957920579215792257923579245792557926579275792857929579305793157932579335793457935579365793757938579395794057941579425794357944579455794657947579485794957950579515795257953579545795557956579575795857959579605796157962579635796457965579665796757968579695797057971579725797357974579755797657977579785797957980579815798257983579845798557986579875798857989579905799157992579935799457995579965799757998579995800058001580025800358004580055800658007580085800958010580115801258013580145801558016580175801858019580205802158022580235802458025580265802758028580295803058031580325803358034580355803658037580385803958040580415804258043580445804558046580475804858049580505805158052580535805458055580565805758058580595806058061580625806358064580655806658067580685806958070580715807258073580745807558076580775807858079580805808158082580835808458085580865808758088580895809058091580925809358094580955809658097580985809958100581015810258103581045810558106581075810858109581105811158112581135811458115581165811758118581195812058121581225812358124581255812658127581285812958130581315813258133581345813558136581375813858139581405814158142581435814458145581465814758148581495815058151581525815358154581555815658157581585815958160581615816258163581645816558166581675816858169581705817158172581735817458175581765817758178581795818058181581825818358184581855818658187581885818958190581915819258193581945819558196581975819858199582005820158202582035820458205582065820758208582095821058211582125821358214582155821658217582185821958220582215
82225822358224582255822658227582285822958230582315823258233582345823558236582375823858239582405824158242582435824458245582465824758248582495825058251582525825358254582555825658257582585825958260582615826258263582645826558266582675826858269582705827158272582735827458275582765827758278582795828058281582825828358284582855828658287582885828958290582915829258293582945829558296582975829858299583005830158302583035830458305583065830758308583095831058311583125831358314583155831658317583185831958320583215832258323583245832558326583275832858329583305833158332583335833458335583365833758338583395834058341583425834358344583455834658347583485834958350583515835258353583545835558356583575835858359583605836158362583635836458365583665836758368583695837058371583725837358374583755837658377583785837958380583815838258383583845838558386583875838858389583905839158392583935839458395583965839758398583995840058401584025840358404584055840658407584085840958410584115841258413584145841558416584175841858419584205842158422584235842458425584265842758428584295843058431584325843358434584355843658437584385843958440584415844258443584445844558446584475844858449584505845158452584535845458455584565845758458584595846058461584625846358464584655846658467584685846958470584715847258473584745847558476584775847858479584805848158482584835848458485584865848758488584895849058491584925849358494584955849658497584985849958500585015850258503585045850558506585075850858509585105851158512585135851458515585165851758518585195852058521585225852358524585255852658527585285852958530585315853258533585345853558536585375853858539585405854158542585435854458545585465854758548585495855058551585525855358554585555855658557585585855958560585615856258563585645856558566585675856858569585705857158572585735857458575585765857758578585795858058581585825858358584585855858658587585885858958590585915859258593585945859558596585975859858599586005860158602586035860458605586065860758608586095861058611586125861358614586155861658617586185861958620586215
86225862358624586255862658627586285862958630586315863258633586345863558636586375863858639586405864158642586435864458645586465864758648586495865058651586525865358654586555865658657586585865958660586615866258663586645866558666586675866858669586705867158672586735867458675586765867758678586795868058681586825868358684586855868658687586885868958690586915869258693586945869558696586975869858699587005870158702587035870458705587065870758708587095871058711587125871358714587155871658717587185871958720587215872258723587245872558726587275872858729587305873158732587335873458735587365873758738587395874058741587425874358744587455874658747587485874958750587515875258753587545875558756587575875858759587605876158762587635876458765587665876758768587695877058771587725877358774587755877658777587785877958780587815878258783587845878558786587875878858789587905879158792587935879458795587965879758798587995880058801588025880358804588055880658807588085880958810588115881258813588145881558816588175881858819588205882158822588235882458825588265882758828588295883058831588325883358834588355883658837588385883958840588415884258843588445884558846588475884858849588505885158852588535885458855588565885758858588595886058861588625886358864588655886658867588685886958870588715887258873588745887558876588775887858879588805888158882588835888458885588865888758888588895889058891588925889358894588955889658897588985889958900589015890258903589045890558906589075890858909589105891158912589135891458915589165891758918589195892058921589225892358924589255892658927589285892958930589315893258933589345893558936589375893858939589405894158942589435894458945589465894758948589495895058951589525895358954589555895658957589585895958960589615896258963589645896558966589675896858969589705897158972589735897458975589765897758978589795898058981589825898358984589855898658987589885898958990589915899258993589945899558996589975899858999590005900159002590035900459005590065900759008590095901059011590125901359014590155901659017590185901959020590215
90225902359024590255902659027590285902959030590315903259033590345903559036590375903859039590405904159042590435904459045590465904759048590495905059051590525905359054590555905659057590585905959060590615906259063590645906559066590675906859069590705907159072590735907459075590765907759078590795908059081590825908359084590855908659087590885908959090590915909259093590945909559096590975909859099591005910159102591035910459105591065910759108591095911059111591125911359114591155911659117591185911959120591215912259123591245912559126591275912859129591305913159132591335913459135591365913759138591395914059141591425914359144591455914659147591485914959150591515915259153591545915559156591575915859159591605916159162591635916459165591665916759168591695917059171591725917359174591755917659177591785917959180591815918259183591845918559186591875918859189591905919159192591935919459195591965919759198591995920059201592025920359204592055920659207592085920959210592115921259213592145921559216592175921859219592205922159222592235922459225592265922759228592295923059231592325923359234592355923659237592385923959240592415924259243592445924559246592475924859249592505925159252592535925459255592565925759258592595926059261592625926359264592655926659267592685926959270592715927259273592745927559276592775927859279592805928159282592835928459285592865928759288592895929059291592925929359294592955929659297592985929959300593015930259303593045930559306593075930859309593105931159312593135931459315593165931759318593195932059321593225932359324593255932659327593285932959330593315933259333593345933559336593375933859339593405934159342593435934459345593465934759348593495935059351593525935359354593555935659357593585935959360593615936259363593645936559366593675936859369593705937159372593735937459375593765937759378593795938059381593825938359384593855938659387593885938959390593915939259393593945939559396593975939859399594005940159402594035940459405594065940759408594095941059411594125941359414594155941659417594185941959420594215
94225942359424594255942659427594285942959430594315943259433594345943559436594375943859439594405944159442594435944459445594465944759448594495945059451594525945359454594555945659457594585945959460594615946259463594645946559466594675946859469594705947159472594735947459475594765947759478594795948059481594825948359484594855948659487594885948959490594915949259493594945949559496594975949859499595005950159502595035950459505595065950759508595095951059511595125951359514595155951659517595185951959520595215952259523595245952559526595275952859529595305953159532595335953459535595365953759538595395954059541595425954359544595455954659547595485954959550595515955259553595545955559556595575955859559595605956159562595635956459565595665956759568595695957059571595725957359574595755957659577595785957959580595815958259583595845958559586595875958859589595905959159592595935959459595595965959759598595995960059601596025960359604596055960659607596085960959610596115961259613596145961559616596175961859619596205962159622596235962459625596265962759628596295963059631596325963359634596355963659637596385963959640596415964259643596445964559646596475964859649596505965159652596535965459655596565965759658596595966059661596625966359664596655966659667596685966959670596715967259673596745967559676596775967859679596805968159682596835968459685596865968759688596895969059691596925969359694596955969659697596985969959700597015970259703597045970559706597075970859709597105971159712597135971459715597165971759718597195972059721597225972359724597255972659727597285972959730597315973259733597345973559736597375973859739597405974159742597435974459745597465974759748597495975059751597525975359754597555975659757597585975959760597615976259763597645976559766597675976859769597705977159772597735977459775597765977759778597795978059781597825978359784597855978659787597885978959790597915979259793597945979559796597975979859799598005980159802598035980459805598065980759808598095981059811598125981359814598155981659817598185981959820598215
98225982359824598255982659827598285982959830598315983259833598345983559836598375983859839598405984159842598435984459845598465984759848598495985059851598525985359854598555985659857598585985959860598615986259863598645986559866598675986859869598705987159872598735987459875598765987759878598795988059881598825988359884598855988659887598885988959890598915989259893598945989559896598975989859899599005990159902599035990459905599065990759908599095991059911599125991359914599155991659917599185991959920599215992259923599245992559926599275992859929599305993159932599335993459935599365993759938599395994059941599425994359944599455994659947599485994959950599515995259953599545995559956599575995859959599605996159962599635996459965599665996759968599695997059971599725997359974599755997659977599785997959980599815998259983599845998559986599875998859989599905999159992599935999459995599965999759998599996000060001600026000360004600056000660007600086000960010600116001260013600146001560016600176001860019600206002160022600236002460025600266002760028600296003060031600326003360034600356003660037600386003960040600416004260043600446004560046600476004860049600506005160052600536005460055600566005760058600596006060061600626006360064600656006660067600686006960070600716007260073600746007560076600776007860079600806008160082600836008460085600866008760088600896009060091600926009360094600956009660097600986009960100601016010260103601046010560106601076010860109601106011160112601136011460115601166011760118601196012060121601226012360124601256012660127601286012960130601316013260133601346013560136601376013860139601406014160142601436014460145601466014760148601496015060151601526015360154601556015660157601586015960160601616016260163601646016560166601676016860169601706017160172601736017460175601766017760178601796018060181601826018360184601856018660187601886018960190601916019260193601946019560196601976019860199602006020160202602036020460205602066020760208602096021060211602126021360214602156021660217602186021960220602216
02226022360224602256022660227602286022960230602316023260233602346023560236602376023860239602406024160242602436024460245602466024760248602496025060251602526025360254602556025660257602586025960260602616026260263602646026560266602676026860269602706027160272602736027460275602766027760278602796028060281602826028360284602856028660287602886028960290602916029260293602946029560296602976029860299603006030160302603036030460305603066030760308603096031060311603126031360314603156031660317603186031960320603216032260323603246032560326603276032860329603306033160332603336033460335603366033760338603396034060341603426034360344603456034660347603486034960350603516035260353603546035560356603576035860359603606036160362603636036460365603666036760368603696037060371603726037360374603756037660377603786037960380603816038260383603846038560386603876038860389603906039160392603936039460395603966039760398603996040060401604026040360404604056040660407604086040960410604116041260413604146041560416604176041860419604206042160422604236042460425604266042760428604296043060431604326043360434604356043660437604386043960440604416044260443604446044560446604476044860449604506045160452604536045460455604566045760458604596046060461604626046360464604656046660467604686046960470604716047260473604746047560476604776047860479604806048160482604836048460485604866048760488604896049060491604926049360494604956049660497604986049960500605016050260503605046050560506605076050860509605106051160512605136051460515605166051760518605196052060521605226052360524605256052660527605286052960530605316053260533605346053560536605376053860539605406054160542605436054460545605466054760548605496055060551605526055360554605556055660557605586055960560605616056260563605646056560566605676056860569605706057160572605736057460575605766057760578605796058060581605826058360584605856058660587605886058960590605916059260593605946059560596605976059860599606006060160602606036060460605606066060760608606096061060611606126061360614606156061660617606186061960620606216
06226062360624606256062660627606286062960630606316063260633606346063560636606376063860639606406064160642606436064460645606466064760648606496065060651606526065360654606556065660657606586065960660606616066260663606646066560666606676066860669606706067160672606736067460675606766067760678606796068060681606826068360684606856068660687606886068960690606916069260693606946069560696606976069860699607006070160702607036070460705607066070760708607096071060711607126071360714607156071660717607186071960720607216072260723607246072560726607276072860729607306073160732607336073460735607366073760738607396074060741607426074360744607456074660747607486074960750607516075260753607546075560756607576075860759607606076160762607636076460765607666076760768607696077060771607726077360774607756077660777607786077960780607816078260783607846078560786607876078860789607906079160792607936079460795607966079760798607996080060801608026080360804608056080660807608086080960810608116081260813608146081560816608176081860819608206082160822608236082460825608266082760828608296083060831608326083360834608356083660837608386083960840608416084260843608446084560846608476084860849608506085160852608536085460855608566085760858608596086060861608626086360864608656086660867608686086960870608716087260873608746087560876608776087860879608806088160882608836088460885608866088760888608896089060891608926089360894608956089660897608986089960900609016090260903609046090560906609076090860909609106091160912609136091460915609166091760918609196092060921609226092360924609256092660927609286092960930609316093260933609346093560936609376093860939609406094160942609436094460945609466094760948609496095060951609526095360954609556095660957609586095960960609616096260963609646096560966609676096860969609706097160972609736097460975609766097760978609796098060981609826098360984609856098660987609886098960990609916099260993609946099560996609976099860999610006100161002610036100461005610066100761008610096101061011610126101361014610156101661017610186101961020610216
10226102361024610256102661027610286102961030610316103261033610346103561036610376103861039610406104161042610436104461045610466104761048610496105061051610526105361054610556105661057610586105961060610616106261063610646106561066610676106861069610706107161072610736107461075610766107761078610796108061081610826108361084610856108661087610886108961090610916109261093610946109561096610976109861099611006110161102611036110461105611066110761108611096111061111611126111361114611156111661117611186111961120611216112261123611246112561126611276112861129611306113161132611336113461135611366113761138611396114061141611426114361144611456114661147611486114961150611516115261153611546115561156611576115861159611606116161162611636116461165611666116761168611696117061171611726117361174611756117661177611786117961180611816118261183611846118561186611876118861189611906119161192611936119461195611966119761198611996120061201612026120361204612056120661207612086120961210612116121261213612146121561216612176121861219612206122161222612236122461225612266122761228612296123061231612326123361234612356123661237612386123961240612416124261243612446124561246612476124861249612506125161252612536125461255612566125761258612596126061261612626126361264612656126661267612686126961270612716127261273612746127561276612776127861279612806128161282612836128461285612866128761288612896129061291612926129361294612956129661297612986129961300613016130261303613046130561306613076130861309613106131161312613136131461315613166131761318613196132061321613226132361324613256132661327613286132961330613316133261333613346133561336613376133861339613406134161342613436134461345613466134761348613496135061351613526135361354613556135661357613586135961360613616136261363613646136561366613676136861369613706137161372613736137461375613766137761378613796138061381613826138361384613856138661387613886138961390613916139261393613946139561396613976139861399614006140161402614036140461405614066140761408614096141061411614126141361414614156141661417614186141961420614216
14226142361424614256142661427614286142961430614316143261433614346143561436614376143861439614406144161442614436144461445614466144761448614496145061451614526145361454614556145661457614586145961460614616146261463614646146561466614676146861469614706147161472614736147461475614766147761478614796148061481614826148361484614856148661487614886148961490614916149261493614946149561496614976149861499615006150161502615036150461505615066150761508615096151061511615126151361514615156151661517615186151961520615216152261523615246152561526615276152861529615306153161532615336153461535615366153761538615396154061541615426154361544615456154661547615486154961550615516155261553615546155561556615576155861559615606156161562615636156461565615666156761568615696157061571615726157361574615756157661577615786157961580615816158261583615846158561586615876158861589615906159161592615936159461595615966159761598615996160061601616026160361604616056160661607616086160961610616116161261613616146161561616616176161861619616206162161622616236162461625616266162761628616296163061631616326163361634616356163661637616386163961640616416164261643616446164561646616476164861649616506165161652616536165461655616566165761658616596166061661616626166361664616656166661667616686166961670616716167261673616746167561676616776167861679616806168161682616836168461685616866168761688616896169061691616926169361694616956169661697616986169961700617016170261703617046170561706617076170861709617106171161712617136171461715617166171761718617196172061721617226172361724617256172661727617286172961730617316173261733617346173561736617376173861739617406174161742617436174461745617466174761748617496175061751617526175361754617556175661757617586175961760617616176261763617646176561766617676176861769617706177161772617736177461775617766177761778617796178061781617826178361784617856178661787617886178961790617916179261793617946179561796617976179861799618006180161802618036180461805618066180761808618096181061811618126181361814618156181661817618186181961820618216
18226182361824618256182661827618286182961830618316183261833618346183561836618376183861839618406184161842618436184461845618466184761848618496185061851618526185361854618556185661857618586185961860618616186261863618646186561866618676186861869618706187161872618736187461875618766187761878618796188061881618826188361884618856188661887618886188961890618916189261893618946189561896618976189861899619006190161902619036190461905619066190761908619096191061911619126191361914619156191661917619186191961920619216192261923619246192561926619276192861929619306193161932619336193461935619366193761938619396194061941619426194361944619456194661947619486194961950619516195261953619546195561956619576195861959619606196161962619636196461965619666196761968619696197061971619726197361974619756197661977619786197961980619816198261983619846198561986619876198861989619906199161992619936199461995619966199761998619996200062001620026200362004620056200662007620086200962010620116201262013620146201562016620176201862019620206202162022620236202462025620266202762028620296203062031620326203362034620356203662037620386203962040620416204262043620446204562046620476204862049620506205162052620536205462055620566205762058620596206062061620626206362064620656206662067620686206962070620716207262073620746207562076620776207862079620806208162082620836208462085620866208762088620896209062091620926209362094620956209662097620986209962100621016210262103621046210562106621076210862109621106211162112621136211462115621166211762118621196212062121621226212362124621256212662127621286212962130621316213262133621346213562136621376213862139621406214162142621436214462145621466214762148621496215062151621526215362154621556215662157621586215962160621616216262163621646216562166621676216862169621706217162172621736217462175621766217762178621796218062181621826218362184621856218662187621886218962190621916219262193621946219562196621976219862199622006220162202622036220462205622066220762208622096221062211622126221362214622156221662217622186221962220622216
22226222362224622256222662227622286222962230622316223262233622346223562236622376223862239622406224162242622436224462245622466224762248622496225062251622526225362254622556225662257622586225962260622616226262263622646226562266622676226862269622706227162272622736227462275622766227762278622796228062281622826228362284622856228662287622886228962290622916229262293622946229562296622976229862299623006230162302623036230462305623066230762308623096231062311623126231362314623156231662317623186231962320623216232262323623246232562326623276232862329623306233162332623336233462335623366233762338623396234062341623426234362344623456234662347623486234962350623516235262353623546235562356623576235862359623606236162362623636236462365623666236762368623696237062371623726237362374623756237662377623786237962380623816238262383623846238562386623876238862389623906239162392623936239462395623966239762398623996240062401624026240362404624056240662407624086240962410624116241262413624146241562416624176241862419624206242162422624236242462425624266242762428624296243062431624326243362434624356243662437624386243962440624416244262443624446244562446624476244862449624506245162452624536245462455624566245762458624596246062461624626246362464624656246662467624686246962470624716247262473624746247562476624776247862479624806248162482624836248462485624866248762488624896249062491624926249362494624956249662497624986249962500625016250262503625046250562506625076250862509625106251162512625136251462515625166251762518625196252062521625226252362524625256252662527625286252962530625316253262533625346253562536625376253862539625406254162542625436254462545625466254762548625496255062551625526255362554625556255662557625586255962560625616256262563625646256562566625676256862569625706257162572625736257462575625766257762578625796258062581625826258362584625856258662587625886258962590625916259262593625946259562596625976259862599626006260162602626036260462605626066260762608626096261062611626126261362614626156261662617626186261962620626216
26226262362624626256262662627626286262962630626316263262633626346263562636626376263862639626406264162642626436264462645626466264762648626496265062651626526265362654626556265662657626586265962660626616266262663626646266562666626676266862669626706267162672626736267462675626766267762678626796268062681626826268362684626856268662687626886268962690626916269262693626946269562696626976269862699627006270162702627036270462705627066270762708627096271062711627126271362714627156271662717627186271962720627216272262723627246272562726627276272862729627306273162732627336273462735627366273762738627396274062741627426274362744627456274662747627486274962750627516275262753627546275562756627576275862759627606276162762627636276462765627666276762768627696277062771627726277362774627756277662777627786277962780627816278262783627846278562786627876278862789627906279162792627936279462795627966279762798627996280062801628026280362804628056280662807628086280962810628116281262813628146281562816628176281862819628206282162822628236282462825628266282762828628296283062831628326283362834628356283662837628386283962840628416284262843628446284562846628476284862849628506285162852628536285462855628566285762858628596286062861628626286362864628656286662867628686286962870628716287262873628746287562876628776287862879628806288162882628836288462885628866288762888628896289062891628926289362894628956289662897628986289962900629016290262903629046290562906629076290862909629106291162912629136291462915629166291762918629196292062921629226292362924629256292662927629286292962930629316293262933629346293562936629376293862939629406294162942629436294462945629466294762948629496295062951629526295362954629556295662957629586295962960629616296262963629646296562966629676296862969629706297162972629736297462975629766297762978629796298062981629826298362984629856298662987629886298962990629916299262993629946299562996629976299862999630006300163002630036300463005630066300763008630096301063011630126301363014630156301663017630186301963020630216
30226302363024630256302663027630286302963030630316303263033630346303563036630376303863039630406304163042630436304463045630466304763048630496305063051630526305363054630556305663057630586305963060630616306263063630646306563066630676306863069630706307163072630736307463075630766307763078630796308063081630826308363084630856308663087630886308963090630916309263093630946309563096630976309863099631006310163102631036310463105631066310763108631096311063111631126311363114631156311663117631186311963120631216312263123631246312563126631276312863129631306313163132631336313463135631366313763138631396314063141631426314363144631456314663147631486314963150631516315263153631546315563156631576315863159631606316163162631636316463165631666316763168631696317063171631726317363174631756317663177631786317963180631816318263183631846318563186631876318863189631906319163192631936319463195631966319763198631996320063201632026320363204632056320663207632086320963210632116321263213632146321563216632176321863219632206322163222632236322463225632266322763228632296323063231632326323363234632356323663237632386323963240632416324263243632446324563246632476324863249632506325163252632536325463255632566325763258632596326063261632626326363264632656326663267632686326963270632716327263273632746327563276632776327863279632806328163282632836328463285632866328763288632896329063291632926329363294632956329663297632986329963300633016330263303633046330563306633076330863309633106331163312633136331463315633166331763318633196332063321633226332363324633256332663327633286332963330633316333263333633346333563336633376333863339633406334163342633436334463345633466334763348633496335063351633526335363354633556335663357633586335963360633616336263363633646336563366633676336863369633706337163372633736337463375633766337763378633796338063381633826338363384633856338663387633886338963390633916339263393633946339563396633976339863399634006340163402634036340463405634066340763408634096341063411634126341363414634156341663417634186341963420634216
34226342363424634256342663427634286342963430634316343263433634346343563436634376343863439634406344163442634436344463445634466344763448634496345063451634526345363454634556345663457634586345963460634616346263463634646346563466634676346863469634706347163472634736347463475634766347763478634796348063481634826348363484634856348663487634886348963490634916349263493634946349563496634976349863499635006350163502635036350463505635066350763508635096351063511635126351363514635156351663517635186351963520635216352263523635246352563526635276352863529635306353163532635336353463535635366353763538635396354063541635426354363544635456354663547635486354963550635516355263553635546355563556635576355863559635606356163562635636356463565635666356763568635696357063571635726357363574635756357663577635786357963580635816358263583635846358563586635876358863589635906359163592635936359463595635966359763598635996360063601636026360363604636056360663607636086360963610636116361263613636146361563616636176361863619636206362163622636236362463625636266362763628636296363063631636326363363634636356363663637636386363963640636416364263643636446364563646636476364863649636506365163652636536365463655636566365763658636596366063661636626366363664636656366663667636686366963670636716367263673636746367563676636776367863679636806368163682636836368463685636866368763688636896369063691636926369363694636956369663697636986369963700637016370263703637046370563706637076370863709637106371163712637136371463715637166371763718637196372063721637226372363724637256372663727637286372963730637316373263733637346373563736637376373863739637406374163742637436374463745637466374763748637496375063751637526375363754637556375663757637586375963760637616376263763637646376563766637676376863769637706377163772637736377463775637766377763778637796378063781637826378363784637856378663787637886378963790637916379263793637946379563796637976379863799638006380163802638036380463805638066380763808638096381063811638126381363814638156381663817638186381963820638216
38226382363824638256382663827638286382963830638316383263833638346383563836638376383863839638406384163842638436384463845638466384763848638496385063851638526385363854638556385663857638586385963860638616386263863638646386563866638676386863869638706387163872638736387463875638766387763878638796388063881638826388363884638856388663887638886388963890638916389263893638946389563896638976389863899639006390163902639036390463905639066390763908639096391063911639126391363914639156391663917639186391963920639216392263923639246392563926639276392863929639306393163932639336393463935639366393763938639396394063941639426394363944639456394663947639486394963950639516395263953639546395563956639576395863959639606396163962639636396463965639666396763968639696397063971639726397363974639756397663977639786397963980639816398263983639846398563986639876398863989639906399163992639936399463995639966399763998639996400064001640026400364004640056400664007640086400964010640116401264013640146401564016640176401864019640206402164022640236402464025640266402764028640296403064031640326403364034640356403664037640386403964040640416404264043640446404564046640476404864049640506405164052640536405464055640566405764058640596406064061640626406364064640656406664067640686406964070640716407264073640746407564076640776407864079640806408164082640836408464085640866408764088640896409064091640926409364094640956409664097640986409964100641016410264103641046410564106641076410864109641106411164112641136411464115641166411764118641196412064121641226412364124641256412664127641286412964130641316413264133641346413564136641376413864139641406414164142641436414464145641466414764148641496415064151641526415364154641556415664157641586415964160641616416264163641646416564166641676416864169641706417164172641736417464175641766417764178641796418064181641826418364184641856418664187641886418964190641916419264193641946419564196641976419864199642006420164202642036420464205642066420764208642096421064211642126421364214642156421664217642186421964220642216
42226422364224642256422664227642286422964230642316423264233642346423564236642376423864239642406424164242642436424464245642466424764248642496425064251642526425364254642556425664257642586425964260642616426264263642646426564266642676426864269642706427164272642736427464275642766427764278642796428064281642826428364284642856428664287642886428964290642916429264293642946429564296642976429864299643006430164302643036430464305643066430764308643096431064311643126431364314643156431664317643186431964320643216432264323643246432564326643276432864329643306433164332643336433464335643366433764338643396434064341643426434364344643456434664347643486434964350643516435264353643546435564356643576435864359643606436164362643636436464365643666436764368643696437064371643726437364374643756437664377643786437964380643816438264383643846438564386643876438864389643906439164392643936439464395643966439764398643996440064401644026440364404644056440664407644086440964410644116441264413644146441564416644176441864419644206442164422644236442464425644266442764428644296443064431644326443364434644356443664437644386443964440644416444264443644446444564446644476444864449644506445164452644536445464455644566445764458644596446064461644626446364464644656446664467644686446964470644716447264473644746447564476644776447864479644806448164482644836448464485644866448764488644896449064491644926449364494644956449664497644986449964500645016450264503645046450564506645076450864509645106451164512645136451464515645166451764518645196452064521645226452364524645256452664527645286452964530645316453264533645346453564536645376453864539645406454164542645436454464545645466454764548645496455064551645526455364554645556455664557645586455964560645616456264563645646456564566645676456864569645706457164572645736457464575645766457764578645796458064581645826458364584645856458664587645886458964590645916459264593645946459564596645976459864599646006460164602646036460464605646066460764608646096461064611646126461364614646156461664617646186461964620646216
46226462364624646256462664627646286462964630646316463264633646346463564636646376463864639646406464164642646436464464645646466464764648646496465064651646526465364654646556465664657646586465964660646616466264663646646466564666646676466864669646706467164672646736467464675646766467764678646796468064681646826468364684646856468664687646886468964690646916469264693646946469564696646976469864699647006470164702647036470464705647066470764708647096471064711647126471364714647156471664717647186471964720647216472264723647246472564726647276472864729647306473164732647336473464735647366473764738647396474064741647426474364744647456474664747647486474964750647516475264753647546475564756647576475864759647606476164762647636476464765647666476764768647696477064771647726477364774647756477664777647786477964780647816478264783647846478564786647876478864789647906479164792647936479464795647966479764798647996480064801648026480364804648056480664807648086480964810648116481264813648146481564816648176481864819648206482164822648236482464825648266482764828648296483064831648326483364834648356483664837648386483964840648416484264843648446484564846648476484864849648506485164852648536485464855648566485764858648596486064861648626486364864648656486664867648686486964870648716487264873648746487564876648776487864879648806488164882648836488464885648866488764888648896489064891648926489364894648956489664897648986489964900649016490264903649046490564906649076490864909649106491164912649136491464915649166491764918649196492064921649226492364924649256492664927649286492964930649316493264933649346493564936649376493864939649406494164942649436494464945649466494764948649496495064951649526495364954649556495664957649586495964960649616496264963649646496564966649676496864969649706497164972649736497464975649766497764978649796498064981649826498364984649856498664987649886498964990649916499264993649946499564996649976499864999650006500165002650036500465005650066500765008650096501065011650126501365014650156501665017650186501965020650216
50226502365024650256502665027650286502965030650316503265033650346503565036650376503865039650406504165042650436504465045650466504765048650496505065051650526505365054650556505665057650586505965060650616506265063650646506565066650676506865069650706507165072650736507465075650766507765078650796508065081650826508365084650856508665087650886508965090650916509265093650946509565096650976509865099651006510165102651036510465105651066510765108651096511065111651126511365114651156511665117651186511965120651216512265123651246512565126651276512865129651306513165132651336513465135651366513765138651396514065141651426514365144651456514665147651486514965150651516515265153651546515565156651576515865159651606516165162651636516465165651666516765168651696517065171651726517365174651756517665177651786517965180651816518265183651846518565186651876518865189651906519165192651936519465195651966519765198651996520065201652026520365204652056520665207652086520965210652116521265213652146521565216652176521865219652206522165222652236522465225652266522765228652296523065231652326523365234652356523665237652386523965240652416524265243652446524565246652476524865249652506525165252652536525465255652566525765258652596526065261652626526365264652656526665267652686526965270652716527265273652746527565276652776527865279652806528165282652836528465285652866528765288652896529065291652926529365294652956529665297652986529965300653016530265303653046530565306653076530865309653106531165312653136531465315653166531765318653196532065321653226532365324653256532665327653286532965330653316533265333653346533565336653376533865339653406534165342653436534465345653466534765348653496535065351653526535365354653556535665357653586535965360653616536265363653646536565366653676536865369653706537165372653736537465375653766537765378653796538065381653826538365384653856538665387653886538965390653916539265393653946539565396653976539865399654006540165402654036540465405654066540765408654096541065411654126541365414654156541665417654186541965420654216
54226542365424654256542665427654286542965430654316543265433654346543565436654376543865439654406544165442654436544465445654466544765448654496545065451654526545365454654556545665457654586545965460654616546265463654646546565466654676546865469654706547165472654736547465475654766547765478654796548065481654826548365484654856548665487654886548965490654916549265493654946549565496654976549865499655006550165502655036550465505655066550765508655096551065511655126551365514655156551665517655186551965520655216552265523655246552565526655276552865529655306553165532655336553465535655366553765538655396554065541655426554365544655456554665547655486554965550655516555265553655546555565556655576555865559655606556165562655636556465565655666556765568655696557065571655726557365574655756557665577655786557965580655816558265583655846558565586655876558865589655906559165592655936559465595655966559765598655996560065601656026560365604656056560665607656086560965610656116561265613656146561565616656176561865619656206562165622656236562465625656266562765628656296563065631656326563365634656356563665637656386563965640656416564265643656446564565646656476564865649656506565165652656536565465655656566565765658656596566065661656626566365664656656566665667656686566965670656716567265673656746567565676656776567865679656806568165682656836568465685656866568765688656896569065691656926569365694656956569665697656986569965700657016570265703657046570565706657076570865709657106571165712657136571465715657166571765718657196572065721657226572365724657256572665727657286572965730657316573265733657346573565736657376573865739657406574165742657436574465745657466574765748657496575065751657526575365754657556575665757657586575965760657616576265763657646576565766657676576865769657706577165772657736577465775657766577765778657796578065781657826578365784657856578665787657886578965790657916579265793657946579565796657976579865799658006580165802658036580465805658066580765808658096581065811658126581365814658156581665817658186581965820658216
58226582365824658256582665827658286582965830658316583265833658346583565836658376583865839658406584165842658436584465845658466584765848658496585065851658526585365854658556585665857658586585965860658616586265863658646586565866658676586865869658706587165872658736587465875658766587765878658796588065881658826588365884658856588665887658886588965890658916589265893658946589565896658976589865899659006590165902659036590465905659066590765908659096591065911659126591365914659156591665917659186591965920659216592265923659246592565926659276592865929659306593165932659336593465935659366593765938659396594065941659426594365944659456594665947659486594965950659516595265953659546595565956659576595865959659606596165962659636596465965659666596765968659696597065971659726597365974659756597665977659786597965980659816598265983659846598565986659876598865989659906599165992659936599465995659966599765998659996600066001660026600366004660056600666007660086600966010660116601266013660146601566016660176601866019660206602166022660236602466025660266602766028660296603066031660326603366034660356603666037660386603966040660416604266043660446604566046660476604866049660506605166052660536605466055660566605766058660596606066061660626606366064660656606666067660686606966070660716607266073660746607566076660776607866079660806608166082660836608466085660866608766088660896609066091660926609366094660956609666097660986609966100661016610266103661046610566106661076610866109661106611166112661136611466115661166611766118661196612066121661226612366124661256612666127661286612966130661316613266133661346613566136661376613866139661406614166142661436614466145661466614766148661496615066151661526615366154661556615666157661586615966160661616616266163661646616566166661676616866169661706617166172661736617466175661766617766178661796618066181661826618366184661856618666187661886618966190661916619266193661946619566196661976619866199662006620166202662036620466205662066620766208662096621066211662126621366214662156621666217662186621966220662216
62226622366224662256622666227662286622966230662316623266233662346623566236662376623866239662406624166242662436624466245662466624766248662496625066251662526625366254662556625666257662586625966260662616626266263662646626566266662676626866269662706627166272662736627466275662766627766278662796628066281662826628366284662856628666287662886628966290662916629266293662946629566296662976629866299663006630166302663036630466305663066630766308663096631066311663126631366314663156631666317663186631966320663216632266323663246632566326663276632866329663306633166332663336633466335663366633766338663396634066341663426634366344663456634666347663486634966350663516635266353663546635566356663576635866359663606636166362663636636466365663666636766368663696637066371663726637366374663756637666377663786637966380663816638266383663846638566386663876638866389663906639166392663936639466395663966639766398663996640066401664026640366404664056640666407664086640966410664116641266413664146641566416664176641866419664206642166422664236642466425664266642766428664296643066431664326643366434664356643666437664386643966440664416644266443664446644566446664476644866449664506645166452664536645466455664566645766458664596646066461664626646366464664656646666467664686646966470664716647266473664746647566476664776647866479664806648166482664836648466485664866648766488664896649066491664926649366494664956649666497664986649966500665016650266503665046650566506665076650866509665106651166512665136651466515665166651766518665196652066521665226652366524665256652666527665286652966530665316653266533665346653566536665376653866539665406654166542665436654466545665466654766548665496655066551665526655366554665556655666557665586655966560665616656266563665646656566566665676656866569665706657166572665736657466575665766657766578665796658066581665826658366584665856658666587665886658966590665916659266593665946659566596665976659866599666006660166602666036660466605666066660766608666096661066611666126661366614666156661666617666186661966620666216
66226662366624666256662666627666286662966630666316663266633666346663566636666376663866639666406664166642666436664466645666466664766648666496665066651666526665366654666556665666657666586665966660666616666266663666646666566666666676666866669666706667166672666736667466675666766667766678666796668066681666826668366684666856668666687666886668966690666916669266693666946669566696666976669866699667006670166702667036670466705667066670766708667096671066711667126671366714667156671666717667186671966720667216672266723667246672566726667276672866729667306673166732667336673466735667366673766738667396674066741667426674366744667456674666747667486674966750667516675266753667546675566756667576675866759667606676166762667636676466765667666676766768667696677066771667726677366774667756677666777667786677966780667816678266783667846678566786667876678866789667906679166792667936679466795667966679766798667996680066801668026680366804668056680666807668086680966810668116681266813668146681566816668176681866819668206682166822668236682466825668266682766828668296683066831668326683366834668356683666837668386683966840668416684266843668446684566846668476684866849668506685166852668536685466855668566685766858668596686066861668626686366864668656686666867668686686966870668716687266873668746687566876668776687866879668806688166882668836688466885668866688766888668896689066891668926689366894668956689666897668986689966900669016690266903669046690566906669076690866909669106691166912669136691466915669166691766918669196692066921669226692366924669256692666927669286692966930669316693266933669346693566936669376693866939669406694166942669436694466945669466694766948669496695066951669526695366954669556695666957669586695966960669616696266963669646696566966669676696866969669706697166972669736697466975669766697766978669796698066981669826698366984669856698666987669886698966990669916699266993669946699566996669976699866999670006700167002670036700467005670066700767008670096701067011670126701367014670156701667017670186701967020670216
70226702367024670256702667027670286702967030670316703267033670346703567036670376703867039670406704167042670436704467045670466704767048670496705067051670526705367054670556705667057670586705967060670616706267063670646706567066670676706867069670706707167072670736707467075670766707767078670796708067081670826708367084670856708667087670886708967090670916709267093670946709567096670976709867099671006710167102671036710467105671066710767108671096711067111671126711367114671156711667117671186711967120671216712267123671246712567126671276712867129671306713167132671336713467135671366713767138671396714067141671426714367144671456714667147671486714967150671516715267153671546715567156671576715867159671606716167162671636716467165671666716767168671696717067171671726717367174671756717667177671786717967180671816718267183671846718567186671876718867189671906719167192671936719467195671966719767198671996720067201672026720367204672056720667207672086720967210672116721267213672146721567216672176721867219672206722167222672236722467225672266722767228672296723067231672326723367234672356723667237672386723967240672416724267243672446724567246672476724867249672506725167252672536725467255672566725767258672596726067261672626726367264672656726667267672686726967270672716727267273672746727567276672776727867279672806728167282672836728467285672866728767288672896729067291672926729367294672956729667297672986729967300673016730267303673046730567306673076730867309673106731167312673136731467315673166731767318673196732067321673226732367324673256732667327673286732967330673316733267333673346733567336673376733867339673406734167342673436734467345673466734767348673496735067351673526735367354673556735667357673586735967360673616736267363673646736567366673676736867369673706737167372673736737467375673766737767378673796738067381673826738367384673856738667387673886738967390673916739267393673946739567396673976739867399674006740167402674036740467405674066740767408674096741067411674126741367414674156741667417674186741967420674216
74226742367424674256742667427674286742967430674316743267433674346743567436674376743867439674406744167442674436744467445674466744767448674496745067451674526745367454674556745667457674586745967460674616746267463674646746567466674676746867469674706747167472674736747467475674766747767478674796748067481674826748367484674856748667487674886748967490674916749267493674946749567496674976749867499675006750167502675036750467505675066750767508675096751067511675126751367514675156751667517675186751967520675216752267523675246752567526675276752867529675306753167532675336753467535675366753767538675396754067541675426754367544675456754667547675486754967550675516755267553675546755567556675576755867559675606756167562675636756467565675666756767568675696757067571675726757367574675756757667577675786757967580675816758267583675846758567586675876758867589675906759167592675936759467595675966759767598675996760067601676026760367604676056760667607676086760967610676116761267613676146761567616676176761867619676206762167622676236762467625676266762767628676296763067631676326763367634676356763667637676386763967640676416764267643676446764567646676476764867649676506765167652676536765467655676566765767658676596766067661676626766367664676656766667667676686766967670676716767267673676746767567676676776767867679676806768167682676836768467685676866768767688676896769067691676926769367694676956769667697676986769967700677016770267703677046770567706677076770867709677106771167712677136771467715677166771767718677196772067721677226772367724677256772667727677286772967730677316773267733677346773567736677376773867739677406774167742677436774467745677466774767748677496775067751677526775367754677556775667757677586775967760677616776267763677646776567766677676776867769677706777167772677736777467775677766777767778677796778067781677826778367784677856778667787677886778967790677916779267793677946779567796677976779867799678006780167802678036780467805678066780767808678096781067811678126781367814678156781667817678186781967820678216
78226782367824678256782667827678286782967830678316783267833678346783567836678376783867839678406784167842678436784467845678466784767848678496785067851678526785367854678556785667857678586785967860678616786267863678646786567866678676786867869678706787167872678736787467875678766787767878678796788067881678826788367884678856788667887678886788967890678916789267893678946789567896678976789867899679006790167902679036790467905679066790767908679096791067911679126791367914679156791667917679186791967920679216792267923679246792567926679276792867929679306793167932679336793467935679366793767938679396794067941679426794367944679456794667947679486794967950679516795267953679546795567956679576795867959679606796167962679636796467965679666796767968679696797067971679726797367974679756797667977679786797967980679816798267983679846798567986679876798867989679906799167992679936799467995679966799767998679996800068001680026800368004680056800668007680086800968010680116801268013680146801568016680176801868019680206802168022680236802468025680266802768028680296803068031680326803368034680356803668037680386803968040680416804268043680446804568046680476804868049680506805168052680536805468055680566805768058680596806068061680626806368064680656806668067680686806968070680716807268073680746807568076680776807868079680806808168082680836808468085680866808768088680896809068091680926809368094680956809668097680986809968100681016810268103681046810568106681076810868109681106811168112681136811468115681166811768118681196812068121681226812368124681256812668127681286812968130681316813268133681346813568136681376813868139681406814168142681436814468145681466814768148681496815068151681526815368154681556815668157681586815968160681616816268163681646816568166681676816868169681706817168172681736817468175681766817768178681796818068181681826818368184681856818668187681886818968190681916819268193681946819568196681976819868199682006820168202682036820468205682066820768208682096821068211682126821368214682156821668217682186821968220682216
82226822368224682256822668227682286822968230682316823268233682346823568236682376823868239682406824168242682436824468245682466824768248682496825068251682526825368254682556825668257682586825968260682616826268263682646826568266682676826868269682706827168272682736827468275682766827768278682796828068281682826828368284682856828668287682886828968290682916829268293682946829568296682976829868299683006830168302683036830468305683066830768308683096831068311683126831368314683156831668317683186831968320683216832268323683246832568326683276832868329683306833168332683336833468335683366833768338683396834068341683426834368344683456834668347683486834968350683516835268353683546835568356683576835868359683606836168362683636836468365683666836768368683696837068371683726837368374683756837668377683786837968380683816838268383683846838568386683876838868389683906839168392683936839468395683966839768398683996840068401684026840368404684056840668407684086840968410684116841268413684146841568416684176841868419684206842168422684236842468425684266842768428684296843068431684326843368434684356843668437684386843968440684416844268443684446844568446684476844868449684506845168452684536845468455684566845768458684596846068461684626846368464684656846668467684686846968470684716847268473684746847568476684776847868479684806848168482684836848468485684866848768488684896849068491684926849368494684956849668497684986849968500685016850268503685046850568506685076850868509685106851168512685136851468515685166851768518685196852068521685226852368524685256852668527685286852968530685316853268533685346853568536685376853868539685406854168542685436854468545685466854768548685496855068551685526855368554685556855668557685586855968560685616856268563685646856568566685676856868569685706857168572685736857468575685766857768578685796858068581685826858368584685856858668587685886858968590685916859268593685946859568596685976859868599686006860168602686036860468605686066860768608686096861068611686126861368614686156861668617686186861968620686216
86226862368624686256862668627686286862968630686316863268633686346863568636686376863868639686406864168642686436864468645686466864768648686496865068651686526865368654686556865668657686586865968660686616866268663686646866568666686676866868669686706867168672686736867468675686766867768678686796868068681686826868368684686856868668687686886868968690686916869268693686946869568696686976869868699687006870168702687036870468705687066870768708687096871068711687126871368714687156871668717687186871968720687216872268723687246872568726687276872868729687306873168732687336873468735687366873768738687396874068741687426874368744687456874668747687486874968750687516875268753687546875568756687576875868759687606876168762687636876468765687666876768768687696877068771687726877368774687756877668777687786877968780687816878268783687846878568786687876878868789687906879168792687936879468795687966879768798687996880068801688026880368804688056880668807688086880968810688116881268813688146881568816688176881868819688206882168822688236882468825688266882768828688296883068831688326883368834688356883668837688386883968840688416884268843688446884568846688476884868849688506885168852688536885468855688566885768858688596886068861688626886368864688656886668867688686886968870688716887268873688746887568876688776887868879688806888168882688836888468885688866888768888688896889068891688926889368894688956889668897688986889968900689016890268903689046890568906689076890868909689106891168912689136891468915689166891768918689196892068921689226892368924689256892668927689286892968930689316893268933689346893568936689376893868939689406894168942689436894468945689466894768948689496895068951689526895368954689556895668957689586895968960689616896268963689646896568966689676896868969689706897168972689736897468975689766897768978689796898068981689826898368984689856898668987689886898968990689916899268993689946899568996689976899868999690006900169002690036900469005690066900769008690096901069011690126901369014690156901669017690186901969020690216
90226902369024690256902669027690286902969030690316903269033690346903569036690376903869039690406904169042690436904469045690466904769048690496905069051690526905369054690556905669057690586905969060690616906269063690646906569066690676906869069690706907169072690736907469075690766907769078690796908069081690826908369084690856908669087690886908969090690916909269093690946909569096690976909869099691006910169102691036910469105691066910769108691096911069111691126911369114691156911669117691186911969120691216912269123691246912569126691276912869129691306913169132691336913469135691366913769138691396914069141691426914369144691456914669147691486914969150691516915269153691546915569156691576915869159691606916169162691636916469165691666916769168691696917069171691726917369174691756917669177691786917969180691816918269183691846918569186691876918869189691906919169192691936919469195691966919769198691996920069201692026920369204692056920669207692086920969210692116921269213692146921569216692176921869219692206922169222692236922469225692266922769228692296923069231692326923369234692356923669237692386923969240692416924269243692446924569246692476924869249692506925169252692536925469255692566925769258692596926069261692626926369264692656926669267692686926969270692716927269273692746927569276692776927869279692806928169282692836928469285692866928769288692896929069291692926929369294692956929669297692986929969300693016930269303693046930569306693076930869309693106931169312693136931469315693166931769318693196932069321693226932369324693256932669327693286932969330693316933269333693346933569336693376933869339693406934169342693436934469345693466934769348693496935069351693526935369354693556935669357693586935969360693616936269363693646936569366693676936869369693706937169372693736937469375693766937769378693796938069381693826938369384693856938669387693886938969390693916939269393693946939569396693976939869399694006940169402694036940469405694066940769408694096941069411694126941369414694156941669417694186941969420694216
94226942369424694256942669427694286942969430694316943269433694346943569436694376943869439694406944169442694436944469445694466944769448694496945069451694526945369454694556945669457694586945969460694616946269463694646946569466694676946869469694706947169472694736947469475694766947769478694796948069481694826948369484694856948669487694886948969490694916949269493694946949569496694976949869499695006950169502695036950469505695066950769508695096951069511695126951369514695156951669517695186951969520695216952269523695246952569526695276952869529695306953169532695336953469535695366953769538695396954069541695426954369544695456954669547695486954969550695516955269553695546955569556695576955869559695606956169562695636956469565695666956769568695696957069571695726957369574695756957669577695786957969580695816958269583695846958569586695876958869589695906959169592695936959469595695966959769598695996960069601696026960369604696056960669607696086960969610696116961269613696146961569616696176961869619696206962169622696236962469625696266962769628696296963069631696326963369634696356963669637696386963969640696416964269643696446964569646696476964869649696506965169652696536965469655696566965769658696596966069661696626966369664696656966669667696686966969670696716967269673696746967569676696776967869679696806968169682696836968469685696866968769688696896969069691696926969369694696956969669697696986969969700697016970269703697046970569706697076970869709697106971169712697136971469715697166971769718697196972069721697226972369724697256972669727697286972969730697316973269733697346973569736697376973869739697406974169742697436974469745697466974769748697496975069751697526975369754697556975669757697586975969760697616976269763697646976569766697676976869769697706977169772697736977469775697766977769778697796978069781697826978369784697856978669787697886978969790697916979269793697946979569796697976979869799698006980169802698036980469805698066980769808698096981069811698126981369814698156981669817698186981969820698216
98226982369824698256982669827698286982969830698316983269833698346983569836698376983869839698406984169842698436984469845698466984769848698496985069851698526985369854698556985669857698586985969860698616986269863698646986569866698676986869869698706987169872698736987469875698766987769878698796988069881698826988369884698856988669887698886988969890698916989269893698946989569896698976989869899699006990169902699036990469905699066990769908699096991069911699126991369914699156991669917699186991969920699216992269923699246992569926699276992869929699306993169932699336993469935699366993769938699396994069941699426994369944699456994669947699486994969950699516995269953699546995569956699576995869959699606996169962699636996469965699666996769968699696997069971699726997369974699756997669977699786997969980699816998269983699846998569986699876998869989699906999169992699936999469995699966999769998699997000070001700027000370004700057000670007700087000970010700117001270013700147001570016700177001870019700207002170022700237002470025700267002770028700297003070031700327003370034700357003670037700387003970040700417004270043700447004570046700477004870049700507005170052700537005470055700567005770058700597006070061700627006370064700657006670067700687006970070700717007270073700747007570076700777007870079700807008170082700837008470085700867008770088700897009070091700927009370094700957009670097700987009970100701017010270103701047010570106701077010870109701107011170112701137011470115701167011770118701197012070121701227012370124701257012670127701287012970130701317013270133701347013570136701377013870139701407014170142701437014470145701467014770148701497015070151701527015370154701557015670157701587015970160701617016270163701647016570166701677016870169701707017170172701737017470175701767017770178701797018070181701827018370184701857018670187701887018970190701917019270193701947019570196701977019870199702007020170202702037020470205702067020770208702097021070211702127021370214702157021670217702187021970220702217
02227022370224702257022670227702287022970230702317023270233702347023570236702377023870239702407024170242702437024470245702467024770248702497025070251702527025370254702557025670257702587025970260702617026270263702647026570266702677026870269702707027170272702737027470275702767027770278702797028070281702827028370284702857028670287702887028970290702917029270293702947029570296702977029870299703007030170302703037030470305703067030770308703097031070311703127031370314703157031670317703187031970320703217032270323703247032570326703277032870329703307033170332703337033470335703367033770338703397034070341703427034370344703457034670347703487034970350703517035270353703547035570356703577035870359703607036170362703637036470365703667036770368703697037070371703727037370374703757037670377703787037970380703817038270383703847038570386703877038870389703907039170392703937039470395703967039770398703997040070401704027040370404704057040670407704087040970410704117041270413704147041570416704177041870419704207042170422704237042470425704267042770428704297043070431704327043370434704357043670437704387043970440704417044270443704447044570446704477044870449704507045170452704537045470455704567045770458704597046070461704627046370464704657046670467704687046970470704717047270473704747047570476704777047870479704807048170482704837048470485704867048770488704897049070491704927049370494704957049670497704987049970500705017050270503705047050570506705077050870509705107051170512705137051470515705167051770518705197052070521705227052370524705257052670527705287052970530705317053270533705347053570536705377053870539705407054170542705437054470545705467054770548705497055070551705527055370554705557055670557705587055970560705617056270563705647056570566705677056870569705707057170572705737057470575705767057770578705797058070581705827058370584705857058670587705887058970590705917059270593705947059570596705977059870599706007060170602706037060470605706067060770608706097061070611706127061370614706157061670617706187061970620706217
06227062370624706257062670627706287062970630706317063270633706347063570636706377063870639706407064170642706437064470645706467064770648706497065070651706527065370654706557065670657706587065970660706617066270663706647066570666706677066870669706707067170672706737067470675706767067770678706797068070681706827068370684706857068670687706887068970690706917069270693706947069570696706977069870699707007070170702707037070470705707067070770708707097071070711707127071370714707157071670717707187071970720707217072270723707247072570726707277072870729707307073170732707337073470735707367073770738707397074070741707427074370744707457074670747707487074970750707517075270753707547075570756707577075870759707607076170762707637076470765707667076770768707697077070771707727077370774707757077670777707787077970780707817078270783707847078570786707877078870789707907079170792707937079470795707967079770798707997080070801708027080370804708057080670807708087080970810708117081270813708147081570816708177081870819708207082170822708237082470825708267082770828708297083070831708327083370834708357083670837708387083970840708417084270843708447084570846708477084870849708507085170852708537085470855708567085770858708597086070861708627086370864708657086670867708687086970870708717087270873708747087570876708777087870879708807088170882708837088470885708867088770888708897089070891708927089370894708957089670897708987089970900709017090270903709047090570906709077090870909709107091170912709137091470915709167091770918709197092070921709227092370924709257092670927709287092970930709317093270933709347093570936709377093870939709407094170942709437094470945709467094770948709497095070951709527095370954709557095670957709587095970960709617096270963709647096570966709677096870969709707097170972709737097470975709767097770978709797098070981709827098370984709857098670987709887098970990709917099270993709947099570996709977099870999710007100171002710037100471005710067100771008710097101071011710127101371014710157101671017710187101971020710217
10227102371024710257102671027710287102971030710317103271033710347103571036710377103871039710407104171042710437104471045710467104771048710497105071051710527105371054710557105671057710587105971060710617106271063710647106571066710677106871069710707107171072710737107471075710767107771078710797108071081710827108371084710857108671087710887108971090710917109271093710947109571096710977109871099711007110171102711037110471105711067110771108711097111071111711127111371114711157111671117711187111971120711217112271123711247112571126711277112871129711307113171132711337113471135711367113771138711397114071141711427114371144711457114671147711487114971150711517115271153711547115571156711577115871159711607116171162711637116471165711667116771168711697117071171711727117371174711757117671177711787117971180711817118271183711847118571186711877118871189711907119171192711937119471195711967119771198711997120071201712027120371204712057120671207712087120971210712117121271213712147121571216712177121871219712207122171222712237122471225712267122771228712297123071231712327123371234712357123671237712387123971240712417124271243712447124571246712477124871249712507125171252712537125471255712567125771258712597126071261712627126371264712657126671267712687126971270712717127271273712747127571276712777127871279712807128171282712837128471285712867128771288712897129071291712927129371294712957129671297712987129971300713017130271303713047130571306713077130871309713107131171312713137131471315713167131771318713197132071321713227132371324713257132671327713287132971330713317133271333713347133571336713377133871339713407134171342713437134471345713467134771348713497135071351713527135371354713557135671357713587135971360713617136271363713647136571366713677136871369713707137171372713737137471375713767137771378713797138071381713827138371384713857138671387713887138971390713917139271393713947139571396713977139871399714007140171402714037140471405714067140771408714097141071411714127141371414714157141671417714187141971420714217
14227142371424714257142671427714287142971430714317143271433714347143571436714377143871439714407144171442714437144471445714467144771448714497145071451714527145371454714557145671457714587145971460714617146271463714647146571466714677146871469714707147171472714737147471475714767147771478714797148071481714827148371484714857148671487714887148971490714917149271493714947149571496714977149871499715007150171502715037150471505715067150771508715097151071511715127151371514715157151671517715187151971520715217152271523715247152571526715277152871529715307153171532715337153471535715367153771538715397154071541715427154371544715457154671547715487154971550715517155271553715547155571556715577155871559715607156171562715637156471565715667156771568715697157071571715727157371574715757157671577715787157971580715817158271583715847158571586715877158871589715907159171592715937159471595715967159771598715997160071601716027160371604716057160671607716087160971610716117161271613716147161571616716177161871619716207162171622716237162471625716267162771628716297163071631716327163371634716357163671637716387163971640716417164271643716447164571646716477164871649716507165171652716537165471655716567165771658716597166071661716627166371664716657166671667716687166971670716717167271673716747167571676716777167871679716807168171682716837168471685716867168771688716897169071691716927169371694716957169671697716987169971700717017170271703717047170571706717077170871709717107171171712717137171471715717167171771718717197172071721717227172371724717257172671727717287172971730717317173271733717347173571736717377173871739717407174171742717437174471745717467174771748717497175071751717527175371754717557175671757717587175971760717617176271763717647176571766717677176871769717707177171772717737177471775717767177771778717797178071781717827178371784717857178671787717887178971790717917179271793717947179571796717977179871799718007180171802718037180471805718067180771808718097181071811718127181371814718157181671817718187181971820718217
18227182371824718257182671827718287182971830718317183271833718347183571836718377183871839718407184171842718437184471845718467184771848718497185071851718527185371854718557185671857718587185971860718617186271863718647186571866718677186871869718707187171872718737187471875718767187771878718797188071881718827188371884718857188671887718887188971890718917189271893718947189571896718977189871899719007190171902719037190471905719067190771908719097191071911719127191371914719157191671917719187191971920719217192271923719247192571926719277192871929719307193171932719337193471935719367193771938719397194071941719427194371944719457194671947719487194971950719517195271953719547195571956719577195871959719607196171962719637196471965719667196771968719697197071971719727197371974719757197671977719787197971980719817198271983719847198571986719877198871989719907199171992719937199471995719967199771998719997200072001720027200372004720057200672007720087200972010720117201272013720147201572016720177201872019720207202172022720237202472025720267202772028720297203072031720327203372034720357203672037720387203972040720417204272043720447204572046720477204872049720507205172052720537205472055720567205772058720597206072061720627206372064720657206672067720687206972070720717207272073720747207572076720777207872079720807208172082720837208472085720867208772088720897209072091720927209372094720957209672097720987209972100721017210272103721047210572106721077210872109721107211172112721137211472115721167211772118721197212072121721227212372124721257212672127721287212972130721317213272133721347213572136721377213872139721407214172142721437214472145721467214772148721497215072151721527215372154721557215672157721587215972160721617216272163721647216572166721677216872169721707217172172721737217472175721767217772178721797218072181721827218372184721857218672187721887218972190721917219272193721947219572196721977219872199722007220172202722037220472205722067220772208722097221072211722127221372214722157221672217722187221972220722217
22227222372224722257222672227722287222972230722317223272233722347223572236722377223872239722407224172242722437224472245722467224772248722497225072251722527225372254722557225672257722587225972260722617226272263722647226572266722677226872269722707227172272722737227472275722767227772278722797228072281722827228372284722857228672287722887228972290722917229272293722947229572296722977229872299723007230172302723037230472305723067230772308723097231072311723127231372314723157231672317723187231972320723217232272323723247232572326723277232872329723307233172332723337233472335723367233772338723397234072341723427234372344723457234672347723487234972350723517235272353723547235572356723577235872359723607236172362723637236472365723667236772368723697237072371723727237372374723757237672377723787237972380723817238272383723847238572386723877238872389723907239172392723937239472395723967239772398723997240072401724027240372404724057240672407724087240972410724117241272413724147241572416724177241872419724207242172422724237242472425724267242772428724297243072431724327243372434724357243672437724387243972440724417244272443724447244572446724477244872449724507245172452724537245472455724567245772458724597246072461724627246372464724657246672467724687246972470724717247272473724747247572476724777247872479724807248172482724837248472485724867248772488724897249072491724927249372494724957249672497724987249972500725017250272503725047250572506725077250872509725107251172512725137251472515725167251772518725197252072521725227252372524725257252672527725287252972530725317253272533725347253572536725377253872539725407254172542725437254472545725467254772548725497255072551725527255372554725557255672557725587255972560725617256272563725647256572566725677256872569725707257172572725737257472575725767257772578725797258072581725827258372584725857258672587725887258972590725917259272593725947259572596725977259872599726007260172602726037260472605726067260772608726097261072611726127261372614726157261672617726187261972620726217
26227262372624726257262672627726287262972630726317263272633726347263572636726377263872639726407264172642726437264472645726467264772648726497265072651726527265372654726557265672657726587265972660726617266272663726647266572666726677266872669726707267172672726737267472675726767267772678726797268072681726827268372684726857268672687726887268972690726917269272693726947269572696726977269872699727007270172702727037270472705727067270772708727097271072711727127271372714727157271672717727187271972720727217272272723727247272572726727277272872729727307273172732727337273472735727367273772738727397274072741727427274372744727457274672747727487274972750727517275272753727547275572756727577275872759727607276172762727637276472765727667276772768727697277072771727727277372774727757277672777727787277972780727817278272783727847278572786727877278872789727907279172792727937279472795727967279772798727997280072801728027280372804728057280672807728087280972810728117281272813728147281572816728177281872819728207282172822728237282472825728267282772828728297283072831728327283372834728357283672837728387283972840728417284272843728447284572846728477284872849728507285172852728537285472855728567285772858728597286072861728627286372864728657286672867728687286972870728717287272873728747287572876728777287872879728807288172882728837288472885728867288772888728897289072891728927289372894728957289672897728987289972900729017290272903729047290572906729077290872909729107291172912729137291472915729167291772918729197292072921729227292372924729257292672927729287292972930729317293272933729347293572936729377293872939729407294172942729437294472945729467294772948729497295072951729527295372954729557295672957729587295972960729617296272963729647296572966729677296872969729707297172972729737297472975729767297772978729797298072981729827298372984729857298672987729887298972990729917299272993729947299572996729977299872999730007300173002730037300473005730067300773008730097301073011730127301373014730157301673017730187301973020730217
30227302373024730257302673027730287302973030730317303273033730347303573036730377303873039730407304173042730437304473045730467304773048730497305073051730527305373054730557305673057730587305973060730617306273063730647306573066730677306873069730707307173072730737307473075730767307773078730797308073081730827308373084730857308673087730887308973090730917309273093730947309573096730977309873099731007310173102731037310473105731067310773108731097311073111731127311373114731157311673117731187311973120731217312273123731247312573126731277312873129731307313173132731337313473135731367313773138731397314073141731427314373144731457314673147731487314973150731517315273153731547315573156731577315873159731607316173162731637316473165731667316773168731697317073171731727317373174731757317673177731787317973180731817318273183731847318573186731877318873189731907319173192731937319473195731967319773198731997320073201732027320373204732057320673207732087320973210732117321273213732147321573216732177321873219732207322173222732237322473225732267322773228732297323073231732327323373234732357323673237732387323973240732417324273243732447324573246732477324873249732507325173252732537325473255732567325773258732597326073261732627326373264732657326673267732687326973270732717327273273732747327573276732777327873279732807328173282732837328473285732867328773288732897329073291732927329373294732957329673297732987329973300733017330273303733047330573306733077330873309733107331173312733137331473315733167331773318733197332073321733227332373324733257332673327733287332973330733317333273333733347333573336733377333873339733407334173342733437334473345733467334773348733497335073351733527335373354733557335673357733587335973360733617336273363733647336573366733677336873369733707337173372733737337473375733767337773378733797338073381733827338373384733857338673387733887338973390733917339273393733947339573396733977339873399734007340173402734037340473405734067340773408734097341073411734127341373414734157341673417734187341973420734217
34227342373424734257342673427734287342973430734317343273433734347343573436734377343873439734407344173442734437344473445734467344773448734497345073451734527345373454734557345673457734587345973460734617346273463734647346573466734677346873469734707347173472734737347473475734767347773478734797348073481734827348373484734857348673487734887348973490734917349273493734947349573496734977349873499735007350173502735037350473505735067350773508735097351073511735127351373514735157351673517735187351973520735217352273523735247352573526735277352873529735307353173532735337353473535735367353773538735397354073541735427354373544735457354673547735487354973550735517355273553735547355573556735577355873559735607356173562735637356473565735667356773568735697357073571735727357373574735757357673577735787357973580735817358273583735847358573586735877358873589735907359173592735937359473595735967359773598735997360073601736027360373604736057360673607736087360973610736117361273613736147361573616736177361873619736207362173622736237362473625736267362773628736297363073631736327363373634736357363673637736387363973640736417364273643736447364573646736477364873649736507365173652736537365473655736567365773658736597366073661736627366373664736657366673667736687366973670736717367273673736747367573676736777367873679736807368173682736837368473685736867368773688736897369073691736927369373694736957369673697736987369973700737017370273703737047370573706737077370873709737107371173712737137371473715737167371773718737197372073721737227372373724737257372673727737287372973730737317373273733737347373573736737377373873739737407374173742737437374473745737467374773748737497375073751737527375373754737557375673757737587375973760737617376273763737647376573766737677376873769737707377173772737737377473775737767377773778737797378073781737827378373784737857378673787737887378973790737917379273793737947379573796737977379873799738007380173802738037380473805738067380773808738097381073811738127381373814738157381673817738187381973820738217
38227382373824738257382673827738287382973830738317383273833738347383573836738377383873839738407384173842738437384473845738467384773848738497385073851738527385373854738557385673857738587385973860738617386273863738647386573866738677386873869738707387173872738737387473875738767387773878738797388073881738827388373884738857388673887738887388973890738917389273893738947389573896738977389873899739007390173902739037390473905739067390773908739097391073911739127391373914739157391673917739187391973920739217392273923739247392573926739277392873929739307393173932739337393473935739367393773938739397394073941739427394373944739457394673947739487394973950739517395273953739547395573956739577395873959739607396173962739637396473965739667396773968739697397073971739727397373974739757397673977739787397973980739817398273983739847398573986739877398873989739907399173992739937399473995739967399773998739997400074001740027400374004740057400674007740087400974010740117401274013740147401574016740177401874019740207402174022740237402474025740267402774028740297403074031740327403374034740357403674037740387403974040740417404274043740447404574046740477404874049740507405174052740537405474055740567405774058740597406074061740627406374064740657406674067740687406974070740717407274073740747407574076740777407874079740807408174082740837408474085740867408774088740897409074091740927409374094740957409674097740987409974100741017410274103741047410574106741077410874109741107411174112741137411474115741167411774118741197412074121741227412374124741257412674127741287412974130741317413274133741347413574136741377413874139741407414174142741437414474145741467414774148741497415074151741527415374154741557415674157741587415974160741617416274163741647416574166741677416874169741707417174172741737417474175741767417774178741797418074181741827418374184741857418674187741887418974190741917419274193741947419574196741977419874199742007420174202742037420474205742067420774208742097421074211742127421374214742157421674217742187421974220742217
42227422374224742257422674227742287422974230742317423274233742347423574236742377423874239742407424174242742437424474245742467424774248742497425074251742527425374254742557425674257742587425974260742617426274263742647426574266742677426874269742707427174272742737427474275742767427774278742797428074281742827428374284742857428674287742887428974290742917429274293742947429574296742977429874299743007430174302743037430474305743067430774308743097431074311743127431374314743157431674317743187431974320743217432274323743247432574326743277432874329743307433174332743337433474335743367433774338743397434074341743427434374344743457434674347743487434974350743517435274353743547435574356743577435874359743607436174362743637436474365743667436774368743697437074371743727437374374743757437674377743787437974380743817438274383743847438574386743877438874389743907439174392743937439474395743967439774398743997440074401744027440374404744057440674407744087440974410744117441274413744147441574416744177441874419744207442174422744237442474425744267442774428744297443074431744327443374434744357443674437744387443974440744417444274443744447444574446744477444874449744507445174452744537445474455744567445774458744597446074461744627446374464744657446674467744687446974470744717447274473744747447574476744777447874479744807448174482744837448474485744867448774488744897449074491744927449374494744957449674497744987449974500745017450274503745047450574506745077450874509745107451174512745137451474515745167451774518745197452074521745227452374524745257452674527745287452974530745317453274533745347453574536745377453874539745407454174542745437454474545745467454774548745497455074551745527455374554745557455674557745587455974560745617456274563745647456574566745677456874569745707457174572745737457474575745767457774578745797458074581745827458374584745857458674587745887458974590745917459274593745947459574596745977459874599746007460174602746037460474605746067460774608746097461074611746127461374614746157461674617746187461974620746217
46227462374624746257462674627746287462974630746317463274633746347463574636746377463874639746407464174642746437464474645746467464774648746497465074651746527465374654746557465674657746587465974660746617466274663746647466574666746677466874669746707467174672746737467474675746767467774678746797468074681746827468374684746857468674687746887468974690746917469274693746947469574696746977469874699747007470174702747037470474705747067470774708747097471074711747127471374714747157471674717747187471974720747217472274723747247472574726747277472874729747307473174732747337473474735747367473774738747397474074741747427474374744747457474674747747487474974750747517475274753747547475574756747577475874759747607476174762747637476474765747667476774768747697477074771747727477374774747757477674777747787477974780747817478274783747847478574786747877478874789747907479174792747937479474795747967479774798747997480074801748027480374804748057480674807748087480974810748117481274813748147481574816748177481874819748207482174822748237482474825748267482774828748297483074831748327483374834748357483674837748387483974840748417484274843748447484574846748477484874849748507485174852748537485474855748567485774858748597486074861748627486374864748657486674867748687486974870748717487274873748747487574876748777487874879748807488174882748837488474885748867488774888748897489074891748927489374894748957489674897748987489974900749017490274903749047490574906749077490874909749107491174912749137491474915749167491774918749197492074921749227492374924749257492674927749287492974930749317493274933749347493574936749377493874939749407494174942749437494474945749467494774948749497495074951749527495374954749557495674957749587495974960749617496274963749647496574966749677496874969749707497174972749737497474975749767497774978749797498074981749827498374984749857498674987749887498974990749917499274993749947499574996749977499874999750007500175002750037500475005750067500775008750097501075011750127501375014750157501675017750187501975020750217
50227502375024750257502675027750287502975030750317503275033750347503575036750377503875039750407504175042750437504475045750467504775048750497505075051750527505375054750557505675057750587505975060750617506275063750647506575066750677506875069750707507175072750737507475075750767507775078750797508075081750827508375084750857508675087750887508975090750917509275093750947509575096750977509875099751007510175102751037510475105751067510775108751097511075111751127511375114751157511675117751187511975120751217512275123751247512575126751277512875129751307513175132751337513475135751367513775138751397514075141751427514375144751457514675147751487514975150751517515275153751547515575156751577515875159751607516175162751637516475165751667516775168751697517075171751727517375174751757517675177751787517975180751817518275183751847518575186751877518875189751907519175192751937519475195751967519775198751997520075201752027520375204752057520675207752087520975210752117521275213752147521575216752177521875219752207522175222752237522475225752267522775228752297523075231752327523375234752357523675237752387523975240752417524275243752447524575246752477524875249752507525175252752537525475255752567525775258752597526075261752627526375264752657526675267752687526975270752717527275273752747527575276752777527875279752807528175282752837528475285752867528775288752897529075291752927529375294752957529675297752987529975300753017530275303753047530575306753077530875309753107531175312753137531475315753167531775318753197532075321753227532375324753257532675327753287532975330753317533275333753347533575336753377533875339753407534175342753437534475345753467534775348753497535075351753527535375354753557535675357753587535975360753617536275363753647536575366753677536875369753707537175372753737537475375753767537775378753797538075381753827538375384753857538675387753887538975390753917539275393753947539575396753977539875399754007540175402754037540475405754067540775408754097541075411754127541375414754157541675417754187541975420754217
54227542375424754257542675427754287542975430754317543275433754347543575436754377543875439754407544175442754437544475445754467544775448754497545075451754527545375454754557545675457754587545975460754617546275463754647546575466754677546875469754707547175472754737547475475754767547775478754797548075481754827548375484754857548675487754887548975490754917549275493754947549575496754977549875499755007550175502755037550475505755067550775508755097551075511755127551375514755157551675517755187551975520755217552275523755247552575526755277552875529755307553175532755337553475535755367553775538755397554075541755427554375544755457554675547755487554975550755517555275553755547555575556755577555875559755607556175562755637556475565755667556775568755697557075571755727557375574755757557675577755787557975580755817558275583755847558575586755877558875589755907559175592755937559475595755967559775598755997560075601756027560375604756057560675607756087560975610756117561275613756147561575616756177561875619756207562175622756237562475625756267562775628756297563075631756327563375634756357563675637756387563975640756417564275643756447564575646756477564875649756507565175652756537565475655756567565775658756597566075661756627566375664756657566675667756687566975670756717567275673756747567575676756777567875679756807568175682756837568475685756867568775688756897569075691756927569375694756957569675697756987569975700757017570275703757047570575706757077570875709757107571175712757137571475715757167571775718757197572075721757227572375724757257572675727757287572975730757317573275733757347573575736757377573875739757407574175742757437574475745757467574775748757497575075751757527575375754757557575675757757587575975760757617576275763757647576575766757677576875769757707577175772757737577475775757767577775778757797578075781757827578375784757857578675787757887578975790757917579275793757947579575796757977579875799758007580175802758037580475805758067580775808758097581075811758127581375814758157581675817758187581975820758217
58227582375824758257582675827758287582975830758317583275833758347583575836758377583875839758407584175842758437584475845758467584775848758497585075851758527585375854758557585675857758587585975860758617586275863758647586575866758677586875869758707587175872758737587475875758767587775878758797588075881758827588375884758857588675887758887588975890758917589275893758947589575896758977589875899759007590175902759037590475905759067590775908759097591075911759127591375914759157591675917759187591975920759217592275923759247592575926759277592875929759307593175932759337593475935759367593775938759397594075941759427594375944759457594675947759487594975950759517595275953759547595575956759577595875959759607596175962759637596475965759667596775968759697597075971759727597375974759757597675977759787597975980759817598275983759847598575986759877598875989759907599175992759937599475995759967599775998759997600076001760027600376004760057600676007760087600976010760117601276013760147601576016760177601876019760207602176022760237602476025760267602776028760297603076031760327603376034760357603676037760387603976040760417604276043760447604576046760477604876049760507605176052760537605476055760567605776058760597606076061760627606376064760657606676067760687606976070760717607276073760747607576076760777607876079760807608176082760837608476085760867608776088760897609076091760927609376094760957609676097760987609976100761017610276103761047610576106761077610876109761107611176112761137611476115761167611776118761197612076121761227612376124761257612676127761287612976130761317613276133761347613576136761377613876139761407614176142761437614476145761467614776148761497615076151761527615376154761557615676157761587615976160761617616276163761647616576166761677616876169761707617176172761737617476175761767617776178761797618076181761827618376184761857618676187761887618976190761917619276193761947619576196761977619876199762007620176202762037620476205762067620776208762097621076211762127621376214762157621676217762187621976220762217
62227622376224762257622676227762287622976230762317623276233762347623576236762377623876239762407624176242762437624476245762467624776248762497625076251762527625376254762557625676257762587625976260762617626276263762647626576266762677626876269762707627176272762737627476275762767627776278762797628076281762827628376284762857628676287762887628976290762917629276293762947629576296762977629876299763007630176302763037630476305763067630776308763097631076311763127631376314763157631676317763187631976320763217632276323763247632576326763277632876329763307633176332763337633476335763367633776338763397634076341763427634376344763457634676347763487634976350763517635276353763547635576356763577635876359763607636176362763637636476365763667636776368763697637076371763727637376374763757637676377763787637976380763817638276383763847638576386763877638876389763907639176392763937639476395763967639776398763997640076401764027640376404764057640676407764087640976410764117641276413764147641576416764177641876419764207642176422764237642476425764267642776428764297643076431764327643376434764357643676437764387643976440764417644276443764447644576446764477644876449764507645176452764537645476455764567645776458764597646076461764627646376464764657646676467764687646976470764717647276473764747647576476764777647876479764807648176482764837648476485764867648776488764897649076491764927649376494764957649676497764987649976500765017650276503765047650576506765077650876509765107651176512765137651476515765167651776518765197652076521765227652376524765257652676527765287652976530765317653276533765347653576536765377653876539765407654176542765437654476545765467654776548765497655076551765527655376554765557655676557765587655976560765617656276563765647656576566765677656876569765707657176572765737657476575765767657776578765797658076581765827658376584765857658676587765887658976590765917659276593765947659576596765977659876599766007660176602766037660476605766067660776608766097661076611766127661376614766157661676617766187661976620766217
66227662376624766257662676627766287662976630766317663276633766347663576636766377663876639766407664176642766437664476645766467664776648766497665076651766527665376654766557665676657766587665976660766617666276663766647666576666766677666876669766707667176672766737667476675766767667776678766797668076681766827668376684766857668676687766887668976690766917669276693766947669576696766977669876699767007670176702767037670476705767067670776708767097671076711767127671376714767157671676717767187671976720767217672276723767247672576726767277672876729767307673176732767337673476735767367673776738767397674076741767427674376744767457674676747767487674976750767517675276753767547675576756767577675876759767607676176762767637676476765767667676776768767697677076771767727677376774767757677676777767787677976780767817678276783767847678576786767877678876789767907679176792767937679476795767967679776798767997680076801768027680376804768057680676807768087680976810768117681276813768147681576816768177681876819768207682176822768237682476825768267682776828768297683076831768327683376834768357683676837768387683976840768417684276843768447684576846768477684876849768507685176852768537685476855768567685776858768597686076861768627686376864768657686676867768687686976870768717687276873768747687576876768777687876879768807688176882768837688476885768867688776888768897689076891768927689376894768957689676897768987689976900769017690276903769047690576906769077690876909769107691176912769137691476915769167691776918769197692076921769227692376924769257692676927769287692976930769317693276933769347693576936769377693876939769407694176942769437694476945769467694776948769497695076951769527695376954769557695676957769587695976960769617696276963769647696576966769677696876969769707697176972769737697476975769767697776978769797698076981769827698376984769857698676987769887698976990769917699276993769947699576996769977699876999770007700177002770037700477005770067700777008770097701077011770127701377014770157701677017770187701977020770217
70227702377024770257702677027770287702977030770317703277033770347703577036770377703877039770407704177042770437704477045770467704777048770497705077051770527705377054770557705677057770587705977060770617706277063770647706577066770677706877069770707707177072770737707477075770767707777078770797708077081770827708377084770857708677087770887708977090770917709277093770947709577096770977709877099771007710177102771037710477105771067710777108771097711077111771127711377114771157711677117771187711977120771217712277123771247712577126771277712877129771307713177132771337713477135771367713777138771397714077141771427714377144771457714677147771487714977150771517715277153771547715577156771577715877159771607716177162771637716477165771667716777168771697717077171771727717377174771757717677177771787717977180771817718277183771847718577186771877718877189771907719177192771937719477195771967719777198771997720077201772027720377204772057720677207772087720977210772117721277213772147721577216772177721877219772207722177222772237722477225772267722777228772297723077231772327723377234772357723677237772387723977240772417724277243772447724577246772477724877249772507725177252772537725477255772567725777258772597726077261772627726377264772657726677267772687726977270772717727277273772747727577276772777727877279772807728177282772837728477285772867728777288772897729077291772927729377294772957729677297772987729977300773017730277303773047730577306773077730877309773107731177312773137731477315773167731777318773197732077321773227732377324773257732677327773287732977330773317733277333773347733577336773377733877339773407734177342773437734477345773467734777348773497735077351773527735377354773557735677357773587735977360773617736277363773647736577366773677736877369773707737177372773737737477375773767737777378773797738077381773827738377384773857738677387773887738977390773917739277393773947739577396773977739877399774007740177402774037740477405774067740777408774097741077411774127741377414774157741677417774187741977420774217
74227742377424774257742677427774287742977430774317743277433774347743577436774377743877439774407744177442774437744477445774467744777448774497745077451774527745377454774557745677457774587745977460774617746277463774647746577466774677746877469774707747177472774737747477475774767747777478774797748077481774827748377484774857748677487774887748977490774917749277493774947749577496774977749877499775007750177502775037750477505775067750777508775097751077511775127751377514775157751677517775187751977520775217752277523775247752577526775277752877529775307753177532775337753477535775367753777538775397754077541775427754377544775457754677547775487754977550775517755277553775547755577556775577755877559775607756177562775637756477565775667756777568775697757077571775727757377574775757757677577775787757977580775817758277583775847758577586775877758877589775907759177592775937759477595775967759777598775997760077601776027760377604776057760677607776087760977610776117761277613776147761577616776177761877619776207762177622776237762477625776267762777628776297763077631776327763377634776357763677637776387763977640776417764277643776447764577646776477764877649776507765177652776537765477655776567765777658776597766077661776627766377664776657766677667776687766977670776717767277673776747767577676776777767877679776807768177682776837768477685776867768777688776897769077691776927769377694776957769677697776987769977700777017770277703777047770577706777077770877709777107771177712777137771477715777167771777718777197772077721777227772377724777257772677727777287772977730777317773277733777347773577736777377773877739777407774177742777437774477745777467774777748777497775077751777527775377754777557775677757777587775977760777617776277763777647776577766777677776877769777707777177772777737777477775777767777777778777797778077781777827778377784777857778677787777887778977790777917779277793777947779577796777977779877799778007780177802778037780477805778067780777808778097781077811778127781377814778157781677817778187781977820778217
78227782377824778257782677827778287782977830778317783277833778347783577836778377783877839778407784177842778437784477845778467784777848778497785077851778527785377854778557785677857778587785977860778617786277863778647786577866778677786877869778707787177872778737787477875778767787777878778797788077881778827788377884778857788677887778887788977890778917789277893778947789577896778977789877899779007790177902779037790477905779067790777908779097791077911779127791377914779157791677917779187791977920779217792277923779247792577926779277792877929779307793177932779337793477935779367793777938779397794077941779427794377944779457794677947779487794977950779517795277953779547795577956779577795877959779607796177962779637796477965779667796777968779697797077971779727797377974779757797677977779787797977980779817798277983779847798577986779877798877989779907799177992779937799477995779967799777998779997800078001780027800378004780057800678007780087800978010780117801278013780147801578016780177801878019780207802178022780237802478025780267802778028780297803078031780327803378034780357803678037780387803978040780417804278043780447804578046780477804878049780507805178052780537805478055780567805778058780597806078061780627806378064780657806678067780687806978070780717807278073780747807578076780777807878079780807808178082780837808478085780867808778088780897809078091780927809378094780957809678097780987809978100781017810278103781047810578106781077810878109781107811178112781137811478115781167811778118781197812078121781227812378124781257812678127781287812978130781317813278133781347813578136781377813878139781407814178142781437814478145781467814778148781497815078151781527815378154781557815678157781587815978160781617816278163781647816578166781677816878169781707817178172781737817478175781767817778178781797818078181781827818378184781857818678187781887818978190781917819278193781947819578196781977819878199782007820178202782037820478205782067820778208782097821078211782127821378214782157821678217782187821978220782217
82227822378224782257822678227782287822978230782317823278233782347823578236782377823878239782407824178242782437824478245782467824778248782497825078251782527825378254782557825678257782587825978260782617826278263782647826578266782677826878269782707827178272782737827478275782767827778278782797828078281782827828378284782857828678287782887828978290782917829278293782947829578296782977829878299783007830178302783037830478305783067830778308783097831078311783127831378314783157831678317783187831978320783217832278323783247832578326783277832878329783307833178332783337833478335783367833778338783397834078341783427834378344783457834678347783487834978350783517835278353783547835578356783577835878359783607836178362783637836478365783667836778368783697837078371783727837378374783757837678377783787837978380783817838278383783847838578386783877838878389783907839178392783937839478395783967839778398783997840078401784027840378404784057840678407784087840978410784117841278413784147841578416784177841878419784207842178422784237842478425784267842778428784297843078431784327843378434784357843678437784387843978440784417844278443784447844578446784477844878449784507845178452784537845478455784567845778458784597846078461784627846378464784657846678467784687846978470784717847278473784747847578476784777847878479784807848178482784837848478485784867848778488784897849078491784927849378494784957849678497784987849978500785017850278503785047850578506785077850878509785107851178512785137851478515785167851778518785197852078521785227852378524785257852678527785287852978530785317853278533785347853578536785377853878539785407854178542785437854478545785467854778548785497855078551785527855378554785557855678557785587855978560785617856278563785647856578566785677856878569785707857178572785737857478575785767857778578785797858078581785827858378584785857858678587785887858978590785917859278593785947859578596785977859878599786007860178602786037860478605786067860778608786097861078611786127861378614786157861678617786187861978620786217
86227862378624786257862678627786287862978630786317863278633786347863578636786377863878639786407864178642786437864478645786467864778648786497865078651786527865378654786557865678657786587865978660786617866278663786647866578666786677866878669786707867178672786737867478675786767867778678786797868078681786827868378684786857868678687786887868978690786917869278693786947869578696786977869878699787007870178702787037870478705787067870778708787097871078711787127871378714787157871678717787187871978720787217872278723787247872578726787277872878729787307873178732787337873478735787367873778738787397874078741787427874378744787457874678747787487874978750787517875278753787547875578756787577875878759787607876178762787637876478765787667876778768787697877078771787727877378774787757877678777787787877978780787817878278783787847878578786787877878878789787907879178792787937879478795787967879778798787997880078801788027880378804788057880678807788087880978810788117881278813788147881578816788177881878819788207882178822788237882478825788267882778828788297883078831788327883378834788357883678837788387883978840788417884278843788447884578846788477884878849788507885178852788537885478855788567885778858788597886078861788627886378864788657886678867788687886978870788717887278873788747887578876788777887878879788807888178882788837888478885788867888778888788897889078891788927889378894788957889678897788987889978900789017890278903789047890578906789077890878909789107891178912789137891478915789167891778918789197892078921789227892378924789257892678927789287892978930789317893278933789347893578936789377893878939789407894178942789437894478945789467894778948789497895078951789527895378954789557895678957789587895978960789617896278963789647896578966789677896878969789707897178972789737897478975789767897778978789797898078981789827898378984789857898678987789887898978990789917899278993789947899578996789977899878999790007900179002790037900479005790067900779008790097901079011790127901379014790157901679017790187901979020790217
90227902379024790257902679027790287902979030790317903279033790347903579036790377903879039790407904179042790437904479045790467904779048790497905079051790527905379054790557905679057790587905979060790617906279063790647906579066790677906879069790707907179072790737907479075790767907779078790797908079081790827908379084790857908679087790887908979090790917909279093790947909579096790977909879099791007910179102791037910479105791067910779108791097911079111791127911379114791157911679117791187911979120791217912279123791247912579126791277912879129791307913179132791337913479135791367913779138791397914079141791427914379144791457914679147791487914979150791517915279153791547915579156791577915879159791607916179162791637916479165791667916779168791697917079171791727917379174791757917679177791787917979180791817918279183791847918579186791877918879189791907919179192791937919479195791967919779198791997920079201792027920379204792057920679207792087920979210792117921279213792147921579216792177921879219792207922179222792237922479225792267922779228792297923079231792327923379234792357923679237792387923979240792417924279243792447924579246792477924879249792507925179252792537925479255792567925779258792597926079261792627926379264792657926679267792687926979270792717927279273792747927579276792777927879279792807928179282792837928479285792867928779288792897929079291792927929379294792957929679297792987929979300793017930279303793047930579306793077930879309793107931179312793137931479315793167931779318793197932079321793227932379324793257932679327793287932979330793317933279333793347933579336793377933879339793407934179342793437934479345793467934779348793497935079351793527935379354793557935679357793587935979360793617936279363793647936579366793677936879369793707937179372793737937479375793767937779378793797938079381793827938379384793857938679387793887938979390793917939279393793947939579396793977939879399794007940179402794037940479405794067940779408794097941079411794127941379414794157941679417794187941979420794217
94227942379424794257942679427794287942979430794317943279433794347943579436794377943879439794407944179442794437944479445794467944779448794497945079451794527945379454794557945679457794587945979460794617946279463794647946579466794677946879469794707947179472794737947479475794767947779478794797948079481794827948379484794857948679487794887948979490794917949279493794947949579496794977949879499795007950179502795037950479505795067950779508795097951079511795127951379514795157951679517795187951979520795217952279523795247952579526795277952879529795307953179532795337953479535795367953779538795397954079541795427954379544795457954679547795487954979550795517955279553795547955579556795577955879559795607956179562795637956479565795667956779568795697957079571795727957379574795757957679577795787957979580795817958279583795847958579586795877958879589795907959179592795937959479595795967959779598795997960079601796027960379604796057960679607796087960979610796117961279613796147961579616796177961879619796207962179622796237962479625796267962779628796297963079631796327963379634796357963679637796387963979640796417964279643796447964579646796477964879649796507965179652796537965479655796567965779658796597966079661796627966379664796657966679667796687966979670796717967279673796747967579676796777967879679796807968179682796837968479685796867968779688796897969079691796927969379694796957969679697796987969979700797017970279703797047970579706797077970879709797107971179712797137971479715797167971779718797197972079721797227972379724797257972679727797287972979730797317973279733797347973579736797377973879739797407974179742797437974479745797467974779748797497975079751797527975379754797557975679757797587975979760797617976279763797647976579766797677976879769797707977179772797737977479775797767977779778797797978079781797827978379784797857978679787797887978979790797917979279793797947979579796797977979879799798007980179802798037980479805798067980779808798097981079811798127981379814798157981679817798187981979820798217
98227982379824798257982679827798287982979830798317983279833798347983579836798377983879839798407984179842798437984479845798467984779848798497985079851798527985379854798557985679857798587985979860798617986279863798647986579866798677986879869798707987179872798737987479875798767987779878798797988079881798827988379884798857988679887798887988979890798917989279893798947989579896798977989879899799007990179902799037990479905799067990779908799097991079911799127991379914799157991679917799187991979920799217992279923799247992579926799277992879929799307993179932799337993479935799367993779938799397994079941799427994379944799457994679947799487994979950799517995279953799547995579956799577995879959799607996179962799637996479965799667996779968799697997079971799727997379974799757997679977799787997979980799817998279983799847998579986799877998879989799907999179992799937999479995799967999779998799998000080001800028000380004800058000680007800088000980010800118001280013800148001580016800178001880019800208002180022800238002480025800268002780028800298003080031800328003380034800358003680037800388003980040800418004280043800448004580046800478004880049800508005180052800538005480055800568005780058800598006080061800628006380064800658006680067800688006980070800718007280073800748007580076800778007880079800808008180082800838008480085800868008780088800898009080091800928009380094800958009680097800988009980100801018010280103801048010580106801078010880109801108011180112801138011480115801168011780118801198012080121801228012380124801258012680127801288012980130801318013280133801348013580136801378013880139801408014180142801438014480145801468014780148801498015080151801528015380154801558015680157801588015980160801618016280163801648016580166801678016880169801708017180172801738017480175801768017780178801798018080181801828018380184801858018680187801888018980190801918019280193801948019580196801978019880199802008020180202802038020480205802068020780208802098021080211802128021380214802158021680217802188021980220802218
02228022380224802258022680227802288022980230802318023280233802348023580236802378023880239802408024180242802438024480245802468024780248802498025080251802528025380254802558025680257802588025980260802618026280263802648026580266802678026880269802708027180272802738027480275802768027780278802798028080281802828028380284802858028680287802888028980290802918029280293802948029580296802978029880299803008030180302803038030480305803068030780308803098031080311803128031380314803158031680317803188031980320803218032280323803248032580326803278032880329803308033180332803338033480335803368033780338803398034080341803428034380344803458034680347803488034980350803518035280353803548035580356803578035880359803608036180362803638036480365803668036780368803698037080371803728037380374803758037680377803788037980380803818038280383803848038580386803878038880389803908039180392803938039480395803968039780398803998040080401804028040380404804058040680407804088040980410804118041280413804148041580416804178041880419804208042180422804238042480425804268042780428804298043080431804328043380434804358043680437804388043980440804418044280443804448044580446804478044880449804508045180452804538045480455804568045780458804598046080461804628046380464804658046680467804688046980470804718047280473804748047580476804778047880479804808048180482804838048480485804868048780488804898049080491804928049380494804958049680497804988049980500805018050280503805048050580506805078050880509805108051180512805138051480515805168051780518805198052080521805228052380524805258052680527805288052980530805318053280533805348053580536805378053880539805408054180542805438054480545805468054780548805498055080551805528055380554805558055680557805588055980560805618056280563805648056580566805678056880569805708057180572805738057480575805768057780578805798058080581805828058380584805858058680587805888058980590805918059280593805948059580596805978059880599806008060180602806038060480605806068060780608806098061080611806128061380614806158061680617806188061980620806218
06228062380624806258062680627806288062980630806318063280633806348063580636806378063880639806408064180642806438064480645806468064780648806498065080651806528065380654806558065680657806588065980660806618066280663806648066580666806678066880669806708067180672806738067480675806768067780678806798068080681806828068380684806858068680687806888068980690806918069280693806948069580696806978069880699807008070180702807038070480705807068070780708807098071080711807128071380714807158071680717807188071980720807218072280723807248072580726807278072880729807308073180732807338073480735807368073780738807398074080741807428074380744807458074680747807488074980750807518075280753807548075580756807578075880759807608076180762807638076480765807668076780768807698077080771807728077380774807758077680777807788077980780807818078280783807848078580786807878078880789807908079180792807938079480795807968079780798807998080080801808028080380804808058080680807808088080980810808118081280813808148081580816808178081880819808208082180822808238082480825808268082780828808298083080831808328083380834808358083680837808388083980840808418084280843808448084580846808478084880849808508085180852808538085480855808568085780858808598086080861808628086380864808658086680867808688086980870808718087280873808748087580876808778087880879808808088180882808838088480885808868088780888808898089080891808928089380894808958089680897808988089980900809018090280903809048090580906809078090880909809108091180912809138091480915809168091780918809198092080921809228092380924809258092680927809288092980930809318093280933809348093580936809378093880939809408094180942809438094480945809468094780948809498095080951809528095380954809558095680957809588095980960809618096280963809648096580966809678096880969809708097180972809738097480975809768097780978809798098080981809828098380984809858098680987809888098980990809918099280993809948099580996809978099880999810008100181002810038100481005810068100781008810098101081011810128101381014810158101681017810188101981020810218
10228102381024810258102681027810288102981030810318103281033810348103581036810378103881039810408104181042810438104481045810468104781048810498105081051810528105381054810558105681057810588105981060810618106281063810648106581066810678106881069810708107181072810738107481075810768107781078810798108081081810828108381084810858108681087810888108981090810918109281093810948109581096810978109881099811008110181102811038110481105811068110781108811098111081111811128111381114811158111681117811188111981120811218112281123811248112581126811278112881129811308113181132811338113481135811368113781138811398114081141811428114381144811458114681147811488114981150811518115281153811548115581156811578115881159811608116181162811638116481165
  1. diff -urN --no-dereference linux-5.16.14.orig/Documentation/process/changes.rst linux-5.16.14/Documentation/process/changes.rst
  2. --- linux-5.16.14.orig/Documentation/process/changes.rst 2022-03-11 12:42:10.000000000 +0100
  3. +++ linux-5.16.14/Documentation/process/changes.rst 2022-03-12 13:25:39.545794719 +0100
  4. @@ -214,6 +214,13 @@
  5. versions of ``mkreiserfs``, ``resize_reiserfs``, ``debugreiserfs`` and
  6. ``reiserfsck``. These utils work on both i386 and alpha platforms.
  7. +Reiser4progs
  8. +------------
  9. +
  10. +The reiser4progs package contains utilities for the reiser4 file system.
  11. +Detailed instructions are provided in the README file located at:
  12. +<https://github.com/edward6/reiser4progs>.
  13. +
  14. Xfsprogs
  15. --------
  16. @@ -421,6 +428,11 @@
  17. - <https://git.kernel.org/pub/scm/linux/kernel/git/jeffm/reiserfsprogs.git/>
  18. +Reiser4progs
  19. +------------
  20. +
  21. +- <http://sourceforge.net/projects/reiser4/>
  22. +
  23. Xfsprogs
  24. --------
  25. diff -urN --no-dereference linux-5.16.14.orig/fs/fs-writeback.c linux-5.16.14/fs/fs-writeback.c
  26. --- linux-5.16.14.orig/fs/fs-writeback.c 2022-03-11 12:42:10.000000000 +0100
  27. +++ linux-5.16.14/fs/fs-writeback.c 2022-03-12 13:25:39.546794721 +0100
  28. @@ -37,25 +37,6 @@
  29. #define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
  30. /*
  31. - * Passed into wb_writeback(), essentially a subset of writeback_control
  32. - */
  33. -struct wb_writeback_work {
  34. - long nr_pages;
  35. - struct super_block *sb;
  36. - enum writeback_sync_modes sync_mode;
  37. - unsigned int tagged_writepages:1;
  38. - unsigned int for_kupdate:1;
  39. - unsigned int range_cyclic:1;
  40. - unsigned int for_background:1;
  41. - unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
  42. - unsigned int auto_free:1; /* free on completion */
  43. - enum wb_reason reason; /* why was writeback initiated? */
  44. -
  45. - struct list_head list; /* pending work list */
  46. - struct wb_completion *done; /* set if the caller waits */
  47. -};
  48. -
  49. -/*
  50. * If an inode is constantly having its pages dirtied, but then the
  51. * updates stop dirtytime_expire_interval seconds in the past, it's
  52. * possible for the worst case time between when an inode has its
  53. @@ -1786,20 +1767,12 @@
  54. * unlock and relock that for each inode it ends up doing
  55. * IO for.
  56. */
  57. -static long writeback_sb_inodes(struct super_block *sb,
  58. - struct bdi_writeback *wb,
  59. - struct wb_writeback_work *work)
  60. +long generic_writeback_sb_inodes(struct super_block *sb,
  61. + struct bdi_writeback *wb,
  62. + struct writeback_control *wbc,
  63. + struct wb_writeback_work *work,
  64. + bool flush_all)
  65. {
  66. - struct writeback_control wbc = {
  67. - .sync_mode = work->sync_mode,
  68. - .tagged_writepages = work->tagged_writepages,
  69. - .for_kupdate = work->for_kupdate,
  70. - .for_background = work->for_background,
  71. - .for_sync = work->for_sync,
  72. - .range_cyclic = work->range_cyclic,
  73. - .range_start = 0,
  74. - .range_end = LLONG_MAX,
  75. - };
  76. unsigned long start_time = jiffies;
  77. long write_chunk;
  78. long wrote = 0; /* count both pages and inodes */
  79. @@ -1838,7 +1811,7 @@
  80. spin_unlock(&inode->i_lock);
  81. continue;
  82. }
  83. - if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
  84. + if ((inode->i_state & I_SYNC) && wbc->sync_mode != WB_SYNC_ALL) {
  85. /*
  86. * If this inode is locked for writeback and we are not
  87. * doing writeback-for-data-integrity, move it to
  88. @@ -1868,21 +1841,21 @@
  89. continue;
  90. }
  91. inode->i_state |= I_SYNC;
  92. - wbc_attach_and_unlock_inode(&wbc, inode);
  93. + wbc_attach_and_unlock_inode(wbc, inode);
  94. write_chunk = writeback_chunk_size(wb, work);
  95. - wbc.nr_to_write = write_chunk;
  96. - wbc.pages_skipped = 0;
  97. + wbc->nr_to_write = write_chunk;
  98. + wbc->pages_skipped = 0;
  99. /*
  100. * We use I_SYNC to pin the inode in memory. While it is set
  101. * evict_inode() will wait so the inode cannot be freed.
  102. */
  103. - __writeback_single_inode(inode, &wbc);
  104. + __writeback_single_inode(inode, wbc);
  105. - wbc_detach_inode(&wbc);
  106. - work->nr_pages -= write_chunk - wbc.nr_to_write;
  107. - wrote += write_chunk - wbc.nr_to_write;
  108. + wbc_detach_inode(wbc);
  109. + work->nr_pages -= write_chunk - wbc->nr_to_write;
  110. + wrote += write_chunk - wbc->nr_to_write;
  111. if (need_resched()) {
  112. /*
  113. @@ -1906,7 +1879,7 @@
  114. spin_lock(&inode->i_lock);
  115. if (!(inode->i_state & I_DIRTY_ALL))
  116. wrote++;
  117. - requeue_inode(inode, tmp_wb, &wbc);
  118. + requeue_inode(inode, tmp_wb, wbc);
  119. inode_sync_complete(inode);
  120. spin_unlock(&inode->i_lock);
  121. @@ -1920,7 +1893,7 @@
  122. * background threshold and other termination conditions.
  123. */
  124. if (wrote) {
  125. - if (time_is_before_jiffies(start_time + HZ / 10UL))
  126. + if (!flush_all && time_is_before_jiffies(start_time + HZ / 10UL))
  127. break;
  128. if (work->nr_pages <= 0)
  129. break;
  130. @@ -1928,6 +1901,27 @@
  131. }
  132. return wrote;
  133. }
  134. +EXPORT_SYMBOL(generic_writeback_sb_inodes);
  135. +
  136. +long writeback_sb_inodes(struct super_block *sb,
  137. + struct bdi_writeback *wb,
  138. + struct wb_writeback_work *work)
  139. +{
  140. + struct writeback_control wbc = {
  141. + .sync_mode = work->sync_mode,
  142. + .tagged_writepages = work->tagged_writepages,
  143. + .for_kupdate = work->for_kupdate,
  144. + .for_background = work->for_background,
  + .for_sync = work->for_sync,
  145. + .range_cyclic = work->range_cyclic,
  146. + .range_start = 0,
  147. + .range_end = LLONG_MAX,
  148. + };
  149. + if (sb->s_op->writeback_inodes)
  150. + return sb->s_op->writeback_inodes(sb, wb, &wbc, work, false);
  151. + else
  152. + return generic_writeback_sb_inodes(sb, wb, &wbc, work, false);
  153. +}
  154. static long __writeback_inodes_wb(struct bdi_writeback *wb,
  155. struct wb_writeback_work *work)
  156. @@ -2214,6 +2207,31 @@
  157. }
  158. /*
  159. + * This function is for file systems which have their
  160. + * own means of periodical write-out of old data.
  161. + * NOTE: inode_lock should be held.
  162. + *
  163. + * Skip a portion of b_io inodes which belong to @sb
  164. + * and go sequentially in reverse order.
  165. + */
  166. +void writeback_skip_sb_inodes(struct super_block *sb,
  167. + struct bdi_writeback *wb)
  168. +{
  169. + while (1) {
  170. + struct inode *inode;
  171. +
  172. + if (list_empty(&wb->b_io))
  173. + break;
  174. + inode = wb_inode(wb->b_io.prev);
  175. + if (sb != inode->i_sb)
  176. + break;
  177. + redirty_tail(inode, wb);
  178. + }
  179. +}
  180. +EXPORT_SYMBOL(writeback_skip_sb_inodes);
  181. +
  182. +
  183. +/*
  184. * Handle writeback of dirty data for the device backed by this bdi. Also
  185. * reschedules periodically and does kupdated style flushing.
  186. */
  187. diff -urN --no-dereference linux-5.16.14.orig/fs/Kconfig linux-5.16.14/fs/Kconfig
  188. --- linux-5.16.14.orig/fs/Kconfig 2022-03-11 12:42:10.000000000 +0100
  189. +++ linux-5.16.14/fs/Kconfig 2022-03-12 13:25:39.545794719 +0100
  190. @@ -31,6 +31,7 @@
  191. default y if EXT4_FS=y
  192. default m if EXT2_FS_XATTR || EXT4_FS
  193. +source "fs/reiser4/Kconfig"
  194. source "fs/reiserfs/Kconfig"
  195. source "fs/jfs/Kconfig"
  196. diff -urN --no-dereference linux-5.16.14.orig/fs/Makefile linux-5.16.14/fs/Makefile
  197. --- linux-5.16.14.orig/fs/Makefile 2022-03-11 12:42:10.000000000 +0100
  198. +++ linux-5.16.14/fs/Makefile 2022-03-12 13:25:39.545794719 +0100
  199. @@ -69,6 +69,7 @@
  200. obj-$(CONFIG_NETFS_SUPPORT) += netfs/
  201. obj-$(CONFIG_FSCACHE) += fscache/
  202. obj-$(CONFIG_REISERFS_FS) += reiserfs/
  203. +obj-$(CONFIG_REISER4_FS) += reiser4/
  204. obj-$(CONFIG_EXT4_FS) += ext4/
  205. # We place ext4 before ext2 so that clean ext3 root fs's do NOT mount using the
  206. # ext2 driver, which doesn't know about journalling! Explicitly request ext2
  207. diff -urN --no-dereference linux-5.16.14.orig/fs/read_write.c linux-5.16.14/fs/read_write.c
  208. --- linux-5.16.14.orig/fs/read_write.c 2022-03-11 12:42:10.000000000 +0100
  209. +++ linux-5.16.14/fs/read_write.c 2022-03-12 13:25:56.405835994 +0100
  210. @@ -233,12 +233,11 @@
  211. }
  212. EXPORT_SYMBOL(no_llseek);
  213. -loff_t default_llseek(struct file *file, loff_t offset, int whence)
  214. +loff_t default_llseek_unlocked(struct file *file, loff_t offset, int whence)
  215. {
  216. struct inode *inode = file_inode(file);
  217. loff_t retval;
  218. - inode_lock(inode);
  219. switch (whence) {
  220. case SEEK_END:
  221. offset += i_size_read(inode);
  222. @@ -283,7 +282,17 @@
  223. retval = offset;
  224. }
  225. out:
  226. - inode_unlock(inode);
  227. + return retval;
  228. +}
  229. +EXPORT_SYMBOL(default_llseek_unlocked);
  230. +
  231. +loff_t default_llseek(struct file *file, loff_t offset, int origin)
  232. +{
  233. + loff_t retval;
  234. +
  235. + inode_lock(file_inode(file));
  236. + retval = default_llseek_unlocked(file, offset, origin);
  237. + inode_unlock(file_inode(file));
  238. return retval;
  239. }
  240. EXPORT_SYMBOL(default_llseek);
  241. @@ -386,7 +395,7 @@
  242. read_write == READ ? MAY_READ : MAY_WRITE);
  243. }
  244. -static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
  245. +ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
  246. {
  247. struct iovec iov = { .iov_base = buf, .iov_len = len };
  248. struct kiocb kiocb;
  249. @@ -403,6 +412,7 @@
  250. *ppos = kiocb.ki_pos;
  251. return ret;
  252. }
  253. +EXPORT_SYMBOL(new_sync_read);
  254. static int warn_unsupported(struct file *file, const char *op)
  255. {
  256. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/as_ops.c linux-5.16.14/fs/reiser4/as_ops.c
  257. --- linux-5.16.14.orig/fs/reiser4/as_ops.c 1970-01-01 01:00:00.000000000 +0100
  258. +++ linux-5.16.14/fs/reiser4/as_ops.c 2022-03-12 13:26:19.638892695 +0100
  259. @@ -0,0 +1,354 @@
  260. +/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */
  261. +
  262. +/* Interface to VFS. Reiser4 address_space_operations are defined here. */
  263. +
  264. +#include "forward.h"
  265. +#include "debug.h"
  266. +#include "dformat.h"
  267. +#include "coord.h"
  268. +#include "plugin/item/item.h"
  269. +#include "plugin/file/file.h"
  270. +#include "plugin/security/perm.h"
  271. +#include "plugin/disk_format/disk_format.h"
  272. +#include "plugin/plugin.h"
  273. +#include "plugin/plugin_set.h"
  274. +#include "plugin/object.h"
  275. +#include "txnmgr.h"
  276. +#include "jnode.h"
  277. +#include "znode.h"
  278. +#include "block_alloc.h"
  279. +#include "tree.h"
  280. +#include "vfs_ops.h"
  281. +#include "inode.h"
  282. +#include "page_cache.h"
  283. +#include "ktxnmgrd.h"
  284. +#include "super.h"
  285. +#include "reiser4.h"
  286. +#include "entd.h"
  287. +
  288. +#include <linux/profile.h>
  289. +#include <linux/types.h>
  290. +#include <linux/mount.h>
  291. +#include <linux/vfs.h>
  292. +#include <linux/mm.h>
  293. +#include <linux/buffer_head.h>
  294. +#include <linux/dcache.h>
  295. +#include <linux/list.h>
  296. +#include <linux/pagemap.h>
  297. +#include <linux/slab.h>
  298. +#include <linux/seq_file.h>
  299. +#include <linux/init.h>
  300. +#include <linux/module.h>
  301. +#include <linux/writeback.h>
  302. +#include <linux/backing-dev.h>
  303. +#include <linux/security.h>
  304. +#include <linux/migrate.h>
  305. +
  306. +/* address space operations */
  307. +
  308. +/**
  309. + * reiser4_set_page_dirty - set dirty bit, tag in page tree, dirty accounting
  310. + * @page: page to be dirtied
  311. + *
  312. + * Operation of struct address_space_operations. This implementation is used by
  313. + * unix and cryptcompress file plugins.
  314. + *
  315. + * This is called when reiser4 page gets dirtied outside of reiser4, for
  316. + * example, when dirty bit is moved from pte to physical page.
  317. + *
  318. + * Tags page in the mapping's page tree with special tag so that it is possible
  319. + * to do all the reiser4 specific work wrt dirty pages (jnode creation,
  320. + * capturing by an atom) later because it can not be done in the contexts where
  321. + * set_page_dirty is called.
  322. + */
  323. +int reiser4_set_page_dirty(struct page *page)
  324. +{
  325. + /* this page can be unformatted only */
  326. + assert("vs-1734", (page->mapping &&
  327. + page->mapping->host &&
  328. + reiser4_get_super_fake(page->mapping->host->i_sb) !=
  329. + page->mapping->host &&
  330. + reiser4_get_cc_fake(page->mapping->host->i_sb) !=
  331. + page->mapping->host &&
  332. + reiser4_get_bitmap_fake(page->mapping->host->i_sb) !=
  333. + page->mapping->host));
  334. + return __set_page_dirty_nobuffers(page);
  335. +}
  336. +
  337. +/* ->invalidatepage method for reiser4 */
  338. +
  339. +/*
  340. + * this is called for each truncated page from
  341. + * truncate_inode_pages()->truncate_{complete,partial}_page().
  342. + *
  343. + * At the moment of call, page is under lock, and outstanding io (if any) has
  344. + * completed.
  345. + */
  346. +
  347. +/**
  348. + * reiser4_invalidatepage
  349. + * @page: page to invalidate
  350. + * @offset: starting offset for partial invalidation
  351. + *
  352. + */
  353. +void reiser4_invalidatepage(struct page *page, unsigned int offset, unsigned int length)
  354. +{
  355. + int ret = 0;
  356. + int partial_page = (offset || length < PAGE_SIZE);
  357. + reiser4_context *ctx;
  358. + struct inode *inode;
  359. + jnode *node;
  360. +
  361. + /*
  362. + * This is called to truncate file's page.
  363. + *
  364. + * Originally, reiser4 implemented truncate in a standard way
  365. + * (vmtruncate() calls ->invalidatepage() on all truncated pages
  366. + * first, then file system ->truncate() call-back is invoked).
  367. + *
  368. + * This led to the problem when ->invalidatepage() was called on a
  369. + * page with jnode that was captured into atom in ASTAGE_PRE_COMMIT
  370. + * process. That is, truncate was bypassing transactions. To avoid
  371. + * this, try_capture_page_to_invalidate() call was added here.
  372. + *
  373. + * After many troubles with vmtruncate() based truncate (including
  374. + * races with flush, tail conversion, etc.) it was re-written in the
  375. + * top-to-bottom style: items are killed in reiser4_cut_tree_object()
  376. + * and pages belonging to extent are invalidated in kill_hook_extent().
  377. + * So probably now additional call to capture is not needed here.
  378. + */
  379. +
  380. + assert("nikita-3137", PageLocked(page));
  381. + assert("nikita-3138", !PageWriteback(page));
  382. + inode = page->mapping->host;
  383. +
  384. + /*
  385. + * ->invalidatepage() should only be called for the unformatted
  386. + * jnodes. Destruction of all other types of jnodes is performed
  387. + * separately. But, during some corner cases (like handling errors
  388. + * during mount) it is simpler to let ->invalidatepage to be called on
  389. + * them. Check for this, and do nothing.
  390. + */
  391. + if (reiser4_get_super_fake(inode->i_sb) == inode)
  392. + return;
  393. + if (reiser4_get_cc_fake(inode->i_sb) == inode)
  394. + return;
  395. + if (reiser4_get_bitmap_fake(inode->i_sb) == inode)
  396. + return;
  397. + assert("vs-1426", PagePrivate(page));
  398. + assert("vs-1427",
  399. + page->mapping == jnode_get_mapping(jnode_by_page(page)));
  400. + assert("", jprivate(page) != NULL);
  401. + assert("", ergo(inode_file_plugin(inode) !=
  402. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID),
  403. + offset == 0));
  404. +
  405. + ctx = reiser4_init_context(inode->i_sb);
  406. + if (IS_ERR(ctx))
  407. + return;
  408. +
  409. + node = jprivate(page);
  410. + spin_lock_jnode(node);
  411. + if (!(node->state & ((1 << JNODE_DIRTY) | (1 << JNODE_FLUSH_QUEUED) |
  412. + (1 << JNODE_WRITEBACK) | (1 << JNODE_OVRWR)))) {
  413. + /* there is no need to capture */
  414. + jref(node);
  415. + JF_SET(node, JNODE_HEARD_BANSHEE);
  416. + page_clear_jnode(page, node);
  417. + reiser4_uncapture_jnode(node);
  418. + unhash_unformatted_jnode(node);
  419. + jput(node);
  420. + reiser4_exit_context(ctx);
  421. + return;
  422. + }
  423. + spin_unlock_jnode(node);
  424. +
  425. + /* capture page being truncated. */
  426. + ret = try_capture_page_to_invalidate(page);
  427. + if (ret != 0)
  428. + warning("nikita-3141", "Cannot capture: %i", ret);
  429. +
  430. + if (!partial_page) {
  431. + /* remove jnode from transaction and detach it from page. */
  432. + jref(node);
  433. + JF_SET(node, JNODE_HEARD_BANSHEE);
  434. + /* page cannot be detached from jnode concurrently, because it
  435. + * is locked */
  436. + reiser4_uncapture_page(page);
  437. +
  438. + /* this detaches page from jnode, so that jdelete will not try
  439. + * to lock page which is already locked */
  440. + spin_lock_jnode(node);
  441. + page_clear_jnode(page, node);
  442. + spin_unlock_jnode(node);
  443. + unhash_unformatted_jnode(node);
  444. +
  445. + jput(node);
  446. + }
  447. +
  448. + reiser4_exit_context(ctx);
  449. +}
  450. +
  451. +/* help function called from reiser4_releasepage(). It returns true if jnode
  452. + * can be detached from its page and page released. */
  453. +int jnode_is_releasable(jnode * node/* node to check */)
  454. +{
  455. + assert("nikita-2781", node != NULL);
  456. + assert_spin_locked(&(node->guard));
  457. + assert_spin_locked(&(node->load));
  458. +
  459. + /* if some thread is currently using the jnode page, the latter cannot
  460. + * be detached */
  461. + if (atomic_read(&node->d_count) != 0)
  462. + return 0;
  463. +
  464. + assert("vs-1214", !jnode_is_loaded(node));
  465. +
  466. + /*
  467. + * can only release page if real block number is assigned to it. Simple
  468. + * check for ->atom wouldn't do, because it is possible for node to be
  469. + * clean, not in atom yet, and still having fake block number. For
  470. + * example, node just created in jinit_new().
  471. + */
  472. + if (reiser4_blocknr_is_fake(jnode_get_block(node)))
  473. + return 0;
  474. +
  475. + /*
  476. + * pages prepared for write can not be released anyway, so avoid
  477. + * detaching jnode from the page
  478. + */
  479. + if (JF_ISSET(node, JNODE_WRITE_PREPARED))
  480. + return 0;
  481. +
  482. + /*
  483. + * dirty jnode cannot be released. It can however be submitted to disk
  484. + * as part of early flushing, but only after getting flush-prepped.
  485. + */
  486. + if (JF_ISSET(node, JNODE_DIRTY))
  487. + return 0;
  488. +
  489. + /* overwrite set is only written by log writer. */
  490. + if (JF_ISSET(node, JNODE_OVRWR))
  491. + return 0;
  492. +
  493. + /* jnode is already under writeback */
  494. + if (JF_ISSET(node, JNODE_WRITEBACK))
  495. + return 0;
  496. +
  497. + /* don't flush bitmaps or journal records */
  498. + if (!jnode_is_znode(node) && !jnode_is_unformatted(node))
  499. + return 0;
  500. +
  501. + return 1;
  502. +}
  503. +
  504. +/*
  505. + * ->releasepage method for reiser4
  506. + *
  507. + * This is called by VM scanner when it comes across clean page. What we have
  508. + * to do here is to check whether page can really be released (freed that is)
  509. + * and if so, detach jnode from it and remove page from the page cache.
  510. + *
  511. + * Check for releasability is done by releasable() function.
  512. + */
  513. +int reiser4_releasepage(struct page *page, gfp_t gfp UNUSED_ARG)
  514. +{
  515. + jnode *node;
  516. +
  517. + assert("nikita-2257", PagePrivate(page));
  518. + assert("nikita-2259", PageLocked(page));
  519. + assert("nikita-2892", !PageWriteback(page));
  520. + assert("nikita-3019", reiser4_schedulable());
  521. +
  522. + /* NOTE-NIKITA: this can be called in the context of reiser4 call. It
  523. + is not clear what to do in this case. A lot of deadlocks seem to be
  524. + possible. */
  525. +
  526. + node = jnode_by_page(page);
  527. + assert("nikita-2258", node != NULL);
  528. + assert("reiser4-4", page->mapping != NULL);
  529. + assert("reiser4-5", page->mapping->host != NULL);
  530. +
  531. + if (PageDirty(page))
  532. + return 0;
  533. +
  534. + /* extra page reference is used by reiser4 to protect
  535. + * jnode<->page link from this ->releasepage(). */
  536. + if (page_count(page) > 3)
  537. + return 0;
  538. +
  539. + /* releasable() needs jnode lock, because it looks at the jnode fields
  540. + * and we need jload_lock here to avoid races with jload(). */
  541. + spin_lock_jnode(node);
  542. + spin_lock(&(node->load));
  543. + if (jnode_is_releasable(node)) {
  544. + struct address_space *mapping;
  545. +
  546. + mapping = page->mapping;
  547. + jref(node);
  548. + /* there is no need to synchronize against
  549. + * jnode_extent_write() here, because pages seen by
  550. + * jnode_extent_write() are !releasable(). */
  551. + page_clear_jnode(page, node);
  552. + spin_unlock(&(node->load));
  553. + spin_unlock_jnode(node);
  554. +
  555. + /* we are under memory pressure so release jnode also. */
  556. + jput(node);
  557. +
  558. + return 1;
  559. + } else {
  560. + spin_unlock(&(node->load));
  561. + spin_unlock_jnode(node);
  562. + assert("nikita-3020", reiser4_schedulable());
  563. + return 0;
  564. + }
  565. +}
  566. +
  567. +#ifdef CONFIG_MIGRATION
  568. +int reiser4_migratepage(struct address_space *mapping, struct page *newpage,
  569. + struct page *page, enum migrate_mode mode)
  570. +{
  571. + /* TODO: implement movable mapping
  572. + */
  573. + return -EIO;
  574. +}
  575. +#endif /* CONFIG_MIGRATION */
  576. +
  577. +int reiser4_readpage_dispatch(struct file *file, struct page *page)
  578. +{
  579. + assert("edward-1533", PageLocked(page));
  580. + assert("edward-1534", !PageUptodate(page));
  581. + assert("edward-1535", page->mapping && page->mapping->host);
  582. +
  583. + return inode_file_plugin(page->mapping->host)->readpage(file, page);
  584. +}
  585. +
  586. +int reiser4_readpages_dispatch(struct file *file, struct address_space *mapping,
  587. + struct list_head *pages, unsigned nr_pages)
  588. +{
  589. + return inode_file_plugin(mapping->host)->readpages(file, mapping,
  590. + pages, nr_pages);
  591. +}
  592. +
  593. +int reiser4_writepages_dispatch(struct address_space *mapping,
  594. + struct writeback_control *wbc)
  595. +{
  596. + return inode_file_plugin(mapping->host)->writepages(mapping, wbc);
  597. +}
  598. +
  599. +int reiser4_writepages_directory(struct address_space *mapping,
  600. + struct writeback_control *wbc)
  601. +{
  602. + return 0;
  603. +}
  604. +
  605. +/* Make Linus happy.
  606. + Local variables:
  607. + c-indentation-style: "K&R"
  608. + mode-name: "LC"
  609. + c-basic-offset: 8
  610. + tab-width: 8
  611. + fill-column: 120
  612. + End:
  613. +*/
  614. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/block_alloc.c linux-5.16.14/fs/reiser4/block_alloc.c
  615. --- linux-5.16.14.orig/fs/reiser4/block_alloc.c 1970-01-01 01:00:00.000000000 +0100
  616. +++ linux-5.16.14/fs/reiser4/block_alloc.c 2022-03-12 13:26:19.639892697 +0100
  617. @@ -0,0 +1,1177 @@
  618. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  619. +reiser4/README */
  620. +
  621. +#include "debug.h"
  622. +#include "dformat.h"
  623. +#include "plugin/plugin.h"
  624. +#include "txnmgr.h"
  625. +#include "znode.h"
  626. +#include "block_alloc.h"
  627. +#include "tree.h"
  628. +#include "super.h"
  629. +#include "discard.h"
  630. +
  631. +#include <linux/types.h> /* for __u?? */
  632. +#include <linux/fs.h> /* for struct super_block */
  633. +#include <linux/spinlock.h>
  634. +
  635. +/* THE REISER4 DISK SPACE RESERVATION SCHEME. */
  636. +
  637. +/* We need to be able to reserve enough disk space to ensure that an atomic
  638. + operation will have enough disk space to flush (see flush.c and
  639. + http://namesys.com/v4/v4.html) and commit it once it is started.
  640. +
  641. + In our design a call for reserving disk space may fail but not an actual
  642. + block allocation.
  643. +
  644. + All free blocks, already allocated blocks, and all kinds of reserved blocks
  645. + are counted in different per-fs block counters.
  646. +
  647. + A reiser4 super block's set of block counters currently is:
  648. +
  649. + free -- free blocks,
  650. + used -- already allocated blocks,
  651. +
  652. + grabbed -- initially reserved for performing an fs operation, those blocks
  653. + are taken from free blocks, then grabbed disk space leaks from grabbed
  654. + blocks counter to other counters like "fake allocated", "flush
  655. + reserved", "used", the rest of the unused grabbed space is returned to
  656. + free space at the end of fs operation;
  657. +
  658. + fake allocated -- counts all nodes without real disk block numbers assigned,
  659. + we have separate accounting for formatted and unformatted
  660. + nodes (for easier debugging);
  661. +
  662. + flush reserved -- disk space needed for flushing and committing an atom.
  663. + Each dirty already allocated block could be written as a
  664. + part of atom's overwrite set or as a part of atom's
  665. + relocate set. In both cases one additional block is needed,
  666. + it is used as a wandered block if we do overwrite or as a
  667. + new location for a relocated block.
  668. +
  669. + In addition, blocks in some states are counted on per-thread and per-atom
  670. + basis. A reiser4 context has a counter of blocks grabbed by this transaction
  671. + and the sb's grabbed blocks counter is a sum of grabbed blocks counter values
  672. + of each reiser4 context. Each reiser4 atom has a counter of "flush reserved"
  673. + blocks, which are reserved for flush processing and atom commit. */
  674. +
  675. +/* AN EXAMPLE: suppose we insert new item to the reiser4 tree. We estimate
  676. + number of blocks to grab for most expensive case of balancing when the leaf
  677. + node we insert new item to gets split and new leaf node is allocated.
  678. +
  679. + So, we need to grab blocks for
  680. +
  681. + 1) one block for possible dirtying the node we insert an item to. That block
  682. + would be used for node relocation at flush time or for allocating of a
  683. + wandered one, it depends what will be a result (what set, relocate or
  684. + overwrite the node gets assigned to) of the node processing by the flush
  685. + algorithm.
  686. +
  687. + 2) one block for either allocating a new node, or dirtying of right or left
  688. + clean neighbor, only one case may happen.
  689. +
  690. + VS-FIXME-HANS: why can only one case happen? I would expect to see dirtying
  691. + of left neighbor, right neighbor, current node, and creation of new node.
  692. + Have I forgotten something? email me.
  693. +
  694. + These grabbed blocks are counted in both reiser4 context "grabbed blocks"
  695. + counter and in the fs-wide one (both ctx->grabbed_blocks and
  696. + sbinfo->blocks_grabbed get incremented by 2), sb's free blocks counter is
  697. + decremented by 2.
  698. +
  699. + Suppose both two blocks were spent for dirtying of an already allocated clean
  700. + node (one block went from "grabbed" to "flush reserved") and for new block
  701. + allocating (one block went from "grabbed" to "fake allocated formatted").
  702. +
  703. + Inserting of a child pointer to the parent node caused parent node to be
  704. + split, the balancing code takes care about this grabbing necessary space
  705. + immediately by calling reiser4_grab with BA_RESERVED flag set which means
  706. + "can use the 5% reserved disk space".
  707. +
  708. + At this moment insertion completes and grabbed blocks (if they were not used)
  709. + should be returned to the free space counter.
  710. +
  711. + However the atom life-cycle is not completed. The atom had one "flush
  712. + reserved" block added by our insertion and the new fake allocated node is
  713. + counted as a "fake allocated formatted" one. The atom has to be fully
  714. + processed by flush before commit. Suppose that the flush moved the first,
  715. + already allocated node to the atom's overwrite list, the new fake allocated
  716. + node, obviously, went into the atom relocate set. The reiser4 flush
  717. + allocates the new node using one unit from "fake allocated formatted"
  718. + counter, the log writer uses one from "flush reserved" for wandered block
  719. + allocation.
  720. +
  721. + And, it is not the end. When the wandered block is deallocated after the
  722. + atom gets fully played (see wander.c for term description), the disk space
  723. + occupied for it is returned to free blocks. */
  724. +
  725. +/* BLOCK NUMBERS */
  726. +
  727. +/* Any reiser4 node has a block number assigned to it. We use these numbers for
  728. + indexing in hash tables, so if a block has not yet been assigned a location
  729. + on disk we need to give it a temporary fake block number.
  730. +
  731. + Current implementation of reiser4 uses 64-bit integers for block numbers. We
  732. + use highest bit in 64-bit block number to distinguish fake and real block
  733. + numbers. So, only 63 bits may be used for addressing real device
  734. + blocks. That "fake" block numbers space is divided into subspaces of fake
  735. + block numbers for data blocks and for shadow (working) bitmap blocks.
  736. +
  737. + Fake block numbers for data blocks are generated by a cyclic counter, which
  738. + gets incremented after each real block allocation. We assume that it is
  739. + impossible to overload this counter during one transaction life. */
  740. +
  741. +/* Initialize a blocknr hint. */
  742. +void reiser4_blocknr_hint_init(reiser4_blocknr_hint * hint)
  743. +{
  744. + memset(hint, 0, sizeof(reiser4_blocknr_hint));
  745. +}
  746. +
  747. +/* Release any resources of a blocknr hint. */
  748. +void reiser4_blocknr_hint_done(reiser4_blocknr_hint * hint UNUSED_ARG)
  749. +{
  750. +/* No resources should be freed in current blocknr_hint implementation. */
  751. +}
  752. +
  753. +/* see above for explanation of fake block number. */
  754. +/* Audited by: green(2002.06.11) */
  755. +int reiser4_blocknr_is_fake(const reiser4_block_nr * da)
  756. +{
  757. + /* The reason for not simply returning result of '&' operation is that
  758. + while return value is (possibly 32bit) int, the reiser4_block_nr is
  759. + at least 64 bits long, and high bit (which is the only possible
  760. + non zero bit after the masking) would be stripped off */
  761. + return (*da & REISER4_FAKE_BLOCKNR_BIT_MASK) ? 1 : 0;
  762. +}
  763. +
  764. +/* Static functions for <reiser4 super block>/<reiser4 context> block counters
  765. + arithmetic. Mostly, they are isolated so as not to repeat assertions in
  766. + several places. */
  767. +static void sub_from_ctx_grabbed(reiser4_context * ctx, __u64 count)
  768. +{
  769. + BUG_ON(ctx->grabbed_blocks < count);
  770. + assert("zam-527", ctx->grabbed_blocks >= count);
  771. + ctx->grabbed_blocks -= count;
  772. +}
  773. +
  774. +static void add_to_ctx_grabbed(reiser4_context * ctx, __u64 count)
  775. +{
  776. + ctx->grabbed_blocks += count;
  777. +}
  778. +
  779. +static void sub_from_sb_grabbed(reiser4_super_info_data * sbinfo, __u64 count)
  780. +{
  781. + assert("zam-525", sbinfo->blocks_grabbed >= count);
  782. + sbinfo->blocks_grabbed -= count;
  783. +}
  784. +
  785. +/* Decrease the counter of block reserved for flush in super block. */
  786. +static void
  787. +sub_from_sb_flush_reserved(reiser4_super_info_data * sbinfo, __u64 count)
  788. +{
  789. + assert("vpf-291", sbinfo->blocks_flush_reserved >= count);
  790. + sbinfo->blocks_flush_reserved -= count;
  791. +}
  792. +
  793. +static void
  794. +sub_from_sb_fake_allocated(reiser4_super_info_data * sbinfo, __u64 count,
  795. + reiser4_ba_flags_t flags)
  796. +{
  797. + if (flags & BA_FORMATTED) {
  798. + assert("zam-806", sbinfo->blocks_fake_allocated >= count);
  799. + sbinfo->blocks_fake_allocated -= count;
  800. + } else {
  801. + assert("zam-528",
  802. + sbinfo->blocks_fake_allocated_unformatted >= count);
  803. + sbinfo->blocks_fake_allocated_unformatted -= count;
  804. + }
  805. +}
  806. +
  807. +static void sub_from_sb_used(reiser4_super_info_data * sbinfo, __u64 count)
  808. +{
  809. + assert("zam-530",
  810. + sbinfo->blocks_used >= count + sbinfo->min_blocks_used);
  811. + sbinfo->blocks_used -= count;
  812. +}
  813. +
  814. +static void
  815. +sub_from_cluster_reserved(reiser4_super_info_data * sbinfo, __u64 count)
  816. +{
  817. + assert("edward-501", sbinfo->blocks_clustered >= count);
  818. + sbinfo->blocks_clustered -= count;
  819. +}
  820. +
  821. +/* Increase the counter of block reserved for flush in atom. */
  822. +static void add_to_atom_flush_reserved_nolock(txn_atom * atom, __u32 count)
  823. +{
  824. + assert("zam-772", atom != NULL);
  825. + assert_spin_locked(&(atom->alock));
  826. + atom->flush_reserved += count;
  827. +}
  828. +
  829. +/* Decrease the counter of block reserved for flush in atom. */
  830. +static void sub_from_atom_flush_reserved_nolock(txn_atom * atom, __u32 count)
  831. +{
  832. + assert("zam-774", atom != NULL);
  833. + assert_spin_locked(&(atom->alock));
  834. + assert("nikita-2790", atom->flush_reserved >= count);
  835. + atom->flush_reserved -= count;
  836. +}
  837. +
  838. +/* super block has 6 counters: free, used, grabbed, fake allocated
  839. + (formatted and unformatted) and flush reserved. Their sum must be
  840. + number of blocks on a device. This function checks this */
  841. +int reiser4_check_block_counters(const struct super_block *super)
  842. +{
  843. + __u64 sum;
  844. +
  845. + sum = reiser4_grabbed_blocks(super) + reiser4_free_blocks(super) +
  846. + reiser4_data_blocks(super) + reiser4_fake_allocated(super) +
  847. + reiser4_fake_allocated_unformatted(super) + reiser4_flush_reserved(super) +
  848. + reiser4_clustered_blocks(super);
  849. + if (reiser4_block_count(super) != sum) {
  850. + printk("super block counters: "
  851. + "used %llu, free %llu, "
  852. + "grabbed %llu, fake allocated (formatetd %llu, unformatted %llu), "
  853. + "reserved %llu, clustered %llu, sum %llu, must be (block count) %llu\n",
  854. + (unsigned long long)reiser4_data_blocks(super),
  855. + (unsigned long long)reiser4_free_blocks(super),
  856. + (unsigned long long)reiser4_grabbed_blocks(super),
  857. + (unsigned long long)reiser4_fake_allocated(super),
  858. + (unsigned long long)
  859. + reiser4_fake_allocated_unformatted(super),
  860. + (unsigned long long)reiser4_flush_reserved(super),
  861. + (unsigned long long)reiser4_clustered_blocks(super),
  862. + (unsigned long long)sum,
  863. + (unsigned long long)reiser4_block_count(super));
  864. + return 0;
  865. + }
  866. + return 1;
  867. +}
  868. +
  869. +/* Adjust "working" free blocks counter for number of blocks we are going to
  870. + allocate. Record number of grabbed blocks in fs-wide and per-thread
  871. + counters. This function should be called before bitmap scanning or
  872. + allocating fake block numbers
  873. +
  874. + @super -- pointer to reiser4 super block;
  875. + @count -- number of blocks we reserve;
  876. +
  877. + @return -- 0 if success, -ENOSPC, if all
  878. + free blocks are preserved or already allocated.
  879. +*/
  880. +
  881. +static int
  882. +reiser4_grab(reiser4_context * ctx, __u64 count, reiser4_ba_flags_t flags)
  883. +{
  884. + __u64 free_blocks;
  885. + int ret = 0, use_reserved = flags & BA_RESERVED;
  886. + reiser4_super_info_data *sbinfo;
  887. +
  888. + assert("vs-1276", ctx == get_current_context());
  889. +
  890. + /* Do not grab anything on ro-mounted fs. */
  891. + if (sb_rdonly(ctx->super)) {
  892. + ctx->grab_enabled = 0;
  893. + ctx->ro = 1;
  894. + return 0;
  895. + }
  896. +
  897. + sbinfo = get_super_private(ctx->super);
  898. +
  899. + spin_lock_reiser4_super(sbinfo);
  900. +
  901. + free_blocks = sbinfo->blocks_free;
  902. +
  903. + if ((use_reserved && free_blocks < count) ||
  904. + (!use_reserved && free_blocks < count + sbinfo->blocks_reserved)) {
  905. + ret = RETERR(-ENOSPC);
  906. + goto unlock_and_ret;
  907. + }
  908. +
  909. + add_to_ctx_grabbed(ctx, count);
  910. +
  911. + sbinfo->blocks_grabbed += count;
  912. + sbinfo->blocks_free -= count;
  913. +
  914. +#if REISER4_DEBUG
  915. + if (ctx->grabbed_initially == 0)
  916. + ctx->grabbed_initially = count;
  917. +#endif
  918. +
  919. + assert("nikita-2986", reiser4_check_block_counters(ctx->super));
  920. +
  921. + /* disable grab space in current context */
  922. + ctx->grab_enabled = 0;
  923. +
  924. +unlock_and_ret:
  925. + spin_unlock_reiser4_super(sbinfo);
  926. +
  927. + return ret;
  928. +}
  929. +
  930. +int reiser4_grab_space(__u64 count, reiser4_ba_flags_t flags)
  931. +{
  932. + int ret;
  933. + reiser4_context *ctx;
  934. +
  935. + assert("nikita-2964", ergo(flags & BA_CAN_COMMIT,
  936. + lock_stack_isclean(get_current_lock_stack
  937. + ())));
  938. + ctx = get_current_context();
  939. + if (!(flags & BA_FORCE) && !is_grab_enabled(ctx))
  940. + return 0;
  941. +
  942. + ret = reiser4_grab(ctx, count, flags);
  943. + if (ret == -ENOSPC) {
  944. +
  945. + /* Try to commit all the transactions if the BA_CAN_COMMIT flag is
  946. + present */
  947. + if (flags & BA_CAN_COMMIT) {
  948. + txnmgr_force_commit_all(ctx->super, 0);
  949. + ctx->grab_enabled = 1;
  950. + ret = reiser4_grab(ctx, count, flags);
  951. + }
  952. + }
  953. + /*
  954. + * allocation from reserved pool cannot fail. This is severe error.
  955. + */
  956. + assert("nikita-3005", ergo(flags & BA_RESERVED, ret == 0));
  957. + return ret;
  958. +}
  959. +
  960. +/*
  961. + * SPACE RESERVED FOR UNLINK/TRUNCATE
  962. + *
  963. + * Unlink and truncate require space in transaction (to update stat data, at
  964. + * least). But we don't want rm(1) to fail with "No space on device" error.
  965. + *
  966. + * Solution is to reserve 5% of disk space for truncates and
  967. + * unlinks. Specifically, normal space grabbing requests don't grab space from
  968. + * reserved area. Only requests with BA_RESERVED bit in flags are allowed to
  969. + * drain it. Per super block delete mutex is used to allow only one
  970. + * thread at a time to grab from reserved area.
  971. + *
  972. + * Grabbing from reserved area should always be performed with BA_CAN_COMMIT
  973. + * flag.
  974. + *
  975. + */
  976. +
  977. +int reiser4_grab_reserved(struct super_block *super,
  978. + __u64 count, reiser4_ba_flags_t flags)
  979. +{
  980. + reiser4_super_info_data *sbinfo = get_super_private(super);
  981. +
  982. + assert("nikita-3175", flags & BA_CAN_COMMIT);
  983. +
  984. + /* Check whether the delete mutex is already taken by us; we assume
  985. + * that reading of a machine word is atomic. */
  986. + if (sbinfo->delete_mutex_owner == current) {
  987. + if (reiser4_grab_space
  988. + (count, (flags | BA_RESERVED) & ~BA_CAN_COMMIT)) {
  989. + warning("zam-1003",
  990. + "nested call of grab_reserved fails count=(%llu)",
  991. + (unsigned long long)count);
  992. + reiser4_release_reserved(super);
  993. + return RETERR(-ENOSPC);
  994. + }
  995. + return 0;
  996. + }
  997. +
  998. + if (reiser4_grab_space(count, flags)) {
  999. + mutex_lock(&sbinfo->delete_mutex);
  1000. + assert("nikita-2929", sbinfo->delete_mutex_owner == NULL);
  1001. + sbinfo->delete_mutex_owner = current;
  1002. +
  1003. + if (reiser4_grab_space(count, flags | BA_RESERVED)) {
  1004. + warning("zam-833",
  1005. + "reserved space is not enough (%llu)",
  1006. + (unsigned long long)count);
  1007. + reiser4_release_reserved(super);
  1008. + return RETERR(-ENOSPC);
  1009. + }
  1010. + }
  1011. + return 0;
  1012. +}
  1013. +
  1014. +void reiser4_release_reserved(struct super_block *super)
  1015. +{
  1016. + reiser4_super_info_data *info;
  1017. +
  1018. + info = get_super_private(super);
  1019. + if (info->delete_mutex_owner == current) {
  1020. + info->delete_mutex_owner = NULL;
  1021. + mutex_unlock(&info->delete_mutex);
  1022. + }
  1023. +}
  1024. +
  1025. +static reiser4_super_info_data *grabbed2fake_allocated_head(int count)
  1026. +{
  1027. + reiser4_context *ctx;
  1028. + reiser4_super_info_data *sbinfo;
  1029. +
  1030. + ctx = get_current_context();
  1031. + sub_from_ctx_grabbed(ctx, count);
  1032. +
  1033. + sbinfo = get_super_private(ctx->super);
  1034. + spin_lock_reiser4_super(sbinfo);
  1035. +
  1036. + sub_from_sb_grabbed(sbinfo, count);
  1037. + /* return sbinfo locked */
  1038. + return sbinfo;
  1039. +}
  1040. +
  1041. +/* is called after @count fake block numbers are allocated and pointer to
  1042. + those blocks are inserted into tree. */
  1043. +static void grabbed2fake_allocated_formatted(void)
  1044. +{
  1045. + reiser4_super_info_data *sbinfo;
  1046. +
  1047. + sbinfo = grabbed2fake_allocated_head(1);
  1048. + sbinfo->blocks_fake_allocated++;
  1049. +
  1050. + assert("vs-922", reiser4_check_block_counters(reiser4_get_current_sb()));
  1051. +
  1052. + spin_unlock_reiser4_super(sbinfo);
  1053. +}
  1054. +
  1055. +/**
  1056. + * grabbed2fake_allocated_unformatted
  1057. + * @count:
  1058. + *
  1059. + */
  1060. +static void grabbed2fake_allocated_unformatted(int count)
  1061. +{
  1062. + reiser4_super_info_data *sbinfo;
  1063. +
  1064. + sbinfo = grabbed2fake_allocated_head(count);
  1065. + sbinfo->blocks_fake_allocated_unformatted += count;
  1066. +
  1067. + assert("vs-9221", reiser4_check_block_counters(reiser4_get_current_sb()));
  1068. +
  1069. + spin_unlock_reiser4_super(sbinfo);
  1070. +}
  1071. +
  1072. +void grabbed2cluster_reserved(int count)
  1073. +{
  1074. + reiser4_context *ctx;
  1075. + reiser4_super_info_data *sbinfo;
  1076. +
  1077. + ctx = get_current_context();
  1078. + sub_from_ctx_grabbed(ctx, count);
  1079. +
  1080. + sbinfo = get_super_private(ctx->super);
  1081. + spin_lock_reiser4_super(sbinfo);
  1082. +
  1083. + sub_from_sb_grabbed(sbinfo, count);
  1084. + sbinfo->blocks_clustered += count;
  1085. +
  1086. + assert("edward-504", reiser4_check_block_counters(ctx->super));
  1087. +
  1088. + spin_unlock_reiser4_super(sbinfo);
  1089. +}
  1090. +
  1091. +void cluster_reserved2grabbed(int count)
  1092. +{
  1093. + reiser4_context *ctx;
  1094. + reiser4_super_info_data *sbinfo;
  1095. +
  1096. + ctx = get_current_context();
  1097. +
  1098. + sbinfo = get_super_private(ctx->super);
  1099. + spin_lock_reiser4_super(sbinfo);
  1100. +
  1101. + sub_from_cluster_reserved(sbinfo, count);
  1102. + sbinfo->blocks_grabbed += count;
  1103. +
  1104. + assert("edward-505", reiser4_check_block_counters(ctx->super));
  1105. +
  1106. + spin_unlock_reiser4_super(sbinfo);
  1107. + add_to_ctx_grabbed(ctx, count);
  1108. +}
  1109. +
  1110. +void cluster_reserved2free(int count)
  1111. +{
  1112. + reiser4_context *ctx;
  1113. + reiser4_super_info_data *sbinfo;
  1114. +
  1115. + ctx = get_current_context();
  1116. + sbinfo = get_super_private(ctx->super);
  1117. +
  1118. + cluster_reserved2grabbed(count);
  1119. + grabbed2free(ctx, sbinfo, count);
  1120. +}
  1121. +
  1122. +static DEFINE_SPINLOCK(fake_lock);
  1123. +static reiser4_block_nr fake_gen = 0;
  1124. +
  1125. +/**
  1126. + * assign_fake_blocknr
  1127. + * @blocknr:
  1128. + * @count:
  1129. + *
  1130. + * Obtain a fake block number for new node which will be used to refer to
  1131. + * this newly allocated node until real allocation is done.
  1132. + */
  1133. +static void assign_fake_blocknr(reiser4_block_nr *blocknr, int count)
  1134. +{
  1135. + spin_lock(&fake_lock);
  1136. + *blocknr = fake_gen;
  1137. + fake_gen += count;
  1138. + spin_unlock(&fake_lock);
  1139. +
  1140. + BUG_ON(*blocknr & REISER4_BLOCKNR_STATUS_BIT_MASK);
  1141. + /**blocknr &= ~REISER4_BLOCKNR_STATUS_BIT_MASK;*/
  1142. + *blocknr |= REISER4_UNALLOCATED_STATUS_VALUE;
  1143. + assert("zam-394", zlook(current_tree, blocknr) == NULL);
  1144. +}
  1145. +
  1146. +int assign_fake_blocknr_formatted(reiser4_block_nr * blocknr)
  1147. +{
  1148. + assign_fake_blocknr(blocknr, 1);
  1149. + grabbed2fake_allocated_formatted();
  1150. + return 0;
  1151. +}
  1152. +
  1153. +/**
  1154. + * fake_blocknrs_unformatted
  1155. + * @count: number of fake numbers to get
  1156. + *
  1157. + * Allocates @count fake block numbers which will be assigned to jnodes
  1158. + */
  1159. +reiser4_block_nr fake_blocknr_unformatted(int count)
  1160. +{
  1161. + reiser4_block_nr blocknr;
  1162. +
  1163. + assign_fake_blocknr(&blocknr, count);
  1164. + grabbed2fake_allocated_unformatted(count);
  1165. +
  1166. + return blocknr;
  1167. +}
  1168. +
  1169. +/* adjust sb block counters, if real (on-disk) block allocation immediately
  1170. + follows grabbing of free disk space. */
  1171. +static void grabbed2used(reiser4_context *ctx, reiser4_super_info_data *sbinfo,
  1172. + __u64 count)
  1173. +{
  1174. + sub_from_ctx_grabbed(ctx, count);
  1175. +
  1176. + spin_lock_reiser4_super(sbinfo);
  1177. +
  1178. + sub_from_sb_grabbed(sbinfo, count);
  1179. + sbinfo->blocks_used += count;
  1180. +
  1181. + assert("nikita-2679", reiser4_check_block_counters(ctx->super));
  1182. +
  1183. + spin_unlock_reiser4_super(sbinfo);
  1184. +}
  1185. +
  1186. +/* adjust sb block counters when @count unallocated blocks get mapped to disk */
  1187. +static void fake_allocated2used(reiser4_super_info_data *sbinfo, __u64 count,
  1188. + reiser4_ba_flags_t flags)
  1189. +{
  1190. + spin_lock_reiser4_super(sbinfo);
  1191. +
  1192. + sub_from_sb_fake_allocated(sbinfo, count, flags);
  1193. + sbinfo->blocks_used += count;
  1194. +
  1195. + assert("nikita-2680",
  1196. + reiser4_check_block_counters(reiser4_get_current_sb()));
  1197. +
  1198. + spin_unlock_reiser4_super(sbinfo);
  1199. +}
  1200. +
  1201. +static void flush_reserved2used(txn_atom * atom, __u64 count)
  1202. +{
  1203. + reiser4_super_info_data *sbinfo;
  1204. +
  1205. + assert("zam-787", atom != NULL);
  1206. + assert_spin_locked(&(atom->alock));
  1207. +
  1208. + sub_from_atom_flush_reserved_nolock(atom, (__u32) count);
  1209. +
  1210. + sbinfo = get_current_super_private();
  1211. + spin_lock_reiser4_super(sbinfo);
  1212. +
  1213. + sub_from_sb_flush_reserved(sbinfo, count);
  1214. + sbinfo->blocks_used += count;
  1215. +
  1216. + assert("zam-789",
  1217. + reiser4_check_block_counters(reiser4_get_current_sb()));
  1218. +
  1219. + spin_unlock_reiser4_super(sbinfo);
  1220. +}
  1221. +
  1222. +/* update the per fs blocknr hint default value. */
  1223. +void
  1224. +update_blocknr_hint_default(const struct super_block *s,
  1225. + const reiser4_block_nr * block)
  1226. +{
  1227. + reiser4_super_info_data *sbinfo = get_super_private(s);
  1228. +
  1229. + assert("nikita-3342", !reiser4_blocknr_is_fake(block));
  1230. +
  1231. + spin_lock_reiser4_super(sbinfo);
  1232. + if (*block < sbinfo->block_count) {
  1233. + sbinfo->blocknr_hint_default = *block;
  1234. + } else {
  1235. + warning("zam-676",
  1236. + "block number %llu is too large to be used in a blocknr hint\n",
  1237. + (unsigned long long)*block);
  1238. + dump_stack();
  1239. + DEBUGON(1);
  1240. + }
  1241. + spin_unlock_reiser4_super(sbinfo);
  1242. +}
  1243. +
  1244. +/* get current value of the default blocknr hint. */
  1245. +void get_blocknr_hint_default(reiser4_block_nr * result)
  1246. +{
  1247. + reiser4_super_info_data *sbinfo = get_current_super_private();
  1248. +
  1249. + spin_lock_reiser4_super(sbinfo);
  1250. + *result = sbinfo->blocknr_hint_default;
  1251. + assert("zam-677", *result < sbinfo->block_count);
  1252. + spin_unlock_reiser4_super(sbinfo);
  1253. +}
  1254. +
  1255. +/* Allocate "real" disk blocks by calling a proper space allocation plugin
  1256. + * method. Blocks are allocated in one contiguous disk region. The plugin
  1257. + * independent part accounts blocks by subtracting allocated amount from grabbed
  1258. + * or fake block counter and add the same amount to the counter of allocated
  1259. + * blocks.
  1260. + *
  1261. + * @hint -- a reiser4 blocknr hint object which contains further block
  1262. + * allocation hints and parameters (search start, a stage of block
  1263. + * which will be mapped to disk, etc.),
  1264. + * @blk -- an out parameter for the beginning of the allocated region,
  1265. + * @len -- in/out parameter, it should contain the maximum number of allocated
  1266. + * blocks, after block allocation completes, it contains the length of
  1267. + * allocated disk region.
  1268. + * @flags -- see reiser4_ba_flags_t description.
  1269. + *
  1270. + * @return -- 0 if success, error code otherwise.
  1271. + */
  1272. +int
  1273. +reiser4_alloc_blocks(reiser4_blocknr_hint * hint, reiser4_block_nr * blk,
  1274. + reiser4_block_nr * len, reiser4_ba_flags_t flags)
  1275. +{
  1276. + __u64 needed = *len;
  1277. + reiser4_context *ctx;
  1278. + reiser4_super_info_data *sbinfo;
  1279. + int ret;
  1280. +
  1281. + assert("zam-986", hint != NULL);
  1282. +
  1283. + ctx = get_current_context();
  1284. + sbinfo = get_super_private(ctx->super);
  1285. +
  1286. + /* For write-optimized data we use default search start value, which is
  1287. + * close to last write location. */
  1288. + if (flags & BA_USE_DEFAULT_SEARCH_START)
  1289. + get_blocknr_hint_default(&hint->blk);
  1290. +
  1291. + /* VITALY: allocator should grab this for internal/tx-lists/similar
  1292. + only. */
  1293. +/* VS-FIXME-HANS: why is this comment above addressed to vitaly (from vitaly)?*/
  1294. + if (hint->block_stage == BLOCK_NOT_COUNTED) {
  1295. + ret = reiser4_grab_space_force(*len, flags);
  1296. + if (ret != 0)
  1297. + return ret;
  1298. + }
  1299. +
  1300. + ret =
  1301. + sa_alloc_blocks(reiser4_get_space_allocator(ctx->super),
  1302. + hint, (int)needed, blk, len);
  1303. +
  1304. + if (!ret) {
  1305. + assert("zam-680", *blk < reiser4_block_count(ctx->super));
  1306. + assert("zam-681",
  1307. + *blk + *len <= reiser4_block_count(ctx->super));
  1308. +
  1309. + if (flags & BA_PERMANENT) {
  1310. + /* we assume that current atom exists at this moment */
  1311. + txn_atom *atom = get_current_atom_locked();
  1312. + atom->nr_blocks_allocated += *len;
  1313. + spin_unlock_atom(atom);
  1314. + }
  1315. +
  1316. + switch (hint->block_stage) {
  1317. + case BLOCK_NOT_COUNTED:
  1318. + case BLOCK_GRABBED:
  1319. + grabbed2used(ctx, sbinfo, *len);
  1320. + break;
  1321. + case BLOCK_UNALLOCATED:
  1322. + fake_allocated2used(sbinfo, *len, flags);
  1323. + break;
  1324. + case BLOCK_FLUSH_RESERVED:
  1325. + {
  1326. + txn_atom *atom = get_current_atom_locked();
  1327. + flush_reserved2used(atom, *len);
  1328. + spin_unlock_atom(atom);
  1329. + }
  1330. + break;
  1331. + default:
  1332. + impossible("zam-531", "wrong block stage");
  1333. + }
  1334. + } else {
  1335. + assert("zam-821",
  1336. + ergo(hint->max_dist == 0
  1337. + && !hint->backward, ret != -ENOSPC));
  1338. + if (hint->block_stage == BLOCK_NOT_COUNTED)
  1339. + grabbed2free(ctx, sbinfo, needed);
  1340. + }
  1341. +
  1342. + return ret;
  1343. +}
  1344. +
  1345. +/**
  1346. + * ask block allocator for some unformatted blocks
  1347. + */
  1348. +void allocate_blocks_unformatted(reiser4_blocknr_hint *preceder,
  1349. + reiser4_block_nr wanted_count,
  1350. + reiser4_block_nr *first_allocated,
  1351. + reiser4_block_nr *allocated,
  1352. + block_stage_t block_stage)
  1353. +{
  1354. + *allocated = wanted_count;
  1355. + preceder->max_dist = 0; /* scan whole disk, if needed */
  1356. +
  1357. + /* that number of blocks (wanted_count) is either in UNALLOCATED or in GRABBED */
  1358. + preceder->block_stage = block_stage;
  1359. +
  1360. + /* FIXME: we do not handle errors here now */
  1361. + check_me("vs-420",
  1362. + reiser4_alloc_blocks(preceder, first_allocated, allocated,
  1363. + BA_PERMANENT) == 0);
  1364. + /* update flush_pos's preceder to last allocated block number */
  1365. + preceder->blk = *first_allocated + *allocated - 1;
  1366. +}
  1367. +
  1368. +/* used -> fake_allocated -> grabbed -> free */
  1369. +
  1370. +/* adjust sb block counters when @count unallocated blocks get unmapped from
  1371. + disk */
  1372. +static void
  1373. +used2fake_allocated(reiser4_super_info_data * sbinfo, __u64 count,
  1374. + int formatted)
  1375. +{
  1376. + spin_lock_reiser4_super(sbinfo);
  1377. +
  1378. + if (formatted)
  1379. + sbinfo->blocks_fake_allocated += count;
  1380. + else
  1381. + sbinfo->blocks_fake_allocated_unformatted += count;
  1382. +
  1383. + sub_from_sb_used(sbinfo, count);
  1384. +
  1385. + assert("nikita-2681",
  1386. + reiser4_check_block_counters(reiser4_get_current_sb()));
  1387. +
  1388. + spin_unlock_reiser4_super(sbinfo);
  1389. +}
  1390. +
  1391. +static void
  1392. +used2flush_reserved(reiser4_super_info_data * sbinfo, txn_atom * atom,
  1393. + __u64 count, reiser4_ba_flags_t flags UNUSED_ARG)
  1394. +{
  1395. + assert("nikita-2791", atom != NULL);
  1396. + assert_spin_locked(&(atom->alock));
  1397. +
  1398. + add_to_atom_flush_reserved_nolock(atom, (__u32) count);
  1399. +
  1400. + spin_lock_reiser4_super(sbinfo);
  1401. +
  1402. + sbinfo->blocks_flush_reserved += count;
  1403. + /*add_to_sb_flush_reserved(sbinfo, count); */
  1404. + sub_from_sb_used(sbinfo, count);
  1405. +
  1406. + assert("nikita-2681",
  1407. + reiser4_check_block_counters(reiser4_get_current_sb()));
  1408. +
  1409. + spin_unlock_reiser4_super(sbinfo);
  1410. +}
  1411. +
  1412. +/* disk space, virtually used by fake block numbers is counted as "grabbed"
  1413. + again. */
  1414. +static void
  1415. +fake_allocated2grabbed(reiser4_context * ctx, reiser4_super_info_data * sbinfo,
  1416. + __u64 count, reiser4_ba_flags_t flags)
  1417. +{
  1418. + add_to_ctx_grabbed(ctx, count);
  1419. +
  1420. + spin_lock_reiser4_super(sbinfo);
  1421. +
  1422. + assert("nikita-2682", reiser4_check_block_counters(ctx->super));
  1423. +
  1424. + sbinfo->blocks_grabbed += count;
  1425. + sub_from_sb_fake_allocated(sbinfo, count, flags & BA_FORMATTED);
  1426. +
  1427. + assert("nikita-2683", reiser4_check_block_counters(ctx->super));
  1428. +
  1429. + spin_unlock_reiser4_super(sbinfo);
  1430. +}
  1431. +
  1432. +void fake_allocated2free(__u64 count, reiser4_ba_flags_t flags)
  1433. +{
  1434. + reiser4_context *ctx;
  1435. + reiser4_super_info_data *sbinfo;
  1436. +
  1437. + ctx = get_current_context();
  1438. + sbinfo = get_super_private(ctx->super);
  1439. +
  1440. + fake_allocated2grabbed(ctx, sbinfo, count, flags);
  1441. + grabbed2free(ctx, sbinfo, count);
  1442. +}
  1443. +
  1444. +void grabbed2free_mark(__u64 mark)
  1445. +{
  1446. + reiser4_context *ctx;
  1447. + reiser4_super_info_data *sbinfo;
  1448. +
  1449. + ctx = get_current_context();
  1450. + sbinfo = get_super_private(ctx->super);
  1451. +
  1452. + assert("nikita-3007", (__s64) mark >= 0);
  1453. + assert("nikita-3006", ctx->grabbed_blocks >= mark);
  1454. + grabbed2free(ctx, sbinfo, ctx->grabbed_blocks - mark);
  1455. +}
  1456. +
  1457. +/**
  1458. + * grabbed2free - adjust grabbed and free block counters
  1459. + * @ctx: context to update grabbed block counter of
  1460. + * @sbinfo: super block to update grabbed and free block counters of
  1461. + * @count: number of blocks to adjust counters by
  1462. + *
  1463. + * Decreases context's and per filesystem's counters of grabbed
  1464. + * blocks. Increases per filesystem's counter of free blocks.
  1465. + */
  1466. +void grabbed2free(reiser4_context *ctx, reiser4_super_info_data *sbinfo,
  1467. + __u64 count)
  1468. +{
  1469. + sub_from_ctx_grabbed(ctx, count);
  1470. +
  1471. + spin_lock_reiser4_super(sbinfo);
  1472. +
  1473. + sub_from_sb_grabbed(sbinfo, count);
  1474. + sbinfo->blocks_free += count;
  1475. + assert("nikita-2684", reiser4_check_block_counters(ctx->super));
  1476. +
  1477. + spin_unlock_reiser4_super(sbinfo);
  1478. +}
  1479. +
  1480. +void grabbed2flush_reserved_nolock(txn_atom * atom, __u64 count)
  1481. +{
  1482. + reiser4_context *ctx;
  1483. + reiser4_super_info_data *sbinfo;
  1484. +
  1485. + assert("vs-1095", atom);
  1486. +
  1487. + ctx = get_current_context();
  1488. + sbinfo = get_super_private(ctx->super);
  1489. +
  1490. + sub_from_ctx_grabbed(ctx, count);
  1491. +
  1492. + add_to_atom_flush_reserved_nolock(atom, count);
  1493. +
  1494. + spin_lock_reiser4_super(sbinfo);
  1495. +
  1496. + sbinfo->blocks_flush_reserved += count;
  1497. + sub_from_sb_grabbed(sbinfo, count);
  1498. +
  1499. + assert("vpf-292", reiser4_check_block_counters(ctx->super));
  1500. +
  1501. + spin_unlock_reiser4_super(sbinfo);
  1502. +}
  1503. +
  1504. +void grabbed2flush_reserved(__u64 count)
  1505. +{
  1506. + txn_atom *atom = get_current_atom_locked();
  1507. +
  1508. + grabbed2flush_reserved_nolock(atom, count);
  1509. +
  1510. + spin_unlock_atom(atom);
  1511. +}
  1512. +
  1513. +void flush_reserved2grabbed(txn_atom * atom, __u64 count)
  1514. +{
  1515. + reiser4_context *ctx;
  1516. + reiser4_super_info_data *sbinfo;
  1517. +
  1518. + assert("nikita-2788", atom != NULL);
  1519. + assert_spin_locked(&(atom->alock));
  1520. +
  1521. + ctx = get_current_context();
  1522. + sbinfo = get_super_private(ctx->super);
  1523. +
  1524. + add_to_ctx_grabbed(ctx, count);
  1525. +
  1526. + sub_from_atom_flush_reserved_nolock(atom, (__u32) count);
  1527. +
  1528. + spin_lock_reiser4_super(sbinfo);
  1529. +
  1530. + sbinfo->blocks_grabbed += count;
  1531. + sub_from_sb_flush_reserved(sbinfo, count);
  1532. +
  1533. + assert("vpf-292", reiser4_check_block_counters(ctx->super));
  1534. +
  1535. + spin_unlock_reiser4_super(sbinfo);
  1536. +}
  1537. +
  1538. +/**
  1539. + * all_grabbed2free - releases all blocks grabbed in context
  1540. + *
  1541. + * Decreases context's and super block's grabbed block counters by number of
  1542. + * blocks grabbed by current context and increases super block's free block
  1543. + * counter correspondingly.
  1544. + */
  1545. +void all_grabbed2free(void)
  1546. +{
  1547. + reiser4_context *ctx = get_current_context();
  1548. +
  1549. + grabbed2free(ctx, get_super_private(ctx->super), ctx->grabbed_blocks);
  1550. +}
  1551. +
  1552. +/* adjust sb block counters if real (on-disk) blocks do not become unallocated
  1553. + after freeing, @count blocks become "grabbed". */
  1554. +static void
  1555. +used2grabbed(reiser4_context * ctx, reiser4_super_info_data * sbinfo,
  1556. + __u64 count)
  1557. +{
  1558. + add_to_ctx_grabbed(ctx, count);
  1559. +
  1560. + spin_lock_reiser4_super(sbinfo);
  1561. +
  1562. + sbinfo->blocks_grabbed += count;
  1563. + sub_from_sb_used(sbinfo, count);
  1564. +
  1565. + assert("nikita-2685", reiser4_check_block_counters(ctx->super));
  1566. +
  1567. + spin_unlock_reiser4_super(sbinfo);
  1568. +}
  1569. +
  1570. +/* this used to be done through used2grabbed and grabbed2free*/
  1571. +static void used2free(reiser4_super_info_data * sbinfo, __u64 count)
  1572. +{
  1573. + spin_lock_reiser4_super(sbinfo);
  1574. +
  1575. + sbinfo->blocks_free += count;
  1576. + sub_from_sb_used(sbinfo, count);
  1577. +
  1578. + assert("nikita-2685",
  1579. + reiser4_check_block_counters(reiser4_get_current_sb()));
  1580. +
  1581. + spin_unlock_reiser4_super(sbinfo);
  1582. +}
  1583. +
  1584. +/* check "allocated" state of given block range */
  1585. +int
  1586. +reiser4_check_blocks(const reiser4_block_nr * start,
  1587. + const reiser4_block_nr * len, int desired)
  1588. +{
  1589. + return sa_check_blocks(start, len, desired);
  1590. +}
  1591. +
  1592. +/* Blocks deallocation function may do an actual deallocation through space
  1593. + plugin allocation or store deleted block numbers in atom's delete_set data
  1594. + structure depend on @defer parameter. */
  1595. +
  1596. +/* if BA_DEFER bit is not turned on, @target_stage means the stage of blocks
  1597. + which will be deleted from WORKING bitmap. They might be just unmapped from
  1598. + disk, or freed but disk space is still grabbed by current thread, or these
  1599. + blocks must not be counted in any reiser4 sb block counters,
  1600. + see block_stage_t comment */
  1601. +
1602. +/* BA_FORMATTED bit is only used when BA_DEFER is not present: it is used to
  1603. + distinguish blocks allocated for unformatted and formatted nodes */
  1604. +
  1605. +int
  1606. +reiser4_dealloc_blocks(const reiser4_block_nr * start,
  1607. + const reiser4_block_nr * len,
  1608. + block_stage_t target_stage, reiser4_ba_flags_t flags)
  1609. +{
  1610. + txn_atom *atom = NULL;
  1611. + int ret;
  1612. + reiser4_context *ctx;
  1613. + reiser4_super_info_data *sbinfo;
  1614. + void *new_entry = NULL;
  1615. +
  1616. + ctx = get_current_context();
  1617. + sbinfo = get_super_private(ctx->super);
  1618. +
  1619. + if (REISER4_DEBUG) {
  1620. + assert("zam-431", *len != 0);
  1621. + assert("zam-432", *start != 0);
  1622. + assert("zam-558", !reiser4_blocknr_is_fake(start));
  1623. +
  1624. + spin_lock_reiser4_super(sbinfo);
  1625. + assert("zam-562", *start < sbinfo->block_count);
  1626. + spin_unlock_reiser4_super(sbinfo);
  1627. + }
  1628. +
  1629. + if (flags & BA_DEFER) {
  1630. + /*
  1631. + * These blocks will be later deallocated by apply_dset().
  1632. + * It is equivalent to a non-deferred deallocation with target
  1633. + * stage BLOCK_NOT_COUNTED.
  1634. + */
  1635. +
  1636. + /* store deleted block numbers in the atom's deferred delete set
  1637. + for further actual deletion */
  1638. + do {
  1639. + atom = get_current_atom_locked();
  1640. + assert("zam-430", atom != NULL);
  1641. +
  1642. + ret = atom_dset_deferred_add_extent(atom, &new_entry, start, len);
  1643. +
  1644. + if (ret == -ENOMEM)
  1645. + return ret;
  1646. +
  1647. + /* This loop might spin at most two times */
  1648. + } while (ret == -E_REPEAT);
  1649. +
  1650. + assert("zam-477", ret == 0);
  1651. + assert("zam-433", atom != NULL);
  1652. +
  1653. + spin_unlock_atom(atom);
  1654. +
  1655. + } else {
  1656. + assert("zam-425", get_current_super_private() != NULL);
  1657. + sa_dealloc_blocks(reiser4_get_space_allocator(ctx->super),
  1658. + *start, *len);
  1659. +
  1660. + if (flags & BA_PERMANENT) {
  1661. + /* These blocks were counted as allocated, we have to
  1662. + * revert it back if allocation is discarded. */
  1663. + txn_atom *atom = get_current_atom_locked();
  1664. + atom->nr_blocks_allocated -= *len;
  1665. + spin_unlock_atom(atom);
  1666. + }
  1667. +
  1668. + switch (target_stage) {
  1669. + case BLOCK_NOT_COUNTED:
  1670. + assert("vs-960", flags & BA_FORMATTED);
  1671. + /* VITALY: This is what was grabbed for
  1672. + internal/tx-lists/similar only */
  1673. + used2free(sbinfo, *len);
  1674. + break;
  1675. +
  1676. + case BLOCK_GRABBED:
  1677. + used2grabbed(ctx, sbinfo, *len);
  1678. + break;
  1679. +
  1680. + case BLOCK_UNALLOCATED:
  1681. + used2fake_allocated(sbinfo, *len, flags & BA_FORMATTED);
  1682. + break;
  1683. +
  1684. + case BLOCK_FLUSH_RESERVED:{
  1685. + txn_atom *atom;
  1686. +
  1687. + atom = get_current_atom_locked();
  1688. + used2flush_reserved(sbinfo, atom, *len,
  1689. + flags & BA_FORMATTED);
  1690. + spin_unlock_atom(atom);
  1691. + break;
  1692. + }
  1693. + default:
  1694. + impossible("zam-532", "wrong block stage");
  1695. + }
  1696. + }
  1697. +
  1698. + return 0;
  1699. +}
  1700. +
  1701. +/* wrappers for block allocator plugin methods */
  1702. +int reiser4_pre_commit_hook(void)
  1703. +{
  1704. + assert("zam-502", get_current_super_private() != NULL);
  1705. + sa_pre_commit_hook();
  1706. + return 0;
  1707. +}
  1708. +
  1709. +/* an actor which applies delete set to block allocator data */
  1710. +static int
  1711. +apply_dset(txn_atom * atom UNUSED_ARG, const reiser4_block_nr * a,
  1712. + const reiser4_block_nr * b, void *data UNUSED_ARG)
  1713. +{
  1714. + reiser4_context *ctx;
  1715. + reiser4_super_info_data *sbinfo;
  1716. +
  1717. + __u64 len = 1;
  1718. +
  1719. + ctx = get_current_context();
  1720. + sbinfo = get_super_private(ctx->super);
  1721. +
  1722. + assert("zam-877", atom->stage >= ASTAGE_PRE_COMMIT);
  1723. + assert("zam-552", sbinfo != NULL);
  1724. +
  1725. + if (b != NULL)
  1726. + len = *b;
  1727. +
  1728. + if (REISER4_DEBUG) {
  1729. + spin_lock_reiser4_super(sbinfo);
  1730. +
  1731. + assert("zam-554", *a < reiser4_block_count(ctx->super));
  1732. + assert("zam-555", *a + len <= reiser4_block_count(ctx->super));
  1733. +
  1734. + spin_unlock_reiser4_super(sbinfo);
  1735. + }
  1736. +
  1737. + sa_dealloc_blocks(&sbinfo->space_allocator, *a, len);
  1738. + /* adjust sb block counters */
  1739. + used2free(sbinfo, len);
  1740. + return 0;
  1741. +}
  1742. +
  1743. +void reiser4_post_commit_hook(void)
  1744. +{
  1745. +#ifdef REISER4_DEBUG
  1746. + txn_atom *atom;
  1747. +
  1748. + atom = get_current_atom_locked();
  1749. + assert("zam-452", atom->stage == ASTAGE_POST_COMMIT);
  1750. + spin_unlock_atom(atom);
  1751. +#endif
  1752. +
  1753. + assert("zam-504", get_current_super_private() != NULL);
  1754. + sa_post_commit_hook();
  1755. +}
  1756. +
  1757. +void reiser4_post_write_back_hook(void)
  1758. +{
  1759. + struct list_head discarded_set;
  1760. + txn_atom *atom;
  1761. + int ret;
  1762. +
  1763. + /* process and issue discard requests */
  1764. + blocknr_list_init (&discarded_set);
  1765. + do {
  1766. + atom = get_current_atom_locked();
  1767. + ret = discard_atom(atom, &discarded_set);
  1768. + } while (ret == -E_REPEAT);
  1769. +
  1770. + if (ret) {
  1771. + warning("intelfx-8", "discard atom failed (%d)", ret);
  1772. + }
  1773. +
  1774. + atom = get_current_atom_locked();
  1775. + discard_atom_post(atom, &discarded_set);
  1776. +
  1777. + /* do the block deallocation which was deferred
  1778. + until commit is done */
  1779. + atom_dset_deferred_apply(atom, apply_dset, NULL, 1);
  1780. +
  1781. + assert("zam-504", get_current_super_private() != NULL);
  1782. + sa_post_write_back_hook();
  1783. +}
  1784. +
  1785. +/*
  1786. + Local variables:
  1787. + c-indentation-style: "K&R"
  1788. + mode-name: "LC"
  1789. + c-basic-offset: 8
  1790. + tab-width: 8
  1791. + fill-column: 120
  1792. + scroll-step: 1
  1793. + End:
  1794. +*/
  1795. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/block_alloc.h linux-5.16.14/fs/reiser4/block_alloc.h
  1796. --- linux-5.16.14.orig/fs/reiser4/block_alloc.h 1970-01-01 01:00:00.000000000 +0100
  1797. +++ linux-5.16.14/fs/reiser4/block_alloc.h 2022-03-12 13:26:19.640892700 +0100
  1798. @@ -0,0 +1,177 @@
  1799. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  1800. +
  1801. +#if !defined(__FS_REISER4_BLOCK_ALLOC_H__)
  1802. +#define __FS_REISER4_BLOCK_ALLOC_H__
  1803. +
  1804. +#include "dformat.h"
  1805. +#include "forward.h"
  1806. +
  1807. +#include <linux/types.h> /* for __u?? */
  1808. +#include <linux/fs.h>
  1809. +
1810. +/* Mask which, when applied to a given block number, shows whether that block
1811. + number is a fake one */
  1812. +#define REISER4_FAKE_BLOCKNR_BIT_MASK 0x8000000000000000ULL
  1813. +/* Mask which isolates a type of object this fake block number was assigned
  1814. + to */
  1815. +#define REISER4_BLOCKNR_STATUS_BIT_MASK 0xC000000000000000ULL
  1816. +
1817. +/* result after applying the REISER4_BLOCKNR_STATUS_BIT_MASK should be compared
1818. + against these two values to determine whether the object is unallocated or a
1819. + bitmap shadow object (WORKING BITMAP block, see plugin/space/bitmap.c) */
  1820. +#define REISER4_UNALLOCATED_STATUS_VALUE 0xC000000000000000ULL
  1821. +#define REISER4_BITMAP_BLOCKS_STATUS_VALUE 0x8000000000000000ULL
  1822. +
  1823. +/* specification how block allocation was counted in sb block counters */
  1824. +typedef enum {
  1825. + BLOCK_NOT_COUNTED = 0, /* reiser4 has no info about this block yet */
  1826. + BLOCK_GRABBED = 1, /* free space grabbed for further allocation
  1827. + of this block */
  1828. + BLOCK_FLUSH_RESERVED = 2, /* block is reserved for flush needs. */
  1829. + BLOCK_UNALLOCATED = 3, /* block is used for existing in-memory object
  1830. + ( unallocated formatted or unformatted
  1831. + node) */
  1832. + BLOCK_ALLOCATED = 4 /* block is mapped to disk, real on-disk block
  1833. + number assigned */
  1834. +} block_stage_t;
  1835. +
  1836. +/* a hint for block allocator */
  1837. +struct reiser4_blocknr_hint {
  1838. + /* FIXME: I think we want to add a longterm lock on the bitmap block
  1839. + here. This is to prevent jnode_flush() calls from interleaving
  1840. + allocations on the same bitmap, once a hint is established. */
  1841. +
  1842. + /* search start hint */
  1843. + reiser4_block_nr blk;
  1844. + /* if not zero, it is a region size we search for free blocks in */
  1845. + reiser4_block_nr max_dist;
  1846. + /* level for allocation, may be useful have branch-level and higher
  1847. + write-optimized. */
  1848. + tree_level level;
  1849. + /* block allocator assumes that blocks, which will be mapped to disk,
  1850. + are in this specified block_stage */
  1851. + block_stage_t block_stage;
  1852. + /* If direction = 1 allocate blocks in backward direction from the end
  1853. + * of disk to the beginning of disk. */
  1854. + unsigned int backward:1;
  1855. +
  1856. +};
  1857. +
  1858. +/* These flags control block allocation/deallocation behavior */
  1859. +enum reiser4_ba_flags {
1860. + /* do allocations from the reserved (5%) area */
  1861. + BA_RESERVED = (1 << 0),
  1862. +
  1863. + /* block allocator can do commit trying to recover free space */
  1864. + BA_CAN_COMMIT = (1 << 1),
  1865. +
  1866. + /* if operation will be applied to formatted block */
  1867. + BA_FORMATTED = (1 << 2),
  1868. +
  1869. + /* defer actual block freeing until transaction commit */
  1870. + BA_DEFER = (1 << 3),
  1871. +
  1872. + /* allocate blocks for permanent fs objects (formatted or unformatted),
1873. + not wandered or log blocks */
  1874. + BA_PERMANENT = (1 << 4),
  1875. +
1876. + /* grab space even if it was disabled */
  1877. + BA_FORCE = (1 << 5),
  1878. +
  1879. + /* use default start value for free blocks search. */
  1880. + BA_USE_DEFAULT_SEARCH_START = (1 << 6)
  1881. +};
  1882. +
  1883. +typedef enum reiser4_ba_flags reiser4_ba_flags_t;
  1884. +
  1885. +extern void reiser4_blocknr_hint_init(reiser4_blocknr_hint * hint);
  1886. +extern void reiser4_blocknr_hint_done(reiser4_blocknr_hint * hint);
  1887. +extern void update_blocknr_hint_default(const struct super_block *,
  1888. + const reiser4_block_nr *);
  1889. +extern void get_blocknr_hint_default(reiser4_block_nr *);
  1890. +
  1891. +extern reiser4_block_nr reiser4_fs_reserved_space(struct super_block *super);
  1892. +
  1893. +int assign_fake_blocknr_formatted(reiser4_block_nr *);
  1894. +reiser4_block_nr fake_blocknr_unformatted(int);
  1895. +
  1896. +/* free -> grabbed -> fake_allocated -> used */
  1897. +
  1898. +int reiser4_grab_space(__u64 count, reiser4_ba_flags_t flags);
  1899. +void all_grabbed2free(void);
  1900. +void grabbed2free(reiser4_context * , reiser4_super_info_data * , __u64 count);
  1901. +void fake_allocated2free(__u64 count, reiser4_ba_flags_t flags);
  1902. +void grabbed2flush_reserved_nolock(txn_atom * atom, __u64 count);
  1903. +void grabbed2flush_reserved(__u64 count);
  1904. +int reiser4_alloc_blocks(reiser4_blocknr_hint * hint,
  1905. + reiser4_block_nr * start,
  1906. + reiser4_block_nr * len, reiser4_ba_flags_t flags);
  1907. +int reiser4_dealloc_blocks(const reiser4_block_nr *,
  1908. + const reiser4_block_nr *,
  1909. + block_stage_t, reiser4_ba_flags_t flags);
  1910. +
  1911. +static inline int reiser4_alloc_block(reiser4_blocknr_hint * hint,
  1912. + reiser4_block_nr * start,
  1913. + reiser4_ba_flags_t flags)
  1914. +{
  1915. + reiser4_block_nr one = 1;
  1916. + return reiser4_alloc_blocks(hint, start, &one, flags);
  1917. +}
  1918. +
  1919. +static inline int reiser4_dealloc_block(const reiser4_block_nr * block,
  1920. + block_stage_t stage,
  1921. + reiser4_ba_flags_t flags)
  1922. +{
  1923. + const reiser4_block_nr one = 1;
  1924. + return reiser4_dealloc_blocks(block, &one, stage, flags);
  1925. +}
  1926. +
  1927. +#define reiser4_grab_space_force(count, flags) \
  1928. + reiser4_grab_space(count, flags | BA_FORCE)
  1929. +
  1930. +extern void grabbed2free_mark(__u64 mark);
  1931. +extern int reiser4_grab_reserved(struct super_block *,
  1932. + __u64, reiser4_ba_flags_t);
  1933. +extern void reiser4_release_reserved(struct super_block *super);
  1934. +
  1935. +/* grabbed -> fake_allocated */
  1936. +
  1937. +/* fake_allocated -> used */
  1938. +
  1939. +/* used -> fake_allocated -> grabbed -> free */
  1940. +
  1941. +extern void flush_reserved2grabbed(txn_atom * atom, __u64 count);
  1942. +
  1943. +extern int reiser4_blocknr_is_fake(const reiser4_block_nr * da);
  1944. +
  1945. +extern void grabbed2cluster_reserved(int count);
  1946. +extern void cluster_reserved2grabbed(int count);
  1947. +extern void cluster_reserved2free(int count);
  1948. +
  1949. +extern int reiser4_check_block_counters(const struct super_block *);
  1950. +
  1951. +
  1952. +extern int reiser4_check_blocks(const reiser4_block_nr *start,
  1953. + const reiser4_block_nr *len, int desired);
  1954. +
  1955. +static inline int reiser4_check_block(const reiser4_block_nr *start,
  1956. + int desired)
  1957. +{
  1958. + return reiser4_check_blocks(start, NULL, desired);
  1959. +}
  1960. +
  1961. +extern int reiser4_pre_commit_hook(void);
  1962. +extern void reiser4_post_commit_hook(void);
  1963. +extern void reiser4_post_write_back_hook(void);
  1964. +
  1965. +#endif /* __FS_REISER4_BLOCK_ALLOC_H__ */
  1966. +
  1967. +/* Make Linus happy.
  1968. + Local variables:
  1969. + c-indentation-style: "K&R"
  1970. + mode-name: "LC"
  1971. + c-basic-offset: 8
  1972. + tab-width: 8
  1973. + fill-column: 120
  1974. + End:
  1975. +*/
  1976. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/blocknrlist.c linux-5.16.14/fs/reiser4/blocknrlist.c
  1977. --- linux-5.16.14.orig/fs/reiser4/blocknrlist.c 1970-01-01 01:00:00.000000000 +0100
  1978. +++ linux-5.16.14/fs/reiser4/blocknrlist.c 2022-03-12 13:26:19.640892700 +0100
  1979. @@ -0,0 +1,337 @@
  1980. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  1981. + * reiser4/README */
  1982. +
  1983. +/* This is a block list implementation, used to create ordered block sets
  1984. + (at the cost of being less memory efficient than blocknr_set).
  1985. + It is used by discard code. */
  1986. +
  1987. +#include "debug.h"
  1988. +#include "dformat.h"
  1989. +#include "txnmgr.h"
  1990. +#include "context.h"
  1991. +#include "super.h"
  1992. +
  1993. +#include <linux/slab.h>
  1994. +#include <linux/list_sort.h>
  1995. +
  1996. +static struct kmem_cache *blocknr_list_slab = NULL;
  1997. +
  1998. +/**
  1999. + * Represents an extent range [@start; @end).
  2000. + */
  2001. +struct blocknr_list_entry {
  2002. + reiser4_block_nr start, len;
  2003. + struct list_head link;
  2004. +};
  2005. +
  2006. +#define blocknr_list_entry(ptr) list_entry(ptr, blocknr_list_entry, link)
  2007. +
  2008. +static void blocknr_list_entry_init(blocknr_list_entry *entry)
  2009. +{
  2010. + assert("intelfx-11", entry != NULL);
  2011. +
  2012. + entry->start = 0;
  2013. + entry->len = 0;
  2014. + INIT_LIST_HEAD(&entry->link);
  2015. +}
  2016. +
  2017. +static blocknr_list_entry *blocknr_list_entry_alloc(void)
  2018. +{
  2019. + blocknr_list_entry *entry;
  2020. +
  2021. + entry = (blocknr_list_entry *)kmem_cache_alloc(blocknr_list_slab,
  2022. + reiser4_ctx_gfp_mask_get());
  2023. + if (entry == NULL) {
  2024. + return NULL;
  2025. + }
  2026. +
  2027. + blocknr_list_entry_init(entry);
  2028. +
  2029. + return entry;
  2030. +}
  2031. +
  2032. +static void blocknr_list_entry_free(blocknr_list_entry *entry)
  2033. +{
  2034. + assert("intelfx-12", entry != NULL);
  2035. +
  2036. + kmem_cache_free(blocknr_list_slab, entry);
  2037. +}
  2038. +
  2039. +/**
  2040. + * Given ranges @to and [@start; @end), if they overlap, their union
  2041. + * is calculated and saved in @to.
  2042. + */
  2043. +static int blocknr_list_entry_merge(blocknr_list_entry *to,
  2044. + reiser4_block_nr start,
  2045. + reiser4_block_nr len)
  2046. +{
  2047. + reiser4_block_nr end, to_end;
  2048. +
  2049. + assert("intelfx-13", to != NULL);
  2050. +
  2051. + assert("intelfx-16", to->len > 0);
  2052. + assert("intelfx-17", len > 0);
  2053. +
  2054. + end = start + len;
  2055. + to_end = to->start + to->len;
  2056. +
  2057. + if ((to->start <= end) && (start <= to_end)) {
  2058. + if (start < to->start) {
  2059. + to->start = start;
  2060. + }
  2061. +
  2062. + if (end > to_end) {
  2063. + to_end = end;
  2064. + }
  2065. +
  2066. + to->len = to_end - to->start;
  2067. +
  2068. + return 0;
  2069. + }
  2070. +
  2071. + return -1;
  2072. +}
  2073. +
  2074. +static int blocknr_list_entry_merge_entry(blocknr_list_entry *to,
  2075. + blocknr_list_entry *from)
  2076. +{
  2077. + assert("intelfx-18", from != NULL);
  2078. +
  2079. + return blocknr_list_entry_merge(to, from->start, from->len);
  2080. +}
  2081. +
  2082. +/**
  2083. + * A comparison function for list_sort().
  2084. + *
  2085. + * "The comparison function @cmp must return a negative value if @a
  2086. + * should sort before @b, and a positive value if @a should sort after
  2087. + * @b. If @a and @b are equivalent, and their original relative
  2088. + * ordering is to be preserved, @cmp must return 0."
  2089. + */
  2090. +static int blocknr_list_entry_compare(void* priv UNUSED_ARG,
  2091. + const struct list_head *a,
  2092. + const struct list_head *b)
  2093. +{
  2094. + blocknr_list_entry *entry_a, *entry_b;
  2095. + reiser4_block_nr entry_a_end, entry_b_end;
  2096. +
  2097. + assert("intelfx-19", a != NULL);
  2098. + assert("intelfx-20", b != NULL);
  2099. +
  2100. + entry_a = blocknr_list_entry(a);
  2101. + entry_b = blocknr_list_entry(b);
  2102. +
  2103. + entry_a_end = entry_a->start + entry_a->len;
  2104. + entry_b_end = entry_b->start + entry_b->len;
  2105. +
  2106. + /* First sort by starting block numbers... */
  2107. + if (entry_a->start < entry_b->start) {
  2108. + return -1;
  2109. + }
  2110. +
  2111. + if (entry_a->start > entry_b->start) {
  2112. + return 1;
  2113. + }
  2114. +
  2115. + /** Then by ending block numbers.
  2116. + * If @a contains @b, it will be sorted before. */
  2117. + if (entry_a_end > entry_b_end) {
  2118. + return -1;
  2119. + }
  2120. +
  2121. + if (entry_a_end < entry_b_end) {
  2122. + return 1;
  2123. + }
  2124. +
  2125. + return 0;
  2126. +}
  2127. +
  2128. +int blocknr_list_init_static(void)
  2129. +{
  2130. + assert("intelfx-54", blocknr_list_slab == NULL);
  2131. +
  2132. + blocknr_list_slab = kmem_cache_create("blocknr_list_entry",
  2133. + sizeof(blocknr_list_entry),
  2134. + 0,
  2135. + SLAB_HWCACHE_ALIGN |
  2136. + SLAB_RECLAIM_ACCOUNT,
  2137. + NULL);
  2138. + if (blocknr_list_slab == NULL) {
  2139. + return RETERR(-ENOMEM);
  2140. + }
  2141. +
  2142. + return 0;
  2143. +}
  2144. +
  2145. +void blocknr_list_done_static(void)
  2146. +{
  2147. + destroy_reiser4_cache(&blocknr_list_slab);
  2148. +}
  2149. +
  2150. +void blocknr_list_init(struct list_head* blist)
  2151. +{
  2152. + assert("intelfx-24", blist != NULL);
  2153. +
  2154. + INIT_LIST_HEAD(blist);
  2155. +}
  2156. +
  2157. +void blocknr_list_destroy(struct list_head* blist)
  2158. +{
  2159. + struct list_head *pos, *tmp;
  2160. + blocknr_list_entry *entry;
  2161. +
  2162. + assert("intelfx-25", blist != NULL);
  2163. +
  2164. + list_for_each_safe(pos, tmp, blist) {
  2165. + entry = blocknr_list_entry(pos);
  2166. + list_del_init(pos);
  2167. + blocknr_list_entry_free(entry);
  2168. + }
  2169. +
  2170. + assert("intelfx-48", list_empty(blist));
  2171. +}
  2172. +
  2173. +void blocknr_list_merge(struct list_head *from, struct list_head *to)
  2174. +{
  2175. + assert("intelfx-26", from != NULL);
  2176. + assert("intelfx-27", to != NULL);
  2177. +
  2178. + list_splice_tail_init(from, to);
  2179. +
  2180. + assert("intelfx-49", list_empty(from));
  2181. +}
  2182. +
  2183. +void blocknr_list_sort_and_join(struct list_head *blist)
  2184. +{
  2185. + struct list_head *pos, *next;
  2186. + struct blocknr_list_entry *entry, *next_entry;
  2187. +
  2188. + assert("intelfx-50", blist != NULL);
  2189. +
  2190. + /* Step 1. Sort the extent list. */
  2191. + list_sort(NULL, blist, blocknr_list_entry_compare);
  2192. +
  2193. + /* Step 2. Join adjacent extents in the list. */
  2194. + pos = blist->next;
  2195. + next = pos->next;
  2196. + entry = blocknr_list_entry(pos);
  2197. +
  2198. + for (; next != blist; next = pos->next) {
  2199. + /** @next is a valid node at this point */
  2200. + next_entry = blocknr_list_entry(next);
  2201. +
  2202. + /** try to merge @next into @pos */
  2203. + if (!blocknr_list_entry_merge_entry(entry, next_entry)) {
  2204. + /** successful; delete the @next node.
  2205. + * next merge will be attempted into the same node. */
  2206. + list_del_init(next);
  2207. + blocknr_list_entry_free(next_entry);
  2208. + } else {
  2209. + /** otherwise advance @pos. */
  2210. + pos = next;
  2211. + entry = next_entry;
  2212. + }
  2213. + }
  2214. +}
  2215. +
  2216. +int blocknr_list_add_extent(txn_atom *atom,
  2217. + struct list_head *blist,
  2218. + blocknr_list_entry **new_entry,
  2219. + const reiser4_block_nr *start,
  2220. + const reiser4_block_nr *len)
  2221. +{
  2222. + assert("intelfx-29", atom != NULL);
  2223. + assert("intelfx-42", atom_is_protected(atom));
  2224. + assert("intelfx-43", blist != NULL);
  2225. + assert("intelfx-30", new_entry != NULL);
  2226. + assert("intelfx-31", start != NULL);
  2227. + assert("intelfx-32", len != NULL && *len > 0);
  2228. +
  2229. + if (*new_entry == NULL) {
  2230. + /*
  2231. + * Optimization: try to merge new extent into the last one.
  2232. + */
  2233. + if (!list_empty(blist)) {
  2234. + blocknr_list_entry *last_entry;
  2235. + last_entry = blocknr_list_entry(blist->prev);
  2236. + if (!blocknr_list_entry_merge(last_entry, *start, *len)) {
  2237. + return 0;
  2238. + }
  2239. + }
  2240. +
  2241. + /*
  2242. + * Otherwise, allocate a new entry and tell -E_REPEAT.
  2243. + * Next time we'll take the branch below.
  2244. + */
  2245. + spin_unlock_atom(atom);
  2246. + *new_entry = blocknr_list_entry_alloc();
  2247. + return (*new_entry != NULL) ? -E_REPEAT : RETERR(-ENOMEM);
  2248. + }
  2249. +
  2250. + /*
  2251. + * The entry has been allocated beforehand, fill it and link to the list.
  2252. + */
  2253. + (*new_entry)->start = *start;
  2254. + (*new_entry)->len = *len;
  2255. + list_add_tail(&(*new_entry)->link, blist);
  2256. +
  2257. + return 0;
  2258. +}
  2259. +
  2260. +int blocknr_list_iterator(txn_atom *atom,
  2261. + struct list_head *blist,
  2262. + blocknr_set_actor_f actor,
  2263. + void *data,
  2264. + int delete)
  2265. +{
  2266. + struct list_head *pos;
  2267. + blocknr_list_entry *entry;
  2268. + int ret = 0;
  2269. +
  2270. + assert("intelfx-46", blist != NULL);
  2271. + assert("intelfx-47", actor != NULL);
  2272. +
  2273. + if (delete) {
  2274. + struct list_head *tmp;
  2275. +
  2276. + list_for_each_safe(pos, tmp, blist) {
  2277. + entry = blocknr_list_entry(pos);
  2278. +
  2279. + /*
  2280. + * Do not exit, delete flag is set. Instead, on the first error we
  2281. + * downgrade from iterating to just deleting.
  2282. + */
  2283. + if (ret == 0) {
  2284. + ret = actor(atom, &entry->start, &entry->len, data);
  2285. + }
  2286. +
  2287. + list_del_init(pos);
  2288. + blocknr_list_entry_free(entry);
  2289. + }
  2290. +
  2291. + assert("intelfx-44", list_empty(blist));
  2292. + } else {
  2293. + list_for_each(pos, blist) {
  2294. + entry = blocknr_list_entry(pos);
  2295. +
  2296. + ret = actor(atom, &entry->start, &entry->len, data);
  2297. +
  2298. + if (ret != 0) {
  2299. + return ret;
  2300. + }
  2301. + }
  2302. + }
  2303. +
  2304. + return ret;
  2305. +}
  2306. +
  2307. +/* Make Linus happy.
  2308. + Local variables:
  2309. + c-indentation-style: "K&R"
  2310. + mode-name: "LC"
  2311. + c-basic-offset: 8
  2312. + tab-width: 8
  2313. + fill-column: 120
  2314. + scroll-step: 1
  2315. + End:
  2316. +*/
  2317. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/blocknrset.c linux-5.16.14/fs/reiser4/blocknrset.c
  2318. --- linux-5.16.14.orig/fs/reiser4/blocknrset.c 1970-01-01 01:00:00.000000000 +0100
  2319. +++ linux-5.16.14/fs/reiser4/blocknrset.c 2022-03-12 13:26:19.640892700 +0100
  2320. @@ -0,0 +1,400 @@
  2321. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  2322. +reiser4/README */
  2323. +
  2324. +/* This file contains code for various block number sets used by the atom to
  2325. + track the deleted set and wandered block mappings. */
  2326. +
  2327. +#include "debug.h"
  2328. +#include "dformat.h"
  2329. +#include "txnmgr.h"
  2330. +#include "context.h"
  2331. +#include "super.h"
  2332. +
  2333. +#include <linux/slab.h>
  2334. +
  2335. +/* The proposed data structure for storing unordered block number sets is a
  2336. + list of elements, each of which contains an array of block numbers and/or an
  2337. + array of block number pairs. That element, called blocknr_set_entry, is used
  2338. + to store block numbers from the beginning and for extents from the end of
  2339. + the data field (char data[...]). The ->nr_blocks and ->nr_pairs fields
  2340. + count numbers of blocks and extents.
  2341. +
  2342. + +------------------- blocknr_set_entry->data ------------------+
  2343. + |block1|block2| ... <free space> ... |pair3|pair2|pair1|
  2344. + +------------------------------------------------------------+
  2345. +
  2346. + When current blocknr_set_entry is full, allocate a new one. */
  2347. +
  2348. +/* Usage examples: blocknr sets are used in reiser4 for storing atom's delete
  2349. + * set (single blocks and block extents), in that case blocknr pair represent an
  2350. + * extent; atom's wandered map is also stored as a blocknr set, blocknr pairs
  2351. + * there represent a (real block) -> (wandered block) mapping. */
  2352. +
  2353. +/* Protection: blocknr sets belong to reiser4 atom, and
  2354. + * their modifications are performed with the atom lock held */
  2355. +
  2356. +/* The total size of a blocknr_set_entry. */
  2357. +#define BLOCKNR_SET_ENTRY_SIZE 128
  2358. +
  2359. +/* The number of blocks that can fit the blocknr data area. */
  2360. +#define BLOCKNR_SET_ENTRIES_NUMBER \
  2361. + ((BLOCKNR_SET_ENTRY_SIZE - \
  2362. + 2 * sizeof(unsigned) - \
  2363. + sizeof(struct list_head)) / \
  2364. + sizeof(reiser4_block_nr))
  2365. +
  2366. +static struct kmem_cache *blocknr_set_slab = NULL;
  2367. +
  2368. +/* An entry of the blocknr_set */
  2369. +struct blocknr_set_entry {
  2370. + unsigned nr_singles;
  2371. + unsigned nr_pairs;
  2372. + struct list_head link;
  2373. + reiser4_block_nr entries[BLOCKNR_SET_ENTRIES_NUMBER];
  2374. +};
  2375. +
  2376. +static_assert(sizeof(blocknr_set_entry) == BLOCKNR_SET_ENTRY_SIZE);
  2377. +
  2378. +/* A pair of blocks as recorded in the blocknr_set_entry data. */
  2379. +struct blocknr_pair {
  2380. + reiser4_block_nr a;
  2381. + reiser4_block_nr b;
  2382. +};
  2383. +
  2384. +/* Return the number of blocknr slots available in a blocknr_set_entry. */
  2385. +/* Audited by: green(2002.06.11) */
  2386. +static unsigned bse_avail(blocknr_set_entry * bse)
  2387. +{
  2388. + unsigned used = bse->nr_singles + 2 * bse->nr_pairs;
  2389. +
  2390. + assert("jmacd-5088", BLOCKNR_SET_ENTRIES_NUMBER >= used);
  2391. +
  2392. + return BLOCKNR_SET_ENTRIES_NUMBER - used;
  2393. +}
  2394. +
  2395. +/* Initialize a blocknr_set_entry. */
  2396. +static void bse_init(blocknr_set_entry *bse)
  2397. +{
  2398. + bse->nr_singles = 0;
  2399. + bse->nr_pairs = 0;
  2400. + INIT_LIST_HEAD(&bse->link);
  2401. +}
  2402. +
  2403. +/* Allocate and initialize a blocknr_set_entry. */
  2404. +/* Audited by: green(2002.06.11) */
  2405. +static blocknr_set_entry *bse_alloc(void)
  2406. +{
  2407. + blocknr_set_entry *e;
  2408. +
  2409. + if ((e = (blocknr_set_entry *) kmem_cache_alloc(blocknr_set_slab,
  2410. + reiser4_ctx_gfp_mask_get())) == NULL)
  2411. + return NULL;
  2412. +
  2413. + bse_init(e);
  2414. +
  2415. + return e;
  2416. +}
  2417. +
  2418. +/* Free a blocknr_set_entry. */
  2419. +/* Audited by: green(2002.06.11) */
  2420. +static void bse_free(blocknr_set_entry * bse)
  2421. +{
  2422. + kmem_cache_free(blocknr_set_slab, bse);
  2423. +}
  2424. +
  2425. +/* Add a block number to a blocknr_set_entry */
  2426. +/* Audited by: green(2002.06.11) */
  2427. +static void
  2428. +bse_put_single(blocknr_set_entry * bse, const reiser4_block_nr * block)
  2429. +{
  2430. + assert("jmacd-5099", bse_avail(bse) >= 1);
  2431. +
  2432. + bse->entries[bse->nr_singles++] = *block;
  2433. +}
  2434. +
  2435. +/* Get a pair of block numbers */
  2436. +/* Audited by: green(2002.06.11) */
  2437. +static inline struct blocknr_pair *bse_get_pair(blocknr_set_entry * bse,
  2438. + unsigned pno)
  2439. +{
  2440. + assert("green-1", BLOCKNR_SET_ENTRIES_NUMBER >= 2 * (pno + 1));
  2441. +
  2442. + return (struct blocknr_pair *) (bse->entries +
  2443. + BLOCKNR_SET_ENTRIES_NUMBER -
  2444. + 2 * (pno + 1));
  2445. +}
  2446. +
  2447. +/* Add a pair of block numbers to a blocknr_set_entry */
  2448. +/* Audited by: green(2002.06.11) */
  2449. +static void
  2450. +bse_put_pair(blocknr_set_entry * bse, const reiser4_block_nr * a,
  2451. + const reiser4_block_nr * b)
  2452. +{
  2453. + struct blocknr_pair *pair;
  2454. +
  2455. + assert("jmacd-5100", bse_avail(bse) >= 2 && a != NULL && b != NULL);
  2456. +
  2457. + pair = bse_get_pair(bse, bse->nr_pairs++);
  2458. +
  2459. + pair->a = *a;
  2460. + pair->b = *b;
  2461. +}
  2462. +
  2463. +/* Add either a block or pair of blocks to the block number set. The first
  2464. + blocknr (@a) must be non-NULL. If @b is NULL a single blocknr is added, if
  2465. + @b is non-NULL a pair is added. The block number set belongs to atom, and
  2466. + the call is made with the atom lock held. There may not be enough space in
  2467. + the current blocknr_set_entry. If new_bsep points to a non-NULL
  2468. + blocknr_set_entry then it will be added to the blocknr_set and new_bsep
  2469. + will be set to NULL. If new_bsep contains NULL then the atom lock will be
  2470. + released and a new bse will be allocated in new_bsep. E_REPEAT will be
  2471. + returned with the atom unlocked for the operation to be tried again. If
  2472. + the operation succeeds, 0 is returned. If new_bsep is non-NULL and not
  2473. + used during the call, it will be freed automatically. */
  2474. +static int blocknr_set_add(txn_atom *atom, struct list_head *bset,
  2475. + blocknr_set_entry **new_bsep, const reiser4_block_nr *a,
  2476. + const reiser4_block_nr *b)
  2477. +{
  2478. + blocknr_set_entry *bse;
  2479. + unsigned entries_needed;
  2480. +
  2481. + assert("jmacd-5101", a != NULL);
  2482. +
  2483. + entries_needed = (b == NULL) ? 1 : 2;
  2484. + if (list_empty(bset) ||
  2485. + bse_avail(list_entry(bset->next, blocknr_set_entry, link)) < entries_needed) {
  2486. + /* See if a bse was previously allocated. */
  2487. + if (*new_bsep == NULL) {
  2488. + spin_unlock_atom(atom);
  2489. + *new_bsep = bse_alloc();
  2490. + return (*new_bsep != NULL) ? -E_REPEAT :
  2491. + RETERR(-ENOMEM);
  2492. + }
  2493. +
  2494. + /* Put it on the head of the list. */
  2495. + list_add(&((*new_bsep)->link), bset);
  2496. +
  2497. + *new_bsep = NULL;
  2498. + }
  2499. +
  2500. + /* Add the single or pair. */
  2501. + bse = list_entry(bset->next, blocknr_set_entry, link);
  2502. + if (b == NULL) {
  2503. + bse_put_single(bse, a);
  2504. + } else {
  2505. + bse_put_pair(bse, a, b);
  2506. + }
  2507. +
  2508. + /* If new_bsep is non-NULL then there was an allocation race, free this
  2509. + copy. */
  2510. + if (*new_bsep != NULL) {
  2511. + bse_free(*new_bsep);
  2512. + *new_bsep = NULL;
  2513. + }
  2514. +
  2515. + return 0;
  2516. +}
  2517. +
  2518. +/* Add an extent to the block set. If the length is 1, it is treated as a
  2519. + single block (e.g., reiser4_set_add_block). */
  2520. +/* Audited by: green(2002.06.11) */
  2521. +/* Auditor note: Entire call chain cannot hold any spinlocks, because
  2522. + kmalloc might schedule. The only exception is atom spinlock, which is
  2523. + properly freed. */
  2524. +int
  2525. +blocknr_set_add_extent(txn_atom * atom,
  2526. + struct list_head *bset,
  2527. + blocknr_set_entry ** new_bsep,
  2528. + const reiser4_block_nr * start,
  2529. + const reiser4_block_nr * len)
  2530. +{
  2531. + assert("jmacd-5102", start != NULL && len != NULL && *len > 0);
  2532. + return blocknr_set_add(atom, bset, new_bsep, start,
  2533. + *len == 1 ? NULL : len);
  2534. +}
  2535. +
  2536. +/* Add a block pair to the block set. It adds exactly a pair, which is checked
  2537. + * by an assertion that both arguments are not null.*/
  2538. +/* Audited by: green(2002.06.11) */
  2539. +/* Auditor note: Entire call chain cannot hold any spinlocks, because
  2540. + kmalloc might schedule. The only exception is atom spinlock, which is
  2541. + properly freed. */
  2542. +int
  2543. +blocknr_set_add_pair(txn_atom * atom,
  2544. + struct list_head *bset,
  2545. + blocknr_set_entry ** new_bsep, const reiser4_block_nr * a,
  2546. + const reiser4_block_nr * b)
  2547. +{
  2548. + assert("jmacd-5103", a != NULL && b != NULL);
  2549. + return blocknr_set_add(atom, bset, new_bsep, a, b);
  2550. +}
  2551. +
  2552. +/* Initialize slab cache of blocknr_set_entry objects. */
  2553. +int blocknr_set_init_static(void)
  2554. +{
  2555. + assert("intelfx-55", blocknr_set_slab == NULL);
  2556. +
  2557. + blocknr_set_slab = kmem_cache_create("blocknr_set_entry",
  2558. + sizeof(blocknr_set_entry),
  2559. + 0,
  2560. + SLAB_HWCACHE_ALIGN |
  2561. + SLAB_RECLAIM_ACCOUNT,
  2562. + NULL);
  2563. +
  2564. + if (blocknr_set_slab == NULL) {
  2565. + return RETERR(-ENOMEM);
  2566. + }
  2567. +
  2568. + return 0;
  2569. +}
  2570. +
  2571. +/* Destroy slab cache of blocknr_set_entry objects. */
  2572. +void blocknr_set_done_static(void)
  2573. +{
  2574. + destroy_reiser4_cache(&blocknr_set_slab);
  2575. +}
  2576. +
  2577. +/* Initialize a blocknr_set. */
  2578. +void blocknr_set_init(struct list_head *bset)
  2579. +{
  2580. + INIT_LIST_HEAD(bset);
  2581. +}
  2582. +
  2583. +/* Release the entries of a blocknr_set. */
  2584. +void blocknr_set_destroy(struct list_head *bset)
  2585. +{
  2586. + blocknr_set_entry *bse;
  2587. +
  2588. + while (!list_empty(bset)) {
  2589. + bse = list_entry(bset->next, blocknr_set_entry, link);
  2590. + list_del_init(&bse->link);
  2591. + bse_free(bse);
  2592. + }
  2593. +}
  2594. +
  2595. +/* Merge blocknr_set entries out of @from into @into. */
  2596. +/* Audited by: green(2002.06.11) */
  2597. +/* Auditor comments: This merge does not know if merged sets contain
  2598. + block pairs (as for wandered sets) or extents, so it cannot really merge
  2599. + overlapping ranges if there are any. So I believe it may lead to
  2600. + some blocks being presented several times in one blocknr_set. To help
  2601. + debugging such problems it might help to check for duplicate entries on
  2602. + actual processing of this set. Testing this kind of stuff right here is
  2603. + also complicated by the fact that these sets are not sorted and going
  2604. + through whole set on each element addition is going to be CPU-heavy task */
  2605. +void blocknr_set_merge(struct list_head *from, struct list_head *into)
  2606. +{
  2607. + blocknr_set_entry *bse_into = NULL;
  2608. +
  2609. + /* If @from is empty, no work to perform. */
  2610. + if (list_empty(from))
  2611. + return;
  2612. + /* If @into is not empty, try merging partial-entries. */
  2613. + if (!list_empty(into)) {
  2614. +
  2615. + /* Neither set is empty, pop the front to members and try to
  2616. + combine them. */
  2617. + blocknr_set_entry *bse_from;
  2618. + unsigned into_avail;
  2619. +
  2620. + bse_into = list_entry(into->next, blocknr_set_entry, link);
  2621. + list_del_init(&bse_into->link);
  2622. + bse_from = list_entry(from->next, blocknr_set_entry, link);
  2623. + list_del_init(&bse_from->link);
  2624. +
  2625. + /* Combine singles. */
  2626. + for (into_avail = bse_avail(bse_into);
  2627. + into_avail != 0 && bse_from->nr_singles != 0;
  2628. + into_avail -= 1) {
  2629. + bse_put_single(bse_into,
  2630. + &bse_from->entries[--bse_from->
  2631. + nr_singles]);
  2632. + }
  2633. +
  2634. + /* Combine pairs. */
  2635. + for (; into_avail > 1 && bse_from->nr_pairs != 0;
  2636. + into_avail -= 2) {
  2637. + struct blocknr_pair *pair =
  2638. + bse_get_pair(bse_from, --bse_from->nr_pairs);
  2639. + bse_put_pair(bse_into, &pair->a, &pair->b);
  2640. + }
  2641. +
  2642. + /* If bse_from is empty, delete it now. */
  2643. + if (bse_avail(bse_from) == BLOCKNR_SET_ENTRIES_NUMBER) {
  2644. + bse_free(bse_from);
  2645. + } else {
  2646. + /* Otherwise, bse_into is full or nearly full (e.g.,
  2647. + it could have one slot avail and bse_from has one
  2648. + pair left). Push it back onto the list. bse_from
  2649. + becomes bse_into, which will be the new partial. */
  2650. + list_add(&bse_into->link, into);
  2651. + bse_into = bse_from;
  2652. + }
  2653. + }
  2654. +
  2655. + /* Splice lists together. */
  2656. + list_splice_init(from, into->prev);
  2657. +
  2658. + /* Add the partial entry back to the head of the list. */
  2659. + if (bse_into != NULL)
  2660. + list_add(&bse_into->link, into);
  2661. +}
  2662. +
  2663. +/* Iterate over all blocknr set elements. */
  2664. +int blocknr_set_iterator(txn_atom *atom, struct list_head *bset,
  2665. + blocknr_set_actor_f actor, void *data, int delete)
  2666. +{
  2667. +
  2668. + blocknr_set_entry *entry;
  2669. +
  2670. + assert("zam-429", atom != NULL);
  2671. + assert("zam-430", atom_is_protected(atom));
  2672. + assert("zam-431", bset != 0);
  2673. + assert("zam-432", actor != NULL);
  2674. +
  2675. + entry = list_entry(bset->next, blocknr_set_entry, link);
  2676. + while (bset != &entry->link) {
  2677. + blocknr_set_entry *tmp = list_entry(entry->link.next, blocknr_set_entry, link);
  2678. + unsigned int i;
  2679. + int ret;
  2680. +
  2681. + for (i = 0; i < entry->nr_singles; i++) {
  2682. + ret = actor(atom, &entry->entries[i], NULL, data);
  2683. +
  2684. + /* We can't break a loop if delete flag is set. */
  2685. + if (ret != 0 && !delete)
  2686. + return ret;
  2687. + }
  2688. +
  2689. + for (i = 0; i < entry->nr_pairs; i++) {
  2690. + struct blocknr_pair *ab;
  2691. +
  2692. + ab = bse_get_pair(entry, i);
  2693. +
  2694. + ret = actor(atom, &ab->a, &ab->b, data);
  2695. +
  2696. + if (ret != 0 && !delete)
  2697. + return ret;
  2698. + }
  2699. +
  2700. + if (delete) {
  2701. + list_del(&entry->link);
  2702. + bse_free(entry);
  2703. + }
  2704. +
  2705. + entry = tmp;
  2706. + }
  2707. +
  2708. + return 0;
  2709. +}
  2710. +
  2711. +/*
  2712. + * Local variables:
  2713. + * c-indentation-style: "K&R"
  2714. + * mode-name: "LC"
  2715. + * c-basic-offset: 8
  2716. + * tab-width: 8
  2717. + * fill-column: 79
  2718. + * scroll-step: 1
  2719. + * End:
  2720. + */
  2721. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/carry.c linux-5.16.14/fs/reiser4/carry.c
  2722. --- linux-5.16.14.orig/fs/reiser4/carry.c 1970-01-01 01:00:00.000000000 +0100
  2723. +++ linux-5.16.14/fs/reiser4/carry.c 2022-03-12 13:26:19.641892702 +0100
  2724. @@ -0,0 +1,1408 @@
  2725. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  2726. + reiser4/README */
  2727. +/* Functions to "carry" tree modification(s) upward. */
  2728. +/* Tree is modified one level at a time. As we modify a level we accumulate a
  2729. + set of changes that need to be propagated to the next level. We manage
  2730. + node locking such that any searches that collide with carrying are
  2731. + restarted, from the root if necessary.
  2732. +
  2733. + Insertion of a new item may result in items being moved among nodes and
  2734. + this requires the delimiting key to be updated at the least common parent
  2735. + of the nodes modified to preserve search tree invariants. Also, insertion
  2736. + may require allocation of a new node. A pointer to the new node has to be
  2737. + inserted into some node on the parent level, etc.
  2738. +
  2739. + Tree carrying is meant to be analogous to arithmetic carrying.
  2740. +
  2741. + A carry operation is always associated with some node (&carry_node).
  2742. +
  2743. + Carry process starts with some initial set of operations to be performed
  2744. + and an initial set of already locked nodes. Operations are performed one
  2745. + by one. Performing each single operation has following possible effects:
  2746. +
  2747. + - content of carry node associated with operation is modified
  2748. + - new carry nodes are locked and involved into carry process on this level
  2749. + - new carry operations are posted to the next level
  2750. +
  2751. + After all carry operations on this level are done, process is repeated for
  2752. + the accumulated sequence on carry operations for the next level. This
  2753. + starts by trying to lock (in left to right order) all carry nodes
  2754. + associated with carry operations on the parent level. After this, we decide
  2755. + whether more nodes are required on the left of already locked set. If so,
  2756. + all locks taken on the parent level are released, new carry nodes are
  2757. + added, and locking process repeats.
  2758. +
  2759. + It may happen that balancing process fails owing to unrecoverable error on
  2760. + some of upper levels of a tree (possible causes are io error, failure to
  2761. + allocate new node, etc.). In this case we should unmount the filesystem,
  2762. + rebooting if it is the root, and possibly advise the use of fsck.
  2763. +
  2764. + USAGE:
  2765. +
  2766. + int some_tree_operation( znode *node, ... )
  2767. + {
  2768. + // Allocate on a stack pool of carry objects: operations and nodes.
  2769. + // Most carry processes will only take objects from here, without
  2770. + // dynamic allocation.
  2771. +
  2772. +I feel uneasy about this pool. It adds to code complexity, I understand why it
  2773. +exists, but.... -Hans
  2774. +
  2775. + carry_pool pool;
  2776. + carry_level lowest_level;
  2777. + carry_op *op;
  2778. +
  2779. + init_carry_pool( &pool );
  2780. + init_carry_level( &lowest_level, &pool );
  2781. +
  2782. + // operation may be one of:
  2783. + // COP_INSERT --- insert new item into node
  2784. + // COP_CUT --- remove part of or whole node
  2785. + // COP_PASTE --- increase size of item
  2786. + // COP_DELETE --- delete pointer from parent node
  2787. + // COP_UPDATE --- update delimiting key in least
  2788. + // common ancestor of two
  2789. +
  2790. + op = reiser4_post_carry( &lowest_level, operation, node, 0 );
  2791. + if( IS_ERR( op ) || ( op == NULL ) ) {
  2792. + handle error
  2793. + } else {
  2794. + // fill in remaining fields in @op, according to carry.h:carry_op
  2795. + result = carry(&lowest_level, NULL);
  2796. + }
  2797. + done_carry_pool(&pool);
  2798. + }
  2799. +
  2800. + When you are implementing node plugin method that participates in carry
  2801. + (shifting, insertion, deletion, etc.), do the following:
  2802. +
  2803. + int foo_node_method(znode * node, ..., carry_level * todo)
  2804. + {
  2805. + carry_op *op;
  2806. +
  2807. + ....
  2808. +
  2809. + // note, that last argument to reiser4_post_carry() is non-null
  2810. + // here, because @op is to be applied to the parent of @node, rather
  2811. + // than to the @node itself as in the previous case.
  2812. +
  2813. + op = node_post_carry(todo, operation, node, 1);
  2814. + // fill in remaining fields in @op, according to carry.h:carry_op
  2815. +
  2816. + ....
  2817. +
  2818. + }
  2819. +
  2820. + BATCHING:
  2821. +
  2822. + One of the main advantages of level-by-level balancing implemented here is
  2823. + ability to batch updates on a parent level and to perform them more
  2824. + efficiently as a result.
  2825. +
  2826. + Description To Be Done (TBD).
  2827. +
  2828. + DIFFICULTIES AND SUBTLE POINTS:
  2829. +
  2830. + 1. complex plumbing is required, because:
  2831. +
  2832. + a. effective allocation through pools is needed
  2833. +
  2834. + b. target of operation is not exactly known when operation is
  2835. + posted. This is worked around through bitfields in &carry_node and
  2836. + logic in lock_carry_node()
  2837. +
  2838. + c. of interaction with locking code: node should be added into sibling
  2839. + list when pointer to it is inserted into its parent, which is some time
  2840. + after node was created. Between these moments, node is somewhat in
  2841. + suspended state and is only registered in the carry lists
  2842. +
  2843. + 2. whole balancing logic is implemented here, in particular, insertion
  2844. + logic is coded in make_space().
  2845. +
  2846. + 3. special cases like insertion (reiser4_add_tree_root()) or deletion
  2847. + (reiser4_kill_tree_root()) of tree root and morphing of paste into insert
  2848. + (insert_paste()) have to be handled.
  2849. +
  2850. + 4. there is non-trivial interdependency between allocation of new nodes
  2851. + and almost everything else. This is mainly due to the (1.c) above. I shall
  2852. + write about this later.
  2853. +
  2854. +*/
  2855. +
  2856. +#include "forward.h"
  2857. +#include "debug.h"
  2858. +#include "key.h"
  2859. +#include "coord.h"
  2860. +#include "plugin/item/item.h"
  2861. +#include "plugin/item/extent.h"
  2862. +#include "plugin/node/node.h"
  2863. +#include "jnode.h"
  2864. +#include "znode.h"
  2865. +#include "tree_mod.h"
  2866. +#include "tree_walk.h"
  2867. +#include "block_alloc.h"
  2868. +#include "pool.h"
  2869. +#include "tree.h"
  2870. +#include "carry.h"
  2871. +#include "carry_ops.h"
  2872. +#include "super.h"
  2873. +#include "reiser4.h"
  2874. +
  2875. +#include <linux/types.h>
  2876. +
  2877. +/* level locking/unlocking */
  2878. +static int lock_carry_level(carry_level * level);
  2879. +static void unlock_carry_level(carry_level * level, int failure);
  2880. +static void done_carry_level(carry_level * level);
  2881. +static void unlock_carry_node(carry_level * level, carry_node * node, int fail);
  2882. +
  2883. +int lock_carry_node(carry_level * level, carry_node * node);
  2884. +int lock_carry_node_tail(carry_node * node);
  2885. +
  2886. +/* carry processing proper */
  2887. +static int carry_on_level(carry_level * doing, carry_level * todo);
  2888. +
  2889. +static carry_op *add_op(carry_level * level, pool_ordering order,
  2890. + carry_op * reference);
  2891. +
  2892. +/* handlers for carry operations. */
  2893. +
  2894. +static void fatal_carry_error(carry_level * doing, int ecode);
  2895. +static int add_new_root(carry_level * level, carry_node * node, znode * fake);
  2896. +
  2897. +static void print_level(const char *prefix, carry_level * level);
  2898. +
  2899. +#if REISER4_DEBUG
  2900. +typedef enum {
  2901. + CARRY_TODO,
  2902. + CARRY_DOING
  2903. +} carry_queue_state;
  2904. +static int carry_level_invariant(carry_level * level, carry_queue_state state);
  2905. +#endif
  2906. +
  2907. +/* main entry point for tree balancing.
  2908. +
  2909. + Tree carry performs operations from @doing and while doing so accumulates
  2910. + information about operations to be performed on the next level ("carried"
  2911. + to the parent level). Carried operations are performed, causing possibly
  2912. + more operations to be carried upward etc. carry() takes care about
  2913. + locking and pinning znodes while operating on them.
  2914. +
  2915. + For usage, see comment at the top of fs/reiser4/carry.c
  2916. +
  2917. +*/
  2918. +int reiser4_carry(carry_level * doing /* set of carry operations to be
  2919. + * performed */ ,
  2920. + carry_level * done /* set of nodes, already performed
  2921. + * at the previous level.
  2922. + * NULL in most cases */)
  2923. +{
  2924. + int result = 0;
  2925. + gfp_t old_mask;
  2926. + /* queue of new requests */
  2927. + carry_level *todo;
  2928. + ON_DEBUG(STORE_COUNTERS);
  2929. +
  2930. + assert("nikita-888", doing != NULL);
  2931. + BUG_ON(done != NULL);
  2932. +
  2933. + todo = doing + 1;
  2934. + init_carry_level(todo, doing->pool);
  2935. +
  2936. + /* queue of requests preformed on the previous level */
  2937. + done = todo + 1;
  2938. + init_carry_level(done, doing->pool);
  2939. + /*
  2940. + * NOTE: We are not allowed to fail in the loop below.
  2941. + * Incomplete carry (even if carry_on_level is complete)
  2942. + * can leave the tree in an inconsistent state (broken
  2943. + * order of keys in a node, etc).
  2944. + */
  2945. + old_mask = get_current_context()->gfp_mask;
  2946. + get_current_context()->gfp_mask |= __GFP_NOFAIL;
  2947. +
  2948. + /* iterate until there is nothing more to do */
  2949. + while (result == 0 && doing->ops_num > 0) {
  2950. + carry_level *tmp;
  2951. +
  2952. + /* at this point @done is locked. */
  2953. + /* repeat lock/do/unlock while
  2954. +
  2955. + (1) lock_carry_level() fails due to deadlock avoidance, or
  2956. +
  2957. + (2) carry_on_level() decides that more nodes have to
  2958. + be involved.
  2959. +
  2960. + (3) some unexpected error occurred while balancing on the
  2961. + upper levels. In this case all changes are rolled back.
  2962. +
  2963. + */
  2964. + while (1) {
  2965. + result = lock_carry_level(doing);
  2966. + if (result == 0) {
  2967. + /* perform operations from @doing and
  2968. + accumulate new requests in @todo */
  2969. + result = carry_on_level(doing, todo);
  2970. + if (result == 0)
  2971. + break;
  2972. + else if (result != -E_REPEAT ||
  2973. + !doing->restartable) {
  2974. + warning("nikita-1043",
  2975. + "Fatal error during carry: %i",
  2976. + result);
  2977. + print_level("done", done);
  2978. + print_level("doing", doing);
  2979. + print_level("todo", todo);
  2980. + /* do some rough stuff like aborting
  2981. + all pending transcrashes and thus
  2982. + pushing tree back to the consistent
  2983. + state. Alternatively, just panic.
  2984. + */
  2985. + fatal_carry_error(doing, result);
  2986. + return result;
  2987. + }
  2988. + } else if (result != -E_REPEAT) {
  2989. + fatal_carry_error(doing, result);
  2990. + return result;
  2991. + }
  2992. + unlock_carry_level(doing, 1);
  2993. + }
  2994. + /* at this point @done can be safely unlocked */
  2995. + done_carry_level(done);
  2996. +
  2997. + /* cyclically shift queues */
  2998. + tmp = done;
  2999. + done = doing;
  3000. + doing = todo;
  3001. + todo = tmp;
  3002. + init_carry_level(todo, doing->pool);
  3003. +
  3004. + /* give other threads chance to run */
  3005. + reiser4_preempt_point();
  3006. + }
  3007. + get_current_context()->gfp_mask = old_mask;
  3008. + done_carry_level(done);
  3009. +
  3010. + /* all counters, but x_refs should remain the same. x_refs can change
  3011. + owing to transaction manager */
  3012. + ON_DEBUG(CHECK_COUNTERS);
  3013. + return result;
  3014. +}
  3015. +
  3016. +/* perform carry operations on given level.
  3017. +
  3018. + Optimizations proposed by pooh:
  3019. +
  3020. + (1) don't lock all nodes from queue at the same time. Lock nodes lazily as
  3021. + required;
  3022. +
  3023. + (2) unlock node if there are no more operations to be performed upon it and
  3024. + node didn't add any operation to @todo. This can be implemented by
  3025. + attaching to each node two counters: counter of operations working on this
  3026. + node and counter of operations carried upward from this node.
  3027. +
  3028. +*/
  3029. +static int carry_on_level(carry_level * doing /* queue of carry operations to
  3030. + * do on this level */ ,
  3031. + carry_level * todo /* queue where new carry
  3032. + * operations to be performed on
  3033. + * the * parent level are
  3034. + * accumulated during @doing
  3035. + * processing. */ )
  3036. +{
  3037. + int result;
  3038. + int (*f) (carry_op *, carry_level *, carry_level *);
  3039. + carry_op *op;
  3040. + carry_op *tmp_op;
  3041. +
  3042. + assert("nikita-1034", doing != NULL);
  3043. + assert("nikita-1035", todo != NULL);
  3044. +
  3045. + /* @doing->nodes are locked. */
  3046. +
  3047. + /* This function can be split into two phases: analysis and modification
  3048. +
  3049. + Analysis calculates precisely what items should be moved between
  3050. + nodes. This information is gathered in some structures attached to
  3051. + each carry_node in a @doing queue. Analysis also determines whether
  3052. + new nodes are to be allocated etc.
  3053. +
  3054. + After analysis is completed, actual modification is performed. Here
  3055. + we can take advantage of "batch modification": if there are several
  3056. + operations acting on the same node, modifications can be performed
  3057. + more efficiently when batched together.
  3058. +
  3059. + Above is an optimization left for the future.
  3060. + */
  3061. + /* Important, but delayed optimization: it's possible to batch
  3062. + operations together and perform them more efficiently as a
  3063. + result. For example, deletion of several neighboring items from a
  3064. + node can be converted to a single ->cut() operation.
  3065. +
  3066. + Before processing queue, it should be scanned and "mergeable"
  3067. + operations merged.
  3068. + */
  3069. + result = 0;
  3070. + for_all_ops(doing, op, tmp_op) {
  3071. + carry_opcode opcode;
  3072. +
  3073. + assert("nikita-1041", op != NULL);
  3074. + opcode = op->op;
  3075. + assert("nikita-1042", op->op < COP_LAST_OP);
  3076. + f = op_dispatch_table[op->op].handler;
  3077. + result = f(op, doing, todo);
  3078. + /* locking can fail with -E_REPEAT. Any different error is fatal
  3079. + and will be handled by fatal_carry_error() sledgehammer.
  3080. + */
  3081. + if (result != 0)
  3082. + break;
  3083. + }
  3084. + if (result == 0) {
  3085. + carry_plugin_info info;
  3086. + carry_node *scan;
  3087. + carry_node *tmp_scan;
  3088. +
  3089. + info.doing = doing;
  3090. + info.todo = todo;
  3091. +
  3092. + assert("nikita-3002",
  3093. + carry_level_invariant(doing, CARRY_DOING));
  3094. + for_all_nodes(doing, scan, tmp_scan) {
  3095. + znode *node;
  3096. +
  3097. + node = reiser4_carry_real(scan);
  3098. + assert("nikita-2547", node != NULL);
  3099. + if (node_is_empty(node)) {
  3100. + result =
  3101. + node_plugin_by_node(node)->
  3102. + prepare_removal(node, &info);
  3103. + if (result != 0)
  3104. + break;
  3105. + }
  3106. + }
  3107. + }
  3108. + return result;
  3109. +}
  3110. +
  3111. +/* post carry operation
  3112. +
  3113. + This is main function used by external carry clients: node layout plugins
  3114. + and tree operations to create new carry operation to be performed on some
  3115. + level.
  3116. +
  3117. + New operation will be included in the @level queue. To actually perform it,
  3118. + call carry( level, ... ). This function takes write lock on @node. Carry
  3119. + manages all its locks by itself, don't worry about this.
  3120. +
  3121. + This function adds operation and node at the end of the queue. It is up to
  3122. + caller to guarantee proper ordering of node queue.
  3123. +
  3124. +*/
  3125. +carry_op * reiser4_post_carry(carry_level * level /* queue where new operation
  3126. + * is to be posted at */ ,
  3127. + carry_opcode op /* opcode of operation */ ,
  3128. + znode * node /* node on which this operation
  3129. + * will operate */ ,
  3130. + int apply_to_parent_p /* whether operation will
  3131. + * operate directly on @node
  3132. + * or on its parent. */)
  3133. +{
  3134. + carry_op *result;
  3135. + carry_node *child;
  3136. +
  3137. + assert("nikita-1046", level != NULL);
  3138. + assert("nikita-1788", znode_is_write_locked(node));
  3139. +
  3140. + result = add_op(level, POOLO_LAST, NULL);
  3141. + if (IS_ERR(result))
  3142. + return result;
  3143. + child = reiser4_add_carry(level, POOLO_LAST, NULL);
  3144. + if (IS_ERR(child)) {
  3145. + reiser4_pool_free(&level->pool->op_pool, &result->header);
  3146. + return (carry_op *) child;
  3147. + }
  3148. + result->node = child;
  3149. + result->op = op;
  3150. + child->parent = apply_to_parent_p;
  3151. + if (ZF_ISSET(node, JNODE_ORPHAN))
  3152. + child->left_before = 1;
  3153. + child->node = node;
  3154. + return result;
  3155. +}
  3156. +
  3157. +/* initialize carry queue */
  3158. +void init_carry_level(carry_level * level /* level to initialize */ ,
  3159. + carry_pool * pool /* pool @level will allocate objects
  3160. + * from */ )
  3161. +{
  3162. + assert("nikita-1045", level != NULL);
  3163. + assert("nikita-967", pool != NULL);
  3164. +
  3165. + memset(level, 0, sizeof *level);
  3166. + level->pool = pool;
  3167. +
  3168. + INIT_LIST_HEAD(&level->nodes);
  3169. + INIT_LIST_HEAD(&level->ops);
  3170. +}
  3171. +
  3172. +/* allocate carry pool and initialize pools within queue */
  3173. +carry_pool *init_carry_pool(int size)
  3174. +{
  3175. + carry_pool *pool;
  3176. +
  3177. + assert("", size >= sizeof(carry_pool) + 3 * sizeof(carry_level));
  3178. + pool = kmalloc(size, reiser4_ctx_gfp_mask_get());
  3179. + if (pool == NULL)
  3180. + return ERR_PTR(RETERR(-ENOMEM));
  3181. +
  3182. + reiser4_init_pool(&pool->op_pool, sizeof(carry_op), CARRIES_POOL_SIZE,
  3183. + (char *)pool->op);
  3184. + reiser4_init_pool(&pool->node_pool, sizeof(carry_node),
  3185. + NODES_LOCKED_POOL_SIZE, (char *)pool->node);
  3186. + return pool;
  3187. +}
  3188. +
  3189. +/* finish with queue pools */
  3190. +void done_carry_pool(carry_pool * pool/* pool to destroy */)
  3191. +{
  3192. + reiser4_done_pool(&pool->op_pool);
  3193. + reiser4_done_pool(&pool->node_pool);
  3194. + kfree(pool);
  3195. +}
  3196. +
  3197. +/* add new carry node to the @level.
  3198. +
  3199. + Returns pointer to the new carry node allocated from pool. It's up to
  3200. + callers to maintain proper order in the @level. Assumption is that if carry
  3201. + nodes on one level are already sorted and modifications are performed from
  3202. + left to right, carry nodes added on the parent level will be ordered
  3203. + automatically. To control ordering use @order and @reference parameters.
  3204. +
  3205. +*/
  3206. +carry_node *reiser4_add_carry_skip(carry_level * level /* &carry_level to add
  3207. + * node to */ ,
  3208. + pool_ordering order /* where to insert:
  3209. + * at the beginning of
  3210. + * @level,
  3211. + * before @reference,
  3212. + * after @reference,
  3213. + * at the end of @level
  3214. + */ ,
  3215. + carry_node * reference/* reference node for
  3216. + * insertion */)
  3217. +{
  3218. + ON_DEBUG(carry_node * orig_ref = reference);
  3219. +
  3220. + if (order == POOLO_BEFORE) {
  3221. + reference = find_left_carry(reference, level);
  3222. + if (reference == NULL)
  3223. + reference = list_entry(level->nodes.next, carry_node,
  3224. + header.level_linkage);
  3225. + else
  3226. + reference = list_entry(reference->header.level_linkage.next,
  3227. + carry_node, header.level_linkage);
  3228. + } else if (order == POOLO_AFTER) {
  3229. + reference = find_right_carry(reference, level);
  3230. + if (reference == NULL)
  3231. + reference = list_entry(level->nodes.prev, carry_node,
  3232. + header.level_linkage);
  3233. + else
  3234. + reference = list_entry(reference->header.level_linkage.prev,
  3235. + carry_node, header.level_linkage);
  3236. + }
  3237. + assert("nikita-2209",
  3238. + ergo(orig_ref != NULL,
  3239. + reiser4_carry_real(reference) ==
  3240. + reiser4_carry_real(orig_ref)));
  3241. + return reiser4_add_carry(level, order, reference);
  3242. +}
  3243. +
  3244. +carry_node *reiser4_add_carry(carry_level * level, /* carry_level to add
  3245. + node to */
  3246. + pool_ordering order, /* where to insert:
  3247. + * at the beginning of
  3248. + * @level;
  3249. + * before @reference;
  3250. + * after @reference;
  3251. + * at the end of @level
  3252. + */
  3253. + carry_node * reference /* reference node for
  3254. + * insertion */)
  3255. +{
  3256. + carry_node *result;
  3257. +
  3258. + result =
  3259. + (carry_node *) reiser4_add_obj(&level->pool->node_pool,
  3260. + &level->nodes,
  3261. + order, &reference->header);
  3262. + if (!IS_ERR(result) && (result != NULL))
  3263. + ++level->nodes_num;
  3264. + return result;
  3265. +}
  3266. +
  3267. +/**
  3268. + * add new carry operation to the @level.
  3269. + *
  3270. + * Returns pointer to the new carry operations allocated from pool. It's up to
  3271. + * callers to maintain proper order in the @level. To control ordering use
  3272. + * @order and @reference parameters.
  3273. + */
  3274. +static carry_op *add_op(carry_level * level, /* &carry_level to add node to */
  3275. + pool_ordering order, /* where to insert:
  3276. + * at the beginning of @level;
  3277. + * before @reference;
  3278. + * after @reference;
  3279. + * at the end of @level */
  3280. + carry_op * reference /* reference node for insertion */)
  3281. +{
  3282. + carry_op *result;
  3283. +
  3284. + result =
  3285. + (carry_op *) reiser4_add_obj(&level->pool->op_pool, &level->ops,
  3286. + order, &reference->header);
  3287. + if (!IS_ERR(result) && (result != NULL))
  3288. + ++level->ops_num;
  3289. + return result;
  3290. +}
  3291. +
  3292. +/**
  3293. + * Return node on the right of which @node was created.
  3294. + *
  3295. + * Each node is created on the right of some existing node (or it is new root,
  3296. + * which is special case not handled here).
  3297. + *
  3298. + * @node is new node created on some level, but not yet inserted into its
  3299. + * parent, it has corresponding bit (JNODE_ORPHAN) set in zstate.
  3300. + */
  3301. +static carry_node *find_begetting_brother(carry_node * node,/* node to start
  3302. + search from */
  3303. + carry_level * kin UNUSED_ARG
  3304. + /* level to scan */)
  3305. +{
  3306. + carry_node *scan;
  3307. +
  3308. + assert("nikita-1614", node != NULL);
  3309. + assert("nikita-1615", kin != NULL);
  3310. + assert("nikita-1616", LOCK_CNT_GTZ(rw_locked_tree));
  3311. + assert("nikita-1619", ergo(reiser4_carry_real(node) != NULL,
  3312. + ZF_ISSET(reiser4_carry_real(node),
  3313. + JNODE_ORPHAN)));
  3314. + for (scan = node;;
  3315. + scan = list_entry(scan->header.level_linkage.prev, carry_node,
  3316. + header.level_linkage)) {
  3317. + assert("nikita-1617", &kin->nodes != &scan->header.level_linkage);
  3318. + if ((scan->node != node->node) &&
  3319. + !ZF_ISSET(scan->node, JNODE_ORPHAN)) {
  3320. + assert("nikita-1618", reiser4_carry_real(scan) != NULL);
  3321. + break;
  3322. + }
  3323. + }
  3324. + return scan;
  3325. +}
  3326. +
  3327. +static cmp_t
  3328. +carry_node_cmp(carry_level * level, carry_node * n1, carry_node * n2)
  3329. +{
  3330. + assert("nikita-2199", n1 != NULL);
  3331. + assert("nikita-2200", n2 != NULL);
  3332. +
  3333. + if (n1 == n2)
  3334. + return EQUAL_TO;
  3335. + while (1) {
  3336. + n1 = carry_node_next(n1);
  3337. + if (carry_node_end(level, n1))
  3338. + return GREATER_THAN;
  3339. + if (n1 == n2)
  3340. + return LESS_THAN;
  3341. + }
  3342. + impossible("nikita-2201", "End of level reached");
  3343. +}
  3344. +
  3345. +carry_node *find_carry_node(carry_level * level, const znode * node)
  3346. +{
  3347. + carry_node *scan;
  3348. + carry_node *tmp_scan;
  3349. +
  3350. + assert("nikita-2202", level != NULL);
  3351. + assert("nikita-2203", node != NULL);
  3352. +
  3353. + for_all_nodes(level, scan, tmp_scan) {
  3354. + if (reiser4_carry_real(scan) == node)
  3355. + return scan;
  3356. + }
  3357. + return NULL;
  3358. +}
  3359. +
  3360. +znode *reiser4_carry_real(const carry_node * node)
  3361. +{
  3362. + assert("nikita-3061", node != NULL);
  3363. +
  3364. + return node->lock_handle.node;
  3365. +}
  3366. +
  3367. +carry_node *insert_carry_node(carry_level * doing, carry_level * todo,
  3368. + const znode * node)
  3369. +{
  3370. + carry_node *base;
  3371. + carry_node *scan;
  3372. + carry_node *tmp_scan;
  3373. + carry_node *proj;
  3374. +
  3375. + base = find_carry_node(doing, node);
  3376. + assert("nikita-2204", base != NULL);
  3377. +
  3378. + for_all_nodes(todo, scan, tmp_scan) {
  3379. + proj = find_carry_node(doing, scan->node);
  3380. + assert("nikita-2205", proj != NULL);
  3381. + if (carry_node_cmp(doing, proj, base) != LESS_THAN)
  3382. + break;
  3383. + }
  3384. + return scan;
  3385. +}
  3386. +
  3387. +static carry_node *add_carry_atplace(carry_level * doing, carry_level * todo,
  3388. + znode * node)
  3389. +{
  3390. + carry_node *reference;
  3391. +
  3392. + assert("nikita-2994", doing != NULL);
  3393. + assert("nikita-2995", todo != NULL);
  3394. + assert("nikita-2996", node != NULL);
  3395. +
  3396. + reference = insert_carry_node(doing, todo, node);
  3397. + assert("nikita-2997", reference != NULL);
  3398. +
  3399. + return reiser4_add_carry(todo, POOLO_BEFORE, reference);
  3400. +}
  3401. +
  3402. +/* like reiser4_post_carry(), but designed to be called from node plugin
  3403. + methods. This function is different from reiser4_post_carry() in that it
  3404. + finds proper place to insert node in the queue. */
  3405. +carry_op *node_post_carry(carry_plugin_info * info /* carry parameters
  3406. + * passed down to node
  3407. + * plugin */ ,
  3408. + carry_opcode op /* opcode of operation */ ,
  3409. + znode * node /* node on which this
  3410. + * operation will operate */ ,
  3411. + int apply_to_parent_p /* whether operation will
  3412. + * operate directly on @node
  3413. + * or on its parent. */ )
  3414. +{
  3415. + carry_op *result;
  3416. + carry_node *child;
  3417. +
  3418. + assert("nikita-2207", info != NULL);
  3419. + assert("nikita-2208", info->todo != NULL);
  3420. +
  3421. + if (info->doing == NULL)
  3422. + return reiser4_post_carry(info->todo, op, node,
  3423. + apply_to_parent_p);
  3424. +
  3425. + result = add_op(info->todo, POOLO_LAST, NULL);
  3426. + if (IS_ERR(result))
  3427. + return result;
  3428. + child = add_carry_atplace(info->doing, info->todo, node);
  3429. + if (IS_ERR(child)) {
  3430. + reiser4_pool_free(&info->todo->pool->op_pool, &result->header);
  3431. + return (carry_op *) child;
  3432. + }
  3433. + result->node = child;
  3434. + result->op = op;
  3435. + child->parent = apply_to_parent_p;
  3436. + if (ZF_ISSET(node, JNODE_ORPHAN))
  3437. + child->left_before = 1;
  3438. + child->node = node;
  3439. + return result;
  3440. +}
  3441. +
  3442. +/* lock all carry nodes in @level */
  3443. +static int lock_carry_level(carry_level * level/* level to lock */)
  3444. +{
  3445. + int result;
  3446. + carry_node *node;
  3447. + carry_node *tmp_node;
  3448. +
  3449. + assert("nikita-881", level != NULL);
  3450. + assert("nikita-2229", carry_level_invariant(level, CARRY_TODO));
  3451. +
  3452. + /* lock nodes from left to right */
  3453. + result = 0;
  3454. + for_all_nodes(level, node, tmp_node) {
  3455. + result = lock_carry_node(level, node);
  3456. + if (result != 0)
  3457. + break;
  3458. + }
  3459. + return result;
  3460. +}
  3461. +
  3462. +/* Synchronize delimiting keys between @node and its left neighbor.
  3463. +
  3464. + To reduce contention on dk key and simplify carry code, we synchronize
  3465. + delimiting keys only when carry ultimately leaves tree level (carrying
  3466. + changes upward) and unlocks nodes at this level.
  3467. +
  3468. + This function first finds left neighbor of @node and then updates left
  3469. + neighbor's right delimiting key to coincide with least key in @node.
  3470. +
  3471. +*/
  3472. +
  3473. +ON_DEBUG(extern atomic_t delim_key_version;
  3474. + )
  3475. +
  3476. +static void sync_dkeys(znode * spot/* node to update */)
  3477. +{
  3478. + reiser4_key pivot;
  3479. + reiser4_tree *tree;
  3480. +
  3481. + assert("nikita-1610", spot != NULL);
  3482. + assert("nikita-1612", LOCK_CNT_NIL(rw_locked_dk));
  3483. +
  3484. + tree = znode_get_tree(spot);
  3485. + read_lock_tree(tree);
  3486. + write_lock_dk(tree);
  3487. +
  3488. + assert("nikita-2192", znode_is_loaded(spot));
  3489. +
  3490. + /* sync left delimiting key of @spot with key in its leftmost item */
  3491. + if (node_is_empty(spot))
  3492. + pivot = *znode_get_rd_key(spot);
  3493. + else
  3494. + leftmost_key_in_node(spot, &pivot);
  3495. +
  3496. + znode_set_ld_key(spot, &pivot);
  3497. +
  3498. + /* there can be sequence of empty nodes pending removal on the left of
  3499. + @spot. Scan them and update their left and right delimiting keys to
  3500. + match left delimiting key of @spot. Also, update right delimiting
  3501. + key of first non-empty left neighbor.
  3502. + */
  3503. + while (1) {
  3504. + if (!ZF_ISSET(spot, JNODE_LEFT_CONNECTED))
  3505. + break;
  3506. +
  3507. + spot = spot->left;
  3508. + if (spot == NULL)
  3509. + break;
  3510. +
  3511. + znode_set_rd_key(spot, &pivot);
  3512. + /* don't sink into the domain of another balancing */
  3513. + if (!znode_is_write_locked(spot))
  3514. + break;
  3515. + if (ZF_ISSET(spot, JNODE_HEARD_BANSHEE))
  3516. + znode_set_ld_key(spot, &pivot);
  3517. + else
  3518. + break;
  3519. + }
  3520. +
  3521. + write_unlock_dk(tree);
  3522. + read_unlock_tree(tree);
  3523. +}
  3524. +
  3525. +/* unlock all carry nodes in @level */
  3526. +static void unlock_carry_level(carry_level * level /* level to unlock */ ,
  3527. + int failure /* true if unlocking owing to
  3528. + * failure */ )
  3529. +{
  3530. + carry_node *node;
  3531. + carry_node *tmp_node;
  3532. +
  3533. + assert("nikita-889", level != NULL);
  3534. +
  3535. + if (!failure) {
  3536. + znode *spot;
  3537. +
  3538. + spot = NULL;
  3539. + /* update delimiting keys */
  3540. + for_all_nodes(level, node, tmp_node) {
  3541. + if (reiser4_carry_real(node) != spot) {
  3542. + spot = reiser4_carry_real(node);
  3543. + sync_dkeys(spot);
  3544. + }
  3545. + }
  3546. + }
  3547. +
  3548. + /* nodes can be unlocked in arbitrary order. In preemptible
  3549. + environment it's better to unlock in reverse order of locking,
  3550. + though.
  3551. + */
  3552. + for_all_nodes_back(level, node, tmp_node) {
  3553. + /* all allocated nodes should be already linked to their
  3554. + parents at this moment. */
  3555. + assert("nikita-1631",
  3556. + ergo(!failure, !ZF_ISSET(reiser4_carry_real(node),
  3557. + JNODE_ORPHAN)));
  3558. + ON_DEBUG(check_dkeys(reiser4_carry_real(node)));
  3559. + unlock_carry_node(level, node, failure);
  3560. + }
  3561. + level->new_root = NULL;
  3562. +}
  3563. +
  3564. +/* finish with @level
  3565. +
  3566. + Unlock nodes and release all allocated resources */
  3567. +static void done_carry_level(carry_level * level/* level to finish */)
  3568. +{
  3569. + carry_node *node;
  3570. + carry_node *tmp_node;
  3571. + carry_op *op;
  3572. + carry_op *tmp_op;
  3573. +
  3574. + assert("nikita-1076", level != NULL);
  3575. +
  3576. + unlock_carry_level(level, 0);
  3577. + for_all_nodes(level, node, tmp_node) {
  3578. + assert("nikita-2113", list_empty_careful(&node->lock_handle.locks_link));
  3579. + assert("nikita-2114", list_empty_careful(&node->lock_handle.owners_link));
  3580. + reiser4_pool_free(&level->pool->node_pool, &node->header);
  3581. + }
  3582. + for_all_ops(level, op, tmp_op)
  3583. + reiser4_pool_free(&level->pool->op_pool, &op->header);
  3584. +}
  3585. +
  3586. +/* helper function to complete locking of carry node
  3587. +
  3588. + Finish locking of carry node. There are several ways in which new carry
  3589. + node can be added into carry level and locked. Normal is through
  3590. + lock_carry_node(), but also from find_{left|right}_neighbor(). This
  3591. + function factors out common final part of all locking scenarios. It
  3592. + supposes that @node -> lock_handle is lock handle for lock just taken and
  3593. + fills ->real_node from this lock handle.
  3594. +
  3595. +*/
  3596. +int lock_carry_node_tail(carry_node * node/* node to complete locking of */)
  3597. +{
  3598. + assert("nikita-1052", node != NULL);
  3599. + assert("nikita-1187", reiser4_carry_real(node) != NULL);
  3600. + assert("nikita-1188", !node->unlock);
  3601. +
  3602. + node->unlock = 1;
  3603. + /* Load node content into memory and install node plugin by
  3604. + looking at the node header.
  3605. +
  3606. + Most of the time this call is cheap because the node is
  3607. + already in memory.
  3608. +
  3609. + Corresponding zrelse() is in unlock_carry_node()
  3610. + */
  3611. + return zload(reiser4_carry_real(node));
  3612. +}
  3613. +
  3614. +/* lock carry node
  3615. +
  3616. + "Resolve" node to real znode, lock it and mark as locked.
  3617. + This requires recursive locking of znodes.
  3618. +
  3619. + When operation is posted to the parent level, node it will be applied to is
  3620. + not yet known. For example, when shifting data between two nodes,
  3621. + delimiting keys have to be updated in parent or parents of nodes involved. But
  3622. + their parents are not yet locked and, moreover, said nodes can be reparented
  3623. + by concurrent balancing.
  3624. +
  3625. + To work around this, carry operation is applied to special "carry node"
  3626. + rather than to the znode itself. Carry node consists of some "base" or
  3627. + "reference" znode and flags indicating how to get to the target of carry
  3628. + operation (->real_node field of carry_node) from base.
  3629. +
  3630. +*/
  3631. +int lock_carry_node(carry_level * level /* level @node is in */ ,
  3632. + carry_node * node/* node to lock */)
  3633. +{
  3634. + int result;
  3635. + znode *reference_point;
  3636. + lock_handle lh;
  3637. + lock_handle tmp_lh;
  3638. + reiser4_tree *tree;
  3639. +
  3640. + assert("nikita-887", level != NULL);
  3641. + assert("nikita-882", node != NULL);
  3642. +
  3643. + result = 0;
  3644. + reference_point = node->node;
  3645. + init_lh(&lh);
  3646. + init_lh(&tmp_lh);
  3647. + if (node->left_before) {
  3648. + /* handling of new nodes, allocated on the previous level:
  3649. +
  3650. + some carry ops were probably posted from the new node, but
  3651. + this node neither has parent pointer set, nor is
  3652. + connected. This will be done in ->create_hook() for
  3653. + internal item.
  3654. +
  3655. + Nonetheless, parent of new node has to be locked. To do
  3656. + this, first go to the "left" in the carry order. This
  3657. + depends on the decision to always allocate new node on the
  3658. + right of existing one.
  3659. +
  3660. + Loop handles case when multiple nodes, all orphans, were
  3661. + inserted.
  3662. +
  3663. + Strictly speaking, taking tree lock is not necessary here,
  3664. + because all nodes scanned by loop in
  3665. + find_begetting_brother() are write-locked by this thread,
  3666. + and thus, their sibling linkage cannot change.
  3667. +
  3668. + */
  3669. + tree = znode_get_tree(reference_point);
  3670. + read_lock_tree(tree);
  3671. + reference_point = find_begetting_brother(node, level)->node;
  3672. + read_unlock_tree(tree);
  3673. + assert("nikita-1186", reference_point != NULL);
  3674. + }
  3675. + if (node->parent && (result == 0)) {
  3676. + result =
  3677. + reiser4_get_parent(&tmp_lh, reference_point,
  3678. + ZNODE_WRITE_LOCK);
  3679. + if (result != 0) {
  3680. + ; /* nothing */
  3681. + } else if (znode_get_level(tmp_lh.node) == 0) {
  3682. + assert("nikita-1347", znode_above_root(tmp_lh.node));
  3683. + result = add_new_root(level, node, tmp_lh.node);
  3684. + if (result == 0) {
  3685. + reference_point = level->new_root;
  3686. + move_lh(&lh, &node->lock_handle);
  3687. + }
  3688. + } else if ((level->new_root != NULL)
  3689. + && (level->new_root !=
  3690. + znode_parent_nolock(reference_point))) {
  3691. + /* parent of node exists, but this level already
  3692. + created different new root, so */
  3693. + warning("nikita-1109",
  3694. + /* it should be "radicis", but tradition is
  3695. + tradition. do banshees read latin? */
  3696. + "hodie natus est radici frater");
  3697. + result = -EIO;
  3698. + } else {
  3699. + move_lh(&lh, &tmp_lh);
  3700. + reference_point = lh.node;
  3701. + }
  3702. + }
  3703. + if (node->left && (result == 0)) {
  3704. + assert("nikita-1183", node->parent);
  3705. + assert("nikita-883", reference_point != NULL);
  3706. + result =
  3707. + reiser4_get_left_neighbor(&tmp_lh, reference_point,
  3708. + ZNODE_WRITE_LOCK,
  3709. + GN_CAN_USE_UPPER_LEVELS);
  3710. + if (result == 0) {
  3711. + done_lh(&lh);
  3712. + move_lh(&lh, &tmp_lh);
  3713. + reference_point = lh.node;
  3714. + }
  3715. + }
  3716. + if (!node->parent && !node->left && !node->left_before) {
  3717. + result =
  3718. + longterm_lock_znode(&lh, reference_point, ZNODE_WRITE_LOCK,
  3719. + ZNODE_LOCK_HIPRI);
  3720. + }
  3721. + if (result == 0) {
  3722. + move_lh(&node->lock_handle, &lh);
  3723. + result = lock_carry_node_tail(node);
  3724. + }
  3725. + done_lh(&tmp_lh);
  3726. + done_lh(&lh);
  3727. + return result;
  3728. +}
  3729. +
  3730. +/* release a lock on &carry_node.
  3731. +
  3732. + Release, if necessary, the lock on @node. This operation is the pair of
  3733. + lock_carry_node() and is idempotent: you can call it more than once on the
  3734. + same node.
  3735. +
  3736. +*/
  3737. +static void
  3738. +unlock_carry_node(carry_level * level,
  3739. + carry_node * node /* node to be released */ ,
  3740. + int failure /* 0 if node is unlocked due
  3741. + * to some error */ )
  3742. +{
  3743. + znode *real_node;
  3744. +
  3745. + assert("nikita-884", node != NULL);
  3746. +
  3747. + real_node = reiser4_carry_real(node);
  3748. + /* pair to zload() in lock_carry_node_tail() */
  3749. + zrelse(real_node);
  3750. + if (node->unlock && (real_node != NULL)) {
  3751. + assert("nikita-899", real_node == node->lock_handle.node);
  3752. + longterm_unlock_znode(&node->lock_handle);
  3753. + }
  3754. + if (failure) {
  3755. + if (node->deallocate && (real_node != NULL)) {
  3756. + /* free node in bitmap
  3757. +
  3758. + Prepare node for removal. Last zput() will finish
  3759. + with it.
  3760. + */
  3761. + ZF_SET(real_node, JNODE_HEARD_BANSHEE);
  3762. + }
  3763. + if (node->free) {
  3764. + assert("nikita-2177",
  3765. + list_empty_careful(&node->lock_handle.locks_link));
  3766. + assert("nikita-2112",
  3767. + list_empty_careful(&node->lock_handle.owners_link));
  3768. + reiser4_pool_free(&level->pool->node_pool,
  3769. + &node->header);
  3770. + }
  3771. + }
  3772. +}
  3773. +
  3774. +/* fatal_carry_error() - all-catching error handling function
  3775. +
  3776. + It is possible that carry faces unrecoverable error, like inability to
  3777. + insert pointer at the internal level. Our simple solution is just panic in
  3778. + this situation. More sophisticated things like attempt to remount
  3779. + file-system as read-only can be implemented without much difficulty.
  3780. +
  3781. + It is believed, that:
  3782. +
  3783. + 1. instead of panicking, all current transactions can be aborted rolling
  3784. + system back to the consistent state.
  3785. +
  3786. +Umm, if you simply panic without doing anything more at all, then all current
  3787. +transactions are aborted and the system is rolled back to a consistent state,
  3788. +by virtue of the design of the transactional mechanism. Well, wait, let's be
  3789. +precise. If an internal node is corrupted on disk due to hardware failure,
  3790. +then there may be no consistent state that can be rolled back to, so instead
  3791. +we should say that it will rollback the transactions, which barring other
  3792. +factors means rolling back to a consistent state.
  3793. +
  3794. +# Nikita: there is a subtle difference between panic and aborting
  3795. +# transactions: machine doesn't reboot. Processes aren't killed. Processes
  3796. +# not using reiser4 (not that we care about such processes), or using other
  3797. +# reiser4 mounts (about them we do care) will simply continue to run. With
  3798. +# some luck, even application using aborted file system can survive: it will
  3799. +# get some error, like EBADF, from each file descriptor on failed file system,
  3800. +# but applications that do care about tolerance will cope with this (squid
  3801. +# will).
  3802. +
  3803. +It would be a nice feature though to support rollback without rebooting
  3804. +followed by remount, but this can wait for later versions.
  3805. +
  3806. + 2. once isolated transactions will be implemented it will be possible to
  3807. + roll back offending transaction.
  3808. +
  3809. +2. is additional code complexity of inconsistent value (it implies that a
  3810. +broken tree should be kept in operation), so we must think about it more
  3811. +before deciding if it should be done. -Hans
  3812. +
  3813. +*/
  3814. +static void fatal_carry_error(carry_level * doing UNUSED_ARG /* carry level
  3815. + * where
  3816. + * unrecoverable
  3817. + * error
  3818. + * occurred */ ,
  3819. + int ecode/* error code */)
  3820. +{
  3821. + assert("nikita-1230", doing != NULL);
  3822. + assert("nikita-1231", ecode < 0);
  3823. +
  3824. + reiser4_panic("nikita-1232", "Carry failed: %i", ecode);
  3825. +}
  3826. +
  3827. +/**
  3828. + * Add new root to the tree
  3829. + *
  3830. + * This function itself only manages changes in carry structures and delegates
  3831. + * all hard work (allocation of znode for new root, changes of parent and
  3832. + * sibling pointers) to the reiser4_add_tree_root().
  3833. + *
  3834. + * Locking: old tree root is locked by carry at this point. Fake znode is also
  3835. + * locked.
  3836. + */
  3837. +static int add_new_root(carry_level * level,/* carry level in context of which
  3838. + * operation is performed */
  3839. + carry_node * node, /* carry node for existing root */
  3840. + znode * fake /* "fake" znode already locked by
  3841. + * us */)
  3842. +{
  3843. + int result;
  3844. +
  3845. + assert("nikita-1104", level != NULL);
  3846. + assert("nikita-1105", node != NULL);
  3847. +
  3848. + assert("nikita-1403", znode_is_write_locked(node->node));
  3849. + assert("nikita-1404", znode_is_write_locked(fake));
  3850. +
  3851. + /* trying to create new root. */
  3852. + /* @node is root and it's already locked by us. This
  3853. + means that nobody else can be trying to add/remove
  3854. + tree root right now.
  3855. + */
  3856. + if (level->new_root == NULL)
  3857. + level->new_root = reiser4_add_tree_root(node->node, fake);
  3858. + if (!IS_ERR(level->new_root)) {
  3859. + assert("nikita-1210", znode_is_root(level->new_root));
  3860. + node->deallocate = 1;
  3861. + result =
  3862. + longterm_lock_znode(&node->lock_handle, level->new_root,
  3863. + ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI);
  3864. + if (result == 0)
  3865. + zput(level->new_root);
  3866. + } else {
  3867. + result = PTR_ERR(level->new_root);
  3868. + level->new_root = NULL;
  3869. + }
  3870. + return result;
  3871. +}
  3872. +
  3873. +/* allocate new znode and add the operation that inserts the
  3874. + pointer to it into the parent node into the todo level
  3875. +
  3876. + Allocate new znode, add it into carry queue and post into @todo queue
  3877. + request to add pointer to new node into its parent.
  3878. +
  3879. + This is carry related routine that calls reiser4_new_node() to allocate new
  3880. + node.
  3881. +*/
  3882. +carry_node *add_new_znode(znode * brother /* existing left neighbor of new
  3883. + * node */ ,
  3884. + carry_node * ref /* carry node after which new
  3885. + * carry node is to be inserted
  3886. + * into queue. This affects
  3887. + * locking. */ ,
  3888. + carry_level * doing /* carry queue where new node is
  3889. + * to be added */ ,
  3890. + carry_level * todo /* carry queue where COP_INSERT
  3891. + * operation to add pointer to
  3892. + * new node will be added */ )
  3893. +{
  3894. + carry_node *fresh;
  3895. + znode *new_znode;
  3896. + carry_op *add_pointer;
  3897. + carry_plugin_info info;
  3898. +
  3899. + assert("nikita-1048", brother != NULL);
  3900. + assert("nikita-1049", todo != NULL);
  3901. +
  3902. + /* There is a lot of possible variations here: to what parent
  3903. + new node will be attached and where. For simplicity, always
  3904. + do the following:
  3905. +
  3906. + (1) new node and @brother will have the same parent.
  3907. +
  3908. + (2) new node is added on the right of @brother
  3909. +
  3910. + */
  3911. +
  3912. + fresh = reiser4_add_carry_skip(doing,
  3913. + ref ? POOLO_AFTER : POOLO_LAST, ref);
  3914. + if (IS_ERR(fresh))
  3915. + return fresh;
  3916. +
  3917. + fresh->deallocate = 1;
  3918. + fresh->free = 1;
  3919. +
  3920. + new_znode = reiser4_new_node(brother, znode_get_level(brother));
  3921. + if (IS_ERR(new_znode))
  3922. + /* @fresh will be deallocated automatically by error
  3923. + handling code in the caller. */
  3924. + return (carry_node *) new_znode;
  3925. +
  3926. + /* new_znode returned znode with x_count 1. Caller has to decrease
  3927. + it. make_space() does. */
  3928. +
  3929. + ZF_SET(new_znode, JNODE_ORPHAN);
  3930. + fresh->node = new_znode;
  3931. +
  3932. + while (ZF_ISSET(reiser4_carry_real(ref), JNODE_ORPHAN)) {
  3933. + ref = carry_node_prev(ref);
  3934. + assert("nikita-1606", !carry_node_end(doing, ref));
  3935. + }
  3936. +
  3937. + info.todo = todo;
  3938. + info.doing = doing;
  3939. + add_pointer = node_post_carry(&info, COP_INSERT,
  3940. + reiser4_carry_real(ref), 1);
  3941. + if (IS_ERR(add_pointer)) {
  3942. + /* no need to deallocate @new_znode here: it will be
  3943. + deallocated during carry error handling. */
  3944. + return (carry_node *) add_pointer;
  3945. + }
  3946. +
  3947. + add_pointer->u.insert.type = COPT_CHILD;
  3948. + add_pointer->u.insert.child = fresh;
  3949. + add_pointer->u.insert.brother = brother;
  3950. + /* initially new node spans empty key range */
  3951. + write_lock_dk(znode_get_tree(brother));
  3952. + znode_set_ld_key(new_znode,
  3953. + znode_set_rd_key(new_znode,
  3954. + znode_get_rd_key(brother)));
  3955. + write_unlock_dk(znode_get_tree(brother));
  3956. + return fresh;
  3957. +}
  3958. +
  3959. +/* DEBUGGING FUNCTIONS.
  3960. +
  3961. + Probably we also should leave them on even when
  3962. + debugging is turned off to print dumps at errors.
  3963. +*/
  3964. +#if REISER4_DEBUG
  3965. +static int carry_level_invariant(carry_level * level, carry_queue_state state)
  3966. +{
  3967. + carry_node *node;
  3968. + carry_node *tmp_node;
  3969. +
  3970. + if (level == NULL)
  3971. + return 0;
  3972. +
  3973. + if (level->track_type != 0 &&
  3974. + level->track_type != CARRY_TRACK_NODE &&
  3975. + level->track_type != CARRY_TRACK_CHANGE)
  3976. + return 0;
  3977. +
  3978. + /* check that nodes are in ascending order */
  3979. + for_all_nodes(level, node, tmp_node) {
  3980. + znode *left;
  3981. + znode *right;
  3982. +
  3983. + reiser4_key lkey;
  3984. + reiser4_key rkey;
  3985. +
  3986. + if (node != carry_node_front(level)) {
  3987. + if (state == CARRY_TODO) {
  3988. + right = node->node;
  3989. + left = carry_node_prev(node)->node;
  3990. + } else {
  3991. + right = reiser4_carry_real(node);
  3992. + left = reiser4_carry_real(carry_node_prev(node));
  3993. + }
  3994. + if (right == NULL || left == NULL)
  3995. + continue;
  3996. + if (node_is_empty(right) || node_is_empty(left))
  3997. + continue;
  3998. + if (!keyle(leftmost_key_in_node(left, &lkey),
  3999. + leftmost_key_in_node(right, &rkey))) {
  4000. + warning("", "wrong key order");
  4001. + return 0;
  4002. + }
  4003. + }
  4004. + }
  4005. + return 1;
  4006. +}
  4007. +#endif
  4008. +
  4009. +/* get symbolic name for boolean */
  4010. +static const char *tf(int boolean/* truth value */)
  4011. +{
  4012. + return boolean ? "t" : "f";
  4013. +}
  4014. +
  4015. +/* symbolic name for carry operation */
  4016. +static const char *carry_op_name(carry_opcode op/* carry opcode */)
  4017. +{
  4018. + switch (op) {
  4019. + case COP_INSERT:
  4020. + return "COP_INSERT";
  4021. + case COP_DELETE:
  4022. + return "COP_DELETE";
  4023. + case COP_CUT:
  4024. + return "COP_CUT";
  4025. + case COP_PASTE:
  4026. + return "COP_PASTE";
  4027. + case COP_UPDATE:
  4028. + return "COP_UPDATE";
  4029. + case COP_EXTENT:
  4030. + return "COP_EXTENT";
  4031. + case COP_INSERT_FLOW:
  4032. + return "COP_INSERT_FLOW";
  4033. + default:{
  4034. + /* not mt safe, but who cares? */
  4035. + static char buf[20];
  4036. +
  4037. + sprintf(buf, "unknown op: %x", op);
  4038. + return buf;
  4039. + }
  4040. + }
  4041. +}
  4042. +
  4043. +/* dump information about carry node */
  4044. +static void print_carry(const char *prefix /* prefix to print */ ,
  4045. + carry_node * node/* node to print */)
  4046. +{
  4047. + if (node == NULL) {
  4048. + printk("%s: null\n", prefix);
  4049. + return;
  4050. + }
  4051. + printk
  4052. + ("%s: %p parent: %s, left: %s, unlock: %s, free: %s, dealloc: %s\n",
  4053. + prefix, node, tf(node->parent), tf(node->left), tf(node->unlock),
  4054. + tf(node->free), tf(node->deallocate));
  4055. +}
  4056. +
  4057. +/* dump information about carry operation */
  4058. +static void print_op(const char *prefix /* prefix to print */ ,
  4059. + carry_op * op/* operation to print */)
  4060. +{
  4061. + if (op == NULL) {
  4062. + printk("%s: null\n", prefix);
  4063. + return;
  4064. + }
  4065. + printk("%s: %p carry_opcode: %s\n", prefix, op, carry_op_name(op->op));
  4066. + print_carry("\tnode", op->node);
  4067. + switch (op->op) {
  4068. + case COP_INSERT:
  4069. + case COP_PASTE:
  4070. + print_coord("\tcoord",
  4071. + op->u.insert.d ? op->u.insert.d->coord : NULL, 0);
  4072. + reiser4_print_key("\tkey",
  4073. + op->u.insert.d ? op->u.insert.d->key : NULL);
  4074. + print_carry("\tchild", op->u.insert.child);
  4075. + break;
  4076. + case COP_DELETE:
  4077. + print_carry("\tchild", op->u.delete.child);
  4078. + break;
  4079. + case COP_CUT:
  4080. + if (op->u.cut_or_kill.is_cut) {
  4081. + print_coord("\tfrom",
  4082. + op->u.cut_or_kill.u.kill->params.from, 0);
  4083. + print_coord("\tto", op->u.cut_or_kill.u.kill->params.to,
  4084. + 0);
  4085. + } else {
  4086. + print_coord("\tfrom",
  4087. + op->u.cut_or_kill.u.cut->params.from, 0);
  4088. + print_coord("\tto", op->u.cut_or_kill.u.cut->params.to,
  4089. + 0);
  4090. + }
  4091. + break;
  4092. + case COP_UPDATE:
  4093. + print_carry("\tleft", op->u.update.left);
  4094. + break;
  4095. + default:
  4096. + /* do nothing */
  4097. + break;
  4098. + }
  4099. +}
  4100. +
  4101. +/* dump information about all nodes and operations in a @level */
  4102. +static void print_level(const char *prefix /* prefix to print */ ,
  4103. + carry_level * level/* level to print */)
  4104. +{
  4105. + carry_node *node;
  4106. + carry_node *tmp_node;
  4107. + carry_op *op;
  4108. + carry_op *tmp_op;
  4109. +
  4110. + if (level == NULL) {
  4111. + printk("%s: null\n", prefix);
  4112. + return;
  4113. + }
  4114. + printk("%s: %p, restartable: %s\n",
  4115. + prefix, level, tf(level->restartable));
  4116. +
  4117. + for_all_nodes(level, node, tmp_node)
  4118. + print_carry("\tcarry node", node);
  4119. + for_all_ops(level, op, tmp_op)
  4120. + print_op("\tcarry op", op);
  4121. +}
  4122. +
  4123. +/* Make Linus happy.
  4124. + Local variables:
  4125. + c-indentation-style: "K&R"
  4126. + mode-name: "LC"
  4127. + c-basic-offset: 8
  4128. + tab-width: 8
  4129. + fill-column: 120
  4130. + scroll-step: 1
  4131. + End:
  4132. +*/
  4133. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/carry.h linux-5.16.14/fs/reiser4/carry.h
  4134. --- linux-5.16.14.orig/fs/reiser4/carry.h 1970-01-01 01:00:00.000000000 +0100
  4135. +++ linux-5.16.14/fs/reiser4/carry.h 2022-03-12 13:26:19.641892702 +0100
  4136. @@ -0,0 +1,445 @@
  4137. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  4138. + reiser4/README */
  4139. +
  4140. +/* Functions and data types to "carry" tree modification(s) upward.
  4141. + See fs/reiser4/carry.c for details. */
  4142. +
  4143. +#if !defined(__FS_REISER4_CARRY_H__)
  4144. +#define __FS_REISER4_CARRY_H__
  4145. +
  4146. +#include "forward.h"
  4147. +#include "debug.h"
  4148. +#include "pool.h"
  4149. +#include "znode.h"
  4150. +
  4151. +#include <linux/types.h>
  4152. +
  4153. +/* &carry_node - "location" of carry node.
  4154. +
  4155. + "location" of node that is involved or going to be involved into
  4156. + carry process. Node where operation will be carried to on the
  4157. + parent level cannot be recorded explicitly. Operation will be carried
  4158. + usually to the parent of some node (where changes are performed at
  4159. + the current level) or, to the left neighbor of its parent. But while
  4160. + modifications are performed at the current level, parent may
  4161. + change. So, we have to allow some indirection (or, positevly,
  4162. + flexibility) in locating carry nodes.
  4163. +
  4164. +*/
  4165. +typedef struct carry_node {
  4166. + /* pool linkage */
  4167. + struct reiser4_pool_header header;
  4168. +
  4169. + /* base node from which real_node is calculated. See
  4170. + fs/reiser4/carry.c:lock_carry_node(). */
  4171. + znode *node;
  4172. +
  4173. + /* how to get ->real_node */
  4174. + /* to get ->real_node obtain parent of ->node */
  4175. + __u32 parent:1;
  4176. + /* to get ->real_node obtain left neighbor of parent of
  4177. + ->node */
  4178. + __u32 left:1;
  4179. + __u32 left_before:1;
  4180. +
  4181. + /* locking */
  4182. +
  4183. + /* this node was locked by carry process and should be
  4184. + unlocked when carry leaves a level */
  4185. + __u32 unlock:1;
  4186. +
  4187. + /* disk block for this node was allocated by carry process and
  4188. + should be deallocated when carry leaves a level */
  4189. + __u32 deallocate:1;
  4190. + /* this carry node was allocated by carry process and should be
  4191. + freed when carry leaves a level */
  4192. + __u32 free:1;
  4193. +
  4194. + /* type of lock we want to take on this node */
  4195. + lock_handle lock_handle;
  4196. +} carry_node;
  4197. +
  4198. +/* &carry_opcode - elementary operations that can be carried upward
  4199. +
  4200. + Operations that carry() can handle. This list is supposed to be
  4201. + expanded.
  4202. +
  4203. + Each carry operation (cop) is handled by appropriate function defined
  4204. + in fs/reiser4/carry.c. For example COP_INSERT is handled by
  4205. + fs/reiser4/carry.c:carry_insert() etc. These functions in turn
  4206. + call plugins of nodes affected by operation to modify nodes' content
  4207. + and to gather operations to be performed on the next level.
  4208. +
  4209. +*/
  4210. +typedef enum {
  4211. + /* insert new item into node. */
  4212. + COP_INSERT,
  4213. + /* delete pointer from parent node */
  4214. + COP_DELETE,
  4215. + /* remove part of or whole node. */
  4216. + COP_CUT,
  4217. + /* increase size of item. */
  4218. + COP_PASTE,
  4219. + /* insert extent (that is sequence of unformatted nodes). */
  4220. + COP_EXTENT,
  4221. + /* update delimiting key in least common ancestor of two
  4222. + nodes. This is performed when items are moved between two
  4223. + nodes.
  4224. + */
  4225. + COP_UPDATE,
  4226. + /* insert flow */
  4227. + COP_INSERT_FLOW,
  4228. + COP_LAST_OP,
  4229. +} carry_opcode;
  4230. +
  4231. +#define CARRY_FLOW_NEW_NODES_LIMIT 20
  4232. +
  4233. +/* mode (or subtype) of COP_{INSERT|PASTE} operation. Specifies how target
  4234. + item is determined. */
  4235. +typedef enum {
  4236. + /* target item is one containing pointer to the ->child node */
  4237. + COPT_CHILD,
  4238. + /* target item is given explicitly by @coord */
  4239. + COPT_ITEM_DATA,
  4240. + /* target item is given by key */
  4241. + COPT_KEY,
  4242. + /* see insert_paste_common() for more comments on this. */
  4243. + COPT_PASTE_RESTARTED,
  4244. +} cop_insert_pos_type;
  4245. +
  4246. +/* flags to cut and delete */
  4247. +typedef enum {
  4248. + /* don't kill node even if it became completely empty as results of
  4249. + * cut. This is needed for eottl handling. See carry_extent() for
  4250. + * details. */
  4251. + DELETE_RETAIN_EMPTY = (1 << 0)
  4252. +} cop_delete_flag;
  4253. +
  4254. +/*
  4255. + * carry() implements "lock handle tracking" feature.
  4256. + *
  4257. + * Callers supply carry with node where to perform initial operation and lock
  4258. + * handle on this node. Trying to optimize node utilization carry may actually
  4259. + * move insertion point to different node. Callers expect that lock handle
  4260. + * will be transferred to the new node also.
  4261. + *
  4262. + */
  4263. +typedef enum {
  4264. + /* transfer lock handle along with insertion point */
  4265. + CARRY_TRACK_CHANGE = 1,
  4266. + /* acquire new lock handle to the node where insertion point is. This
  4267. + * is used when carry() client doesn't initially possess lock handle
  4268. + * on the insertion point node, for example, by extent insertion
  4269. + * code. See carry_extent(). */
  4270. + CARRY_TRACK_NODE = 2
  4271. +} carry_track_type;
  4272. +
  4273. +/* data supplied to COP_{INSERT|PASTE} by callers */
  4274. +typedef struct carry_insert_data {
  4275. + /* position where new item is to be inserted */
  4276. + coord_t *coord;
  4277. + /* new item description */
  4278. + reiser4_item_data * data;
  4279. + /* key of new item */
  4280. + const reiser4_key * key;
  4281. +} carry_insert_data;
  4282. +
  4283. +/* cut and kill are similar, so carry_cut_data and carry_kill_data share the
  4284. + below structure of parameters */
  4285. +struct cut_kill_params {
  4286. + /* coord where cut starts (inclusive) */
  4287. + coord_t *from;
  4288. + /* coord where cut stops (inclusive, this item/unit will also be
  4289. + * cut) */
  4290. + coord_t *to;
  4291. + /* starting key. This is necessary when item and unit pos don't
  4292. + * uniquely identify what portion or tree to remove. For example, this
  4293. + * indicates what portion of extent unit will be affected. */
  4294. + const reiser4_key * from_key;
  4295. + /* exclusive stop key */
  4296. + const reiser4_key * to_key;
  4297. + /* if this is not NULL, smallest actually removed key is stored
  4298. + * here. */
  4299. + reiser4_key *smallest_removed;
  4300. + /* kill_node_content() is called for file truncate */
  4301. + int truncate;
  4302. +};
  4303. +
  4304. +struct carry_cut_data {
  4305. + struct cut_kill_params params;
  4306. +};
  4307. +
  4308. +struct carry_kill_data {
  4309. + struct cut_kill_params params;
  4310. + /* parameter to be passed to the ->kill_hook() method of item
  4311. + * plugin */
  4312. + /*void *iplug_params; *//* FIXME: unused currently */
  4313. + /* if not NULL---inode whose items are being removed. This is needed
  4314. + * for ->kill_hook() of extent item to update VM structures when
  4315. + * removing pages. */
  4316. + struct inode *inode;
  4317. + /* sibling list maintenance is complicated by existence of eottl. When
  4318. + * eottl whose left and right neighbors are formatted leaves is
  4319. + * removed, one has to connect said leaves in the sibling list. This
  4320. + * cannot be done when extent removal is just started as locking rules
  4321. + * require sibling list update to happen atomically with removal of
  4322. + * extent item. Therefore: 1. pointers to left and right neighbors
  4323. + * have to be passed down to the ->kill_hook() of extent item, and
  4324. + * 2. said neighbors have to be locked. */
  4325. + lock_handle *left;
  4326. + lock_handle *right;
  4327. + /* flags modifying behavior of kill. Currently, it may have
  4328. + DELETE_RETAIN_EMPTY set. */
  4329. + unsigned flags;
  4330. + char *buf;
  4331. +};
  4332. +
  4333. +/* &carry_tree_op - operation to "carry" upward.
  4334. +
  4335. + Description of an operation we want to "carry" to the upper level of
  4336. + a tree: e.g, when we insert something and there is not enough space
  4337. + we allocate a new node and "carry" the operation of inserting a
  4338. + pointer to the new node to the upper level, on removal of empty node,
  4339. + we carry up operation of removing appropriate entry from parent.
  4340. +
  4341. + There are two types of carry ops: when adding or deleting node, the
  4342. + node at the parent level where appropriate modification has to be
  4343. + performed is known in advance. When shifting items between nodes
  4344. + (split, merge), delimiting key should be changed in the least common
  4345. + parent of the nodes involved that is not known in advance.
  4346. +
  4347. + For the operations of the first type we store in &carry_op pointer to
  4348. + the &carry_node at the parent level. For the operation of the second
  4349. + type we store &carry_node or parents of the left and right nodes
  4350. + modified and keep track of them upward until they coincide.
  4351. +
  4352. +*/
  4353. +typedef struct carry_op {
  4354. + /* pool linkage */
  4355. + struct reiser4_pool_header header;
  4356. + carry_opcode op;
  4357. + /* node on which operation is to be performed:
  4358. +
  4359. + for insert, paste: node where new item is to be inserted
  4360. +
  4361. + for delete: node where pointer is to be deleted
  4362. +
  4363. + for cut: node to cut from
  4364. +
  4365. + for update: node where delimiting key is to be modified
  4366. +
  4367. + for modify: parent of modified node
  4368. +
  4369. + */
  4370. + carry_node *node;
  4371. + union {
  4372. + struct {
  4373. + /* (sub-)type of insertion/paste. Taken from
  4374. + cop_insert_pos_type. */
  4375. + __u8 type;
  4376. + /* various operation flags. Taken from
  4377. + cop_insert_flag. */
  4378. + __u8 flags;
  4379. + carry_insert_data *d;
  4380. + carry_node *child;
  4381. + znode *brother;
  4382. + } insert, paste, extent;
  4383. +
  4384. + struct {
  4385. + int is_cut;
  4386. + union {
  4387. + carry_kill_data *kill;
  4388. + carry_cut_data *cut;
  4389. + } u;
  4390. + } cut_or_kill;
  4391. +
  4392. + struct {
  4393. + carry_node *left;
  4394. + } update;
  4395. + struct {
  4396. + /* changed child */
  4397. + carry_node *child;
  4398. + /* bitmask of changes. See &cop_modify_flag */
  4399. + __u32 flag;
  4400. + } modify;
  4401. + struct {
  4402. + /* flags to deletion operation. Are taken from
  4403. + cop_delete_flag */
  4404. + __u32 flags;
  4405. + /* child to delete from parent. If this is
  4406. + NULL, delete op->node. */
  4407. + carry_node *child;
  4408. + } delete;
  4409. + struct {
  4410. + /* various operation flags. Taken from
  4411. + cop_insert_flag. */
  4412. + __u32 flags;
  4413. + flow_t *flow;
  4414. + coord_t *insert_point;
  4415. + reiser4_item_data *data;
  4416. + /* flow insertion is limited by number of new blocks
  4417. + added in that operation which do not get any data
  4418. + but part of flow. This limit is set by macro
  4419. + CARRY_FLOW_NEW_NODES_LIMIT. This field stores number
  4420. + of nodes added already during one carry_flow */
  4421. + int new_nodes;
  4422. + } insert_flow;
  4423. + } u;
  4424. +} carry_op;
  4425. +
  4426. +/* &carry_op_pool - preallocated pool of carry operations, and nodes */
  4427. +typedef struct carry_pool {
  4428. + carry_op op[CARRIES_POOL_SIZE];
  4429. + struct reiser4_pool op_pool;
  4430. + carry_node node[NODES_LOCKED_POOL_SIZE];
  4431. + struct reiser4_pool node_pool;
  4432. +} carry_pool;
  4433. +
  4434. +/* &carry_tree_level - carry process on given level
  4435. +
  4436. + Description of balancing process on the given level.
  4437. +
  4438. + No need for locking here, as carry_tree_level is essentially per
  4439. + thread thing (for now).
  4440. +
  4441. +*/
  4442. +struct carry_level {
  4443. + /* this level may be restarted */
  4444. + __u32 restartable:1;
  4445. + /* list of carry nodes on this level, ordered by key order */
  4446. + struct list_head nodes;
  4447. + struct list_head ops;
  4448. + /* pool where new objects are allocated from */
  4449. + carry_pool *pool;
  4450. + int ops_num;
  4451. + int nodes_num;
  4452. + /* new root created on this level, if any */
  4453. + znode *new_root;
  4454. + /* This is set by caller (insert_by_key(), reiser4_resize_item(), etc.)
  4455. + when they want ->tracked to automagically wander to the node where
  4456. + insertion point moved after insert or paste.
  4457. + */
  4458. + carry_track_type track_type;
  4459. + /* lock handle supplied by user that we are tracking. See
  4460. + above. */
  4461. + lock_handle *tracked;
  4462. +};
  4463. +
  4464. +/* information carry passes to plugin methods that may add new operations to
  4465. + the @todo queue */
  4466. +struct carry_plugin_info {
  4467. + carry_level *doing;
  4468. + carry_level *todo;
  4469. +};
  4470. +
  4471. +int reiser4_carry(carry_level * doing, carry_level * done);
  4472. +
  4473. +carry_node *reiser4_add_carry(carry_level * level, pool_ordering order,
  4474. + carry_node * reference);
  4475. +carry_node *reiser4_add_carry_skip(carry_level * level, pool_ordering order,
  4476. + carry_node * reference);
  4477. +
  4478. +extern carry_node *insert_carry_node(carry_level * doing,
  4479. + carry_level * todo, const znode * node);
  4480. +
  4481. +extern carry_pool *init_carry_pool(int);
  4482. +extern void done_carry_pool(carry_pool * pool);
  4483. +
  4484. +extern void init_carry_level(carry_level * level, carry_pool * pool);
  4485. +
  4486. +extern carry_op *reiser4_post_carry(carry_level * level, carry_opcode op,
  4487. + znode * node, int apply_to_parent);
  4488. +extern carry_op *node_post_carry(carry_plugin_info * info, carry_opcode op,
  4489. + znode * node, int apply_to_parent_p);
  4490. +
  4491. +carry_node *add_new_znode(znode * brother, carry_node * reference,
  4492. + carry_level * doing, carry_level * todo);
  4493. +
  4494. +carry_node *find_carry_node(carry_level * level, const znode * node);
  4495. +
  4496. +extern znode *reiser4_carry_real(const carry_node * node);
  4497. +
  4498. +/* helper macros to iterate over carry queues */
  4499. +
  4500. +#define carry_node_next(node) \
  4501. + list_entry((node)->header.level_linkage.next, carry_node, \
  4502. + header.level_linkage)
  4503. +
  4504. +#define carry_node_prev(node) \
  4505. + list_entry((node)->header.level_linkage.prev, carry_node, \
  4506. + header.level_linkage)
  4507. +
  4508. +#define carry_node_front(level) \
  4509. + list_entry((level)->nodes.next, carry_node, header.level_linkage)
  4510. +
  4511. +#define carry_node_back(level) \
  4512. + list_entry((level)->nodes.prev, carry_node, header.level_linkage)
  4513. +
  4514. +#define carry_node_end(level, node) \
  4515. + (&(level)->nodes == &(node)->header.level_linkage)
  4516. +
  4517. +/* macro to iterate over all operations in a @level */
  4518. +#define for_all_ops(level /* carry level (of type carry_level *) */, \
  4519. + op /* pointer to carry operation, modified by loop (of \
  4520. + * type carry_op *) */, \
  4521. + tmp /* pointer to carry operation (of type carry_op *), \
  4522. + * used to make iterator stable in the face of \
  4523. + * deletions from the level */ ) \
  4524. +for (op = list_entry(level->ops.next, carry_op, header.level_linkage), \
  4525. + tmp = list_entry(op->header.level_linkage.next, carry_op, header.level_linkage); \
  4526. + &op->header.level_linkage != &level->ops; \
  4527. + op = tmp, \
  4528. + tmp = list_entry(op->header.level_linkage.next, carry_op, header.level_linkage))
  4529. +
  4530. +#if 0
  4531. +for (op = (carry_op *) pool_level_list_front(&level->ops), \
  4532. + tmp = (carry_op *) pool_level_list_next(&op->header) ; \
  4533. + !pool_level_list_end(&level->ops, &op->header) ; \
  4534. + op = tmp, tmp = (carry_op *) pool_level_list_next(&op->header))
  4535. +#endif
  4536. +
  4537. +/* macro to iterate over all nodes in a @level */ \
  4538. +#define for_all_nodes(level /* carry level (of type carry_level *) */, \
  4539. + node /* pointer to carry node, modified by loop (of \
  4540. + * type carry_node *) */, \
  4541. + tmp /* pointer to carry node (of type carry_node *), \
  4542. + * used to make iterator stable in the face of * \
  4543. + * deletions from the level */ ) \
  4544. +for (node = list_entry(level->nodes.next, carry_node, header.level_linkage), \
  4545. + tmp = list_entry(node->header.level_linkage.next, carry_node, header.level_linkage); \
  4546. + &node->header.level_linkage != &level->nodes; \
  4547. + node = tmp, \
  4548. + tmp = list_entry(node->header.level_linkage.next, carry_node, header.level_linkage))
  4549. +
  4550. +#if 0
  4551. +for (node = carry_node_front(level), \
  4552. + tmp = carry_node_next(node) ; !carry_node_end(level, node) ; \
  4553. + node = tmp, tmp = carry_node_next(node))
  4554. +#endif
  4555. +
  4556. +/* macro to iterate over all nodes in a @level in reverse order
  4557. +
  4558. + This is used, because nodes are unlocked in reversed order of locking */
  4559. +#define for_all_nodes_back(level /* carry level (of type carry_level *) */, \
  4560. + node /* pointer to carry node, modified by loop \
  4561. + * (of type carry_node *) */, \
  4562. + tmp /* pointer to carry node (of type carry_node \
  4563. + * *), used to make iterator stable in the \
  4564. + * face of deletions from the level */ ) \
  4565. +for (node = carry_node_back(level), \
  4566. + tmp = carry_node_prev(node) ; !carry_node_end(level, node) ; \
  4567. + node = tmp, tmp = carry_node_prev(node))
  4568. +
  4569. +/* __FS_REISER4_CARRY_H__ */
  4570. +#endif
  4571. +
  4572. +/* Make Linus happy.
  4573. + Local variables:
  4574. + c-indentation-style: "K&R"
  4575. + mode-name: "LC"
  4576. + c-basic-offset: 8
  4577. + tab-width: 8
  4578. + fill-column: 120
  4579. + scroll-step: 1
  4580. + End:
  4581. +*/
  4582. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/carry_ops.c linux-5.16.14/fs/reiser4/carry_ops.c
  4583. --- linux-5.16.14.orig/fs/reiser4/carry_ops.c 1970-01-01 01:00:00.000000000 +0100
  4584. +++ linux-5.16.14/fs/reiser4/carry_ops.c 2022-03-12 13:26:19.643892707 +0100
  4585. @@ -0,0 +1,2136 @@
  4586. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  4587. + reiser4/README */
  4588. +
  4589. +/* implementation of carry operations */
  4590. +
  4591. +#include "forward.h"
  4592. +#include "debug.h"
  4593. +#include "key.h"
  4594. +#include "coord.h"
  4595. +#include "plugin/item/item.h"
  4596. +#include "plugin/node/node.h"
  4597. +#include "jnode.h"
  4598. +#include "znode.h"
  4599. +#include "block_alloc.h"
  4600. +#include "tree_walk.h"
  4601. +#include "pool.h"
  4602. +#include "tree_mod.h"
  4603. +#include "carry.h"
  4604. +#include "carry_ops.h"
  4605. +#include "tree.h"
  4606. +#include "super.h"
  4607. +#include "reiser4.h"
  4608. +
  4609. +#include <linux/types.h>
  4610. +#include <linux/err.h>
  4611. +
  4612. +static int carry_shift_data(sideof side, coord_t *insert_coord, znode * node,
  4613. + carry_level * doing, carry_level * todo,
  4614. + unsigned int including_insert_coord_p);
  4615. +
  4616. +extern int lock_carry_node(carry_level * level, carry_node * node);
  4617. +extern int lock_carry_node_tail(carry_node * node);
  4618. +
  4619. +/* find left neighbor of a carry node
  4620. +
  4621. + Look for left neighbor of @node and add it to the @doing queue. See
  4622. + comments in the body.
  4623. +
  4624. +*/
  4625. +static carry_node *find_left_neighbor(carry_op * op /* node to find left
  4626. + * neighbor of */ ,
  4627. + carry_level * doing/* level to scan */)
  4628. +{
  4629. + int result;
  4630. + carry_node *node;
  4631. + carry_node *left;
  4632. + int flags;
  4633. + reiser4_tree *tree;
  4634. +
  4635. + node = op->node;
  4636. +
  4637. + tree = current_tree;
  4638. + read_lock_tree(tree);
  4639. + /* first, check whether left neighbor is already in a @doing queue */
  4640. + if (reiser4_carry_real(node)->left != NULL) {
  4641. + /* NOTE: there is locking subtlety here. Look into
  4642. + * find_right_neighbor() for more info */
  4643. + if (find_carry_node(doing,
  4644. + reiser4_carry_real(node)->left) != NULL) {
  4645. + read_unlock_tree(tree);
  4646. + left = node;
  4647. + do {
  4648. + left = list_entry(left->header.level_linkage.prev,
  4649. + carry_node, header.level_linkage);
  4650. + assert("nikita-3408", !carry_node_end(doing,
  4651. + left));
  4652. + } while (reiser4_carry_real(left) ==
  4653. + reiser4_carry_real(node));
  4654. + return left;
  4655. + }
  4656. + }
  4657. + read_unlock_tree(tree);
  4658. +
  4659. + left = reiser4_add_carry_skip(doing, POOLO_BEFORE, node);
  4660. + if (IS_ERR(left))
  4661. + return left;
  4662. +
  4663. + left->node = node->node;
  4664. + left->free = 1;
  4665. +
  4666. + flags = GN_TRY_LOCK;
  4667. + if (!(op->u.insert.flags & COPI_LOAD_LEFT))
  4668. + flags |= GN_NO_ALLOC;
  4669. +
  4670. + /* then, feeling lucky, peek left neighbor in the cache. */
  4671. + result = reiser4_get_left_neighbor(&left->lock_handle,
  4672. + reiser4_carry_real(node),
  4673. + ZNODE_WRITE_LOCK, flags);
  4674. + if (result == 0) {
  4675. + /* ok, node found and locked. */
  4676. + result = lock_carry_node_tail(left);
  4677. + if (result != 0)
  4678. + left = ERR_PTR(result);
  4679. + } else if (result == -E_NO_NEIGHBOR || result == -ENOENT) {
  4680. + /* node is leftmost node in a tree, or neighbor wasn't in
  4681. + cache, or there is an extent on the left. */
  4682. + reiser4_pool_free(&doing->pool->node_pool, &left->header);
  4683. + left = NULL;
  4684. + } else if (doing->restartable) {
  4685. + /* if left neighbor is locked, and level is restartable, add
  4686. + new node to @doing and restart. */
  4687. + assert("nikita-913", node->parent != 0);
  4688. + assert("nikita-914", node->node != NULL);
  4689. + left->left = 1;
  4690. + left->free = 0;
  4691. + left = ERR_PTR(-E_REPEAT);
  4692. + } else {
  4693. + /* left neighbor is locked, level cannot be restarted. Just
  4694. + ignore left neighbor. */
  4695. + reiser4_pool_free(&doing->pool->node_pool, &left->header);
  4696. + left = NULL;
  4697. + }
  4698. + return left;
  4699. +}
  4700. +
  4701. +/* find right neighbor of a carry node
  4702. +
  4703. + Look for right neighbor of @node and add it to the @doing queue. See
  4704. + comments in the body.
  4705. +
  4706. +*/
  4707. +static carry_node *find_right_neighbor(carry_op * op /* node to find right
  4708. + * neighbor of */ ,
  4709. + carry_level * doing/* level to scan */)
  4710. +{
  4711. + int result;
  4712. + carry_node *node;
  4713. + carry_node *right;
  4714. + lock_handle lh;
  4715. + int flags;
  4716. + reiser4_tree *tree;
  4717. +
  4718. + init_lh(&lh);
  4719. +
  4720. + node = op->node;
  4721. +
  4722. + tree = current_tree;
  4723. + read_lock_tree(tree);
  4724. + /* first, check whether right neighbor is already in a @doing queue */
  4725. + if (reiser4_carry_real(node)->right != NULL) {
  4726. + /*
  4727. + * Tree lock is taken here anyway, because, even if _outcome_
  4728. + * of (find_carry_node() != NULL) doesn't depends on
  4729. + * concurrent updates to ->right, find_carry_node() cannot
  4730. + * work with second argument NULL. Hence, following comment is
  4731. + * of historic importance only.
  4732. + *
  4733. + * Subtle:
  4734. + *
  4735. + * Q: why don't we need tree lock here, looking for the right
  4736. + * neighbor?
  4737. + *
  4738. + * A: even if value of node->real_node->right were changed
  4739. + * during find_carry_node() execution, outcome of execution
  4740. + * wouldn't change, because (in short) other thread cannot add
  4741. + * elements to the @doing, and if node->real_node->right
  4742. + * already was in @doing, value of node->real_node->right
  4743. + * couldn't change, because node cannot be inserted between
  4744. + * locked neighbors.
  4745. + */
  4746. + if (find_carry_node(doing,
  4747. + reiser4_carry_real(node)->right) != NULL) {
  4748. + read_unlock_tree(tree);
  4749. + /*
  4750. + * What we are doing here (this is also applicable to
  4751. + * the find_left_neighbor()).
  4752. + *
  4753. + * tree_walk.c code requires that insertion of a
  4754. + * pointer to a child, modification of parent pointer
  4755. + * in the child, and insertion of the child into
  4756. + * sibling list are atomic (see
  4757. + * plugin/item/internal.c:create_hook_internal()).
  4758. + *
  4759. + * carry allocates new node long before pointer to it
  4760. + * is inserted into parent and, actually, long before
  4761. + * parent is even known. Such allocated-but-orphaned
  4762. + * nodes are only trackable through carry level lists.
  4763. + *
  4764. + * Situation that is handled here is following: @node
  4765. + * has valid ->right pointer, but there is
  4766. + * allocated-but-orphaned node in the carry queue that
  4767. + * is logically between @node and @node->right. Here
  4768. + * we are searching for it. Critical point is that
  4769. + * this is only possible if @node->right is also in
  4770. + * the carry queue (this is checked above), because
  4771. + * this is the only way new orphaned node could be
  4772. + * inserted between them (before inserting new node,
  4773. + * make_space() first tries to shift to the right, so,
  4774. + * right neighbor will be locked and queued).
  4775. + *
  4776. + */
  4777. + right = node;
  4778. + do {
  4779. + right = list_entry(right->header.level_linkage.next,
  4780. + carry_node, header.level_linkage);
  4781. + assert("nikita-3408", !carry_node_end(doing,
  4782. + right));
  4783. + } while (reiser4_carry_real(right) ==
  4784. + reiser4_carry_real(node));
  4785. + return right;
  4786. + }
  4787. + }
  4788. + read_unlock_tree(tree);
  4789. +
  4790. + flags = GN_CAN_USE_UPPER_LEVELS;
  4791. + if (!(op->u.insert.flags & COPI_LOAD_RIGHT))
  4792. + flags = GN_NO_ALLOC;
  4793. +
  4794. + /* then, try to lock right neighbor */
  4795. + init_lh(&lh);
  4796. + result = reiser4_get_right_neighbor(&lh,
  4797. + reiser4_carry_real(node),
  4798. + ZNODE_WRITE_LOCK, flags);
  4799. + if (result == 0) {
  4800. + /* ok, node found and locked. */
  4801. + right = reiser4_add_carry_skip(doing, POOLO_AFTER, node);
  4802. + if (!IS_ERR(right)) {
  4803. + right->node = lh.node;
  4804. + move_lh(&right->lock_handle, &lh);
  4805. + right->free = 1;
  4806. + result = lock_carry_node_tail(right);
  4807. + if (result != 0)
  4808. + right = ERR_PTR(result);
  4809. + }
  4810. + } else if ((result == -E_NO_NEIGHBOR) || (result == -ENOENT)) {
  4811. + /* node is rightmost node in a tree, or neighbor wasn't in
  4812. + cache, or there is an extent on the right. */
  4813. + right = NULL;
  4814. + } else
  4815. + right = ERR_PTR(result);
  4816. + done_lh(&lh);
  4817. + return right;
  4818. +}
  4819. +
  4820. +/* how much free space in a @node is needed for @op
  4821. +
  4822. + How much space in @node is required for completion of @op, where @op is
  4823. + insert or paste operation.
  4824. +*/
  4825. +static unsigned int space_needed_for_op(znode * node /* znode data are
  4826. + * inserted or
  4827. + * pasted in */ ,
  4828. + carry_op * op /* carry
  4829. + operation */ )
  4830. +{
  4831. + assert("nikita-919", op != NULL);
  4832. +
  4833. + switch (op->op) {
  4834. + default:
  4835. + impossible("nikita-1701", "Wrong opcode");
  4836. + case COP_INSERT:
  4837. + return space_needed(node, NULL, op->u.insert.d->data, 1);
  4838. + case COP_PASTE:
  4839. + return space_needed(node, op->u.insert.d->coord,
  4840. + op->u.insert.d->data, 0);
  4841. + }
  4842. +}
  4843. +
  4844. +/* how much space in @node is required to insert or paste @data at
  4845. + @coord. */
  4846. +unsigned int space_needed(const znode * node /* node data are inserted or
  4847. + * pasted in */ ,
  4848. + const coord_t *coord /* coord where data are
  4849. + * inserted or pasted
  4850. + * at */ ,
  4851. + const reiser4_item_data * data /* data to insert or
  4852. + * paste */ ,
  4853. + int insertion/* non-0 is inserting, 0---paste */)
  4854. +{
  4855. + int result;
  4856. + item_plugin *iplug;
  4857. +
  4858. + assert("nikita-917", node != NULL);
  4859. + assert("nikita-918", node_plugin_by_node(node) != NULL);
  4860. + assert("vs-230", !insertion || (coord == NULL));
  4861. +
  4862. + result = 0;
  4863. + iplug = data->iplug;
  4864. + if (iplug->b.estimate != NULL) {
  4865. + /* ask item plugin how much space is needed to insert this
  4866. + item */
  4867. + result += iplug->b.estimate(insertion ? NULL : coord, data);
  4868. + } else {
  4869. + /* reasonable default */
  4870. + result += data->length;
  4871. + }
  4872. + if (insertion) {
  4873. + node_plugin *nplug;
  4874. +
  4875. + nplug = node->nplug;
  4876. + /* and add node overhead */
  4877. + if (nplug->item_overhead != NULL)
  4878. + result += nplug->item_overhead(node, NULL);
  4879. + }
  4880. + return result;
  4881. +}
  4882. +
  4883. +/* find &coord in parent where pointer to new child is to be stored. */
  4884. +static int find_new_child_coord(carry_op * op /* COP_INSERT carry operation to
  4885. + * insert pointer to new
  4886. + * child */ )
  4887. +{
  4888. + int result;
  4889. + znode *node;
  4890. + znode *child;
  4891. +
  4892. + assert("nikita-941", op != NULL);
  4893. + assert("nikita-942", op->op == COP_INSERT);
  4894. +
  4895. + node = reiser4_carry_real(op->node);
  4896. + assert("nikita-943", node != NULL);
  4897. + assert("nikita-944", node_plugin_by_node(node) != NULL);
  4898. +
  4899. + child = reiser4_carry_real(op->u.insert.child);
  4900. + result =
  4901. + find_new_child_ptr(node, child, op->u.insert.brother,
  4902. + op->u.insert.d->coord);
  4903. +
  4904. + build_child_ptr_data(child, op->u.insert.d->data);
  4905. + return result;
  4906. +}
  4907. +
  4908. +/* additional amount of free space in @node required to complete @op */
  4909. +static int free_space_shortage(znode * node /* node to check */ ,
  4910. + carry_op * op/* operation being performed */)
  4911. +{
  4912. + assert("nikita-1061", node != NULL);
  4913. + assert("nikita-1062", op != NULL);
  4914. +
  4915. + switch (op->op) {
  4916. + default:
  4917. + impossible("nikita-1702", "Wrong opcode");
  4918. + case COP_INSERT:
  4919. + case COP_PASTE:
  4920. + return space_needed_for_op(node, op) - znode_free_space(node);
  4921. + case COP_EXTENT:
  4922. + /* when inserting extent shift data around until insertion
  4923. + point is utmost in the node. */
  4924. + if (coord_wrt(op->u.insert.d->coord) == COORD_INSIDE)
  4925. + return +1;
  4926. + else
  4927. + return -1;
  4928. + }
  4929. +}
  4930. +
  4931. +/* helper function: update node pointer in operation after insertion
  4932. + point was probably shifted into @target. */
  4933. +static znode *sync_op(carry_op * op, carry_node * target)
  4934. +{
  4935. + znode *insertion_node;
  4936. +
  4937. + /* reget node from coord: shift might move insertion coord to
  4938. + the neighbor */
  4939. + insertion_node = op->u.insert.d->coord->node;
  4940. + /* if insertion point was actually moved into new node,
  4941. + update carry node pointer in operation. */
  4942. + if (insertion_node != reiser4_carry_real(op->node)) {
  4943. + op->node = target;
  4944. + assert("nikita-2540",
  4945. + reiser4_carry_real(target) == insertion_node);
  4946. + }
  4947. + assert("nikita-2541",
  4948. + reiser4_carry_real(op->node) == op->u.insert.d->coord->node);
  4949. + return insertion_node;
  4950. +}
  4951. +
  4952. +/*
  4953. + * complete make_space() call: update tracked lock handle if necessary. See
  4954. + * comments for fs/reiser4/carry.h:carry_track_type
  4955. + */
  4956. +static int
  4957. +make_space_tail(carry_op * op, carry_level * doing, znode * orig_node)
  4958. +{
  4959. + int result;
  4960. + carry_track_type tracking;
  4961. + znode *node;
  4962. +
  4963. + tracking = doing->track_type;
  4964. + node = op->u.insert.d->coord->node;
  4965. +
  4966. + if (tracking == CARRY_TRACK_NODE ||
  4967. + (tracking == CARRY_TRACK_CHANGE && node != orig_node)) {
  4968. + /* inserting or pasting into node different from
  4969. + original. Update lock handle supplied by caller. */
  4970. + assert("nikita-1417", doing->tracked != NULL);
  4971. + done_lh(doing->tracked);
  4972. + init_lh(doing->tracked);
  4973. + result = longterm_lock_znode(doing->tracked, node,
  4974. + ZNODE_WRITE_LOCK,
  4975. + ZNODE_LOCK_HIPRI);
  4976. + } else
  4977. + result = 0;
  4978. + return result;
  4979. +}
  4980. +
  4981. +/* This is insertion policy function. It shifts data to the left and right
  4982. + neighbors of insertion coord and allocates new nodes until there is enough
  4983. + free space to complete @op.
  4984. +
  4985. + See comments in the body.
  4986. +
  4987. + Assumes that the node format favors insertions at the right end of the node
  4988. + as node40 does.
  4989. +
  4990. + See carry_flow() on detail about flow insertion
  4991. +*/
  4992. +static int make_space(carry_op * op /* carry operation, insert or paste */ ,
  4993. + carry_level * doing /* current carry queue */ ,
  4994. + carry_level * todo/* carry queue on the parent level */)
  4995. +{
  4996. + znode *node;
  4997. + int result;
  4998. + int not_enough_space;
  4999. + int blk_alloc;
  5000. + znode *orig_node;
  5001. + __u32 flags;
  5002. +
  5003. + coord_t *coord;
  5004. +
  5005. + assert("nikita-890", op != NULL);
  5006. + assert("nikita-891", todo != NULL);
  5007. + assert("nikita-892",
  5008. + op->op == COP_INSERT ||
  5009. + op->op == COP_PASTE || op->op == COP_EXTENT);
  5010. + assert("nikita-1607",
  5011. + reiser4_carry_real(op->node) == op->u.insert.d->coord->node);
  5012. +
  5013. + flags = op->u.insert.flags;
  5014. +
  5015. + /* NOTE check that new node can only be allocated after checking left
  5016. + * and right neighbors. This is necessary for proper work of
  5017. + * find_{left,right}_neighbor(). */
  5018. + assert("nikita-3410", ergo(flags & COPI_DONT_ALLOCATE,
  5019. + flags & COPI_DONT_SHIFT_LEFT));
  5020. + assert("nikita-3411", ergo(flags & COPI_DONT_ALLOCATE,
  5021. + flags & COPI_DONT_SHIFT_RIGHT));
  5022. +
  5023. + coord = op->u.insert.d->coord;
  5024. + orig_node = node = coord->node;
  5025. +
  5026. + assert("nikita-908", node != NULL);
  5027. + assert("nikita-909", node_plugin_by_node(node) != NULL);
  5028. +
  5029. + result = 0;
  5030. + /* If there is not enough space in a node, try to shift something to
  5031. + the left neighbor. This is a bit tricky, as locking to the left is
  5032. + low priority. This is handled by restart logic in carry().
  5033. + */
  5034. + not_enough_space = free_space_shortage(node, op);
  5035. + if (not_enough_space <= 0)
  5036. + /* it is possible that carry was called when there actually
  5037. + was enough space in the node. For example, when inserting
  5038. + leftmost item so that delimiting keys have to be updated.
  5039. + */
  5040. + return make_space_tail(op, doing, orig_node);
  5041. + if (!(flags & COPI_DONT_SHIFT_LEFT)) {
  5042. + carry_node *left;
  5043. + /* make note in statistics of an attempt to move
  5044. + something into the left neighbor */
  5045. + left = find_left_neighbor(op, doing);
  5046. + if (unlikely(IS_ERR(left))) {
  5047. + if (PTR_ERR(left) == -E_REPEAT)
  5048. + return -E_REPEAT;
  5049. + else {
  5050. + /* some error other than restart request
  5051. + occurred. This shouldn't happen. Issue a
  5052. + warning and continue as if left neighbor
  5053. + weren't existing.
  5054. + */
  5055. + warning("nikita-924",
  5056. + "Error accessing left neighbor: %li",
  5057. + PTR_ERR(left));
  5058. + }
  5059. + } else if (left != NULL) {
  5060. +
  5061. + /* shift everything possible on the left of and
  5062. + including insertion coord into the left neighbor */
  5063. + result = carry_shift_data(LEFT_SIDE, coord,
  5064. + reiser4_carry_real(left),
  5065. + doing, todo,
  5066. + flags & COPI_GO_LEFT);
  5067. +
  5068. + /* reget node from coord: shift_left() might move
  5069. + insertion coord to the left neighbor */
  5070. + node = sync_op(op, left);
  5071. +
  5072. + not_enough_space = free_space_shortage(node, op);
  5073. + /* There is not enough free space in @node, but
  5074. + may be, there is enough free space in
  5075. + @left. Various balancing decisions are valid here.
  5076. + The same for the shifting to the right.
  5077. + */
  5078. + }
  5079. + }
  5080. + /* If there still is not enough space, shift to the right */
  5081. + if (not_enough_space > 0 && !(flags & COPI_DONT_SHIFT_RIGHT)) {
  5082. + carry_node *right;
  5083. +
  5084. + right = find_right_neighbor(op, doing);
  5085. + if (IS_ERR(right)) {
  5086. + warning("nikita-1065",
  5087. + "Error accessing right neighbor: %li",
  5088. + PTR_ERR(right));
  5089. + } else if (right != NULL) {
  5090. + /* node containing insertion point, and its right
  5091. + neighbor node are write locked by now.
  5092. +
  5093. + shift everything possible on the right of but
  5094. + excluding insertion coord into the right neighbor
  5095. + */
  5096. + result = carry_shift_data(RIGHT_SIDE, coord,
  5097. + reiser4_carry_real(right),
  5098. + doing, todo,
  5099. + flags & COPI_GO_RIGHT);
  5100. + /* reget node from coord: shift_right() might move
  5101. + insertion coord to the right neighbor */
  5102. + node = sync_op(op, right);
  5103. + not_enough_space = free_space_shortage(node, op);
  5104. + }
  5105. + }
  5106. + /* If there is still not enough space, allocate new node(s).
  5107. +
  5108. + We try to allocate new blocks if COPI_DONT_ALLOCATE is not set in
  5109. + the carry operation flags (currently this is needed during flush
  5110. + only).
  5111. + */
  5112. + for (blk_alloc = 0;
  5113. + not_enough_space > 0 && result == 0 && blk_alloc < 2 &&
  5114. + !(flags & COPI_DONT_ALLOCATE); ++blk_alloc) {
  5115. + carry_node *fresh; /* new node we are allocating */
  5116. + coord_t coord_shadow; /* remembered insertion point before
  5117. + * shifting data into new node */
  5118. + carry_node *node_shadow; /* remembered insertion node
  5119. + * before shifting */
  5120. + unsigned int gointo; /* whether insertion point should move
  5121. + * into newly allocated node */
  5122. +
  5123. + /* allocate new node on the right of @node. Znode and disk
  5124. + fake block number for new node are allocated.
  5125. +
  5126. + add_new_znode() posts carry operation COP_INSERT with
  5127. + COPT_CHILD option to the parent level to add
  5128. + pointer to newly created node to its parent.
  5129. +
  5130. + Subtle point: if several new nodes are required to complete
  5131. + insertion operation at this level, they will be inserted
  5132. + into their parents in the order of creation, which means
  5133. + that @node will be valid "cookie" at the time of insertion.
  5134. +
  5135. + */
  5136. + fresh = add_new_znode(node, op->node, doing, todo);
  5137. + if (IS_ERR(fresh))
  5138. + return PTR_ERR(fresh);
  5139. +
  5140. + /* Try to shift into new node. */
  5141. + result = lock_carry_node(doing, fresh);
  5142. + zput(reiser4_carry_real(fresh));
  5143. + if (result != 0) {
  5144. + warning("nikita-947",
  5145. + "Cannot lock new node: %i", result);
  5146. + return result;
  5147. + }
  5148. +
  5149. + /* both nodes are write locked by now.
  5150. +
  5151. + shift everything possible on the right of and
  5152. + including insertion coord into the right neighbor.
  5153. + */
  5154. + coord_dup(&coord_shadow, op->u.insert.d->coord);
  5155. + node_shadow = op->node;
  5156. + /* move insertion point into newly created node if:
  5157. +
  5158. + . insertion point is rightmost in the source node, or
  5159. + . this is not the first node we are allocating in a row.
  5160. + */
  5161. + gointo =
  5162. + (blk_alloc > 0) ||
  5163. + coord_is_after_rightmost(op->u.insert.d->coord);
  5164. +
  5165. + if (gointo &&
  5166. + op->op == COP_PASTE &&
  5167. + coord_is_existing_item(op->u.insert.d->coord) &&
  5168. + is_solid_item((item_plugin_by_coord(op->u.insert.d->coord)))) {
  5169. + /* paste into solid (atomic) item, which can contain
  5170. + only one unit, so we need to shift it right, where
  5171. + insertion point supposed to be */
  5172. +
  5173. + assert("edward-1444", op->u.insert.d->data->iplug ==
  5174. + item_plugin_by_id(STATIC_STAT_DATA_ID));
  5175. + assert("edward-1445",
  5176. + op->u.insert.d->data->length >
  5177. + node_plugin_by_node(coord->node)->free_space
  5178. + (coord->node));
  5179. +
  5180. + op->u.insert.d->coord->between = BEFORE_UNIT;
  5181. + }
  5182. +
  5183. + result = carry_shift_data(RIGHT_SIDE, coord,
  5184. + reiser4_carry_real(fresh),
  5185. + doing, todo, gointo);
  5186. + /* if insertion point was actually moved into new node,
  5187. + update carry node pointer in operation. */
  5188. + node = sync_op(op, fresh);
  5189. + not_enough_space = free_space_shortage(node, op);
  5190. + if ((not_enough_space > 0) && (node != coord_shadow.node)) {
  5191. + /* there is not enough free in new node. Shift
  5192. + insertion point back to the @shadow_node so that
  5193. + next new node would be inserted between
  5194. + @shadow_node and @fresh.
  5195. + */
  5196. + coord_normalize(&coord_shadow);
  5197. + coord_dup(coord, &coord_shadow);
  5198. + node = coord->node;
  5199. + op->node = node_shadow;
  5200. + if (1 || (flags & COPI_STEP_BACK)) {
  5201. + /* still not enough space?! Maybe there is
  5202. + enough space in the source node (i.e., node
  5203. + data are moved from) now.
  5204. + */
  5205. + not_enough_space =
  5206. + free_space_shortage(node, op);
  5207. + }
  5208. + }
  5209. + }
  5210. + if (not_enough_space > 0) {
  5211. + if (!(flags & COPI_DONT_ALLOCATE))
  5212. + warning("nikita-948", "Cannot insert new item");
  5213. + result = -E_NODE_FULL;
  5214. + }
  5215. + assert("nikita-1622", ergo(result == 0,
  5216. + reiser4_carry_real(op->node) == coord->node));
  5217. + assert("nikita-2616", coord == op->u.insert.d->coord);
  5218. + if (result == 0)
  5219. + result = make_space_tail(op, doing, orig_node);
  5220. + return result;
  5221. +}
  5222. +
  5223. +/* insert_paste_common() - common part of insert and paste operations
  5224. +
  5225. + This function performs common part of COP_INSERT and COP_PASTE.
  5226. +
  5227. + There are two ways in which insertion/paste can be requested:
  5228. +
  5229. + . by directly supplying reiser4_item_data. In this case, op ->
  5230. + u.insert.type is set to COPT_ITEM_DATA.
  5231. +
  5232. + . by supplying child pointer to which is to inserted into parent. In this
  5233. + case op -> u.insert.type == COPT_CHILD.
  5234. +
  5235. + . by supplying key of new item/unit. This is currently only used during
  5236. + extent insertion
  5237. +
  5238. + This is required, because when new node is allocated we don't know at what
  5239. + position pointer to it is to be stored in the parent. Actually, we don't
  5240. + even know what its parent will be, because parent can be re-balanced
  5241. + concurrently and new node re-parented, and because parent can be full and
  5242. + pointer to the new node will go into some other node.
  5243. +
  5244. + insert_paste_common() resolves pointer to child node into position in the
  5245. + parent by calling find_new_child_coord(), that fills
  5246. + reiser4_item_data. After this, insertion/paste proceeds uniformly.
  5247. +
  5248. + Another complication is with finding free space during pasting. It may
  5249. + happen that while shifting items to the neighbors and newly allocated
  5250. + nodes, insertion coord can no longer be in the item we wanted to paste
  5251. + into. At this point, paste becomes (morphs) into insert. Moreover free
  5252. + space analysis has to be repeated, because amount of space required for
  5253. + insertion is different from that of paste (item header overhead, etc).
  5254. +
  5255. + This function "unifies" different insertion modes (by resolving child
  5256. + pointer or key into insertion coord), and then calls make_space() to free
  5257. + enough space in the node by shifting data to the left and right and by
  5258. + allocating new nodes if necessary. Carry operation knows amount of space
  5259. + required for its completion. After enough free space is obtained, caller of
  5260. + this function (carry_{insert,paste,etc.}) performs actual insertion/paste
  5261. + by calling item plugin method.
  5262. +
  5263. +*/
  5264. +static int insert_paste_common(carry_op * op /* carry operation being
  5265. + * performed */ ,
  5266. + carry_level * doing /* current carry level */ ,
  5267. + carry_level * todo /* next carry level */ ,
  5268. + carry_insert_data * cdata /* pointer to
  5269. + * cdata */ ,
  5270. + coord_t *coord /* insertion/paste coord */ ,
  5271. + reiser4_item_data * data /* data to be
  5272. + * inserted/pasted */ )
  5273. +{
  5274. + assert("nikita-981", op != NULL);
  5275. + assert("nikita-980", todo != NULL);
  5276. + assert("nikita-979", (op->op == COP_INSERT) || (op->op == COP_PASTE)
  5277. + || (op->op == COP_EXTENT));
  5278. +
  5279. + if (op->u.insert.type == COPT_PASTE_RESTARTED) {
  5280. + /* nothing to do. Fall through to make_space(). */
  5281. + ;
  5282. + } else if (op->u.insert.type == COPT_KEY) {
  5283. + node_search_result intra_node;
  5284. + znode *node;
  5285. + /* Problem with doing batching at the lowest level, is that
  5286. + operations here are given by coords where modification is
  5287. + to be performed, and one modification can invalidate coords
  5288. + of all following operations.
  5289. +
  5290. + So, we are implementing yet another type for operation that
  5291. + will use (the only) "locator" stable across shifting of
  5292. + data between nodes, etc.: key (COPT_KEY).
  5293. +
  5294. + This clause resolves key to the coord in the node.
  5295. +
  5296. + But node can change also. Probably some pieces have to be
  5297. + added to the lock_carry_node(), to lock node by its key.
  5298. +
  5299. + */
  5300. + /* NOTE-NIKITA Lookup bias is fixed to FIND_EXACT. Complain
  5301. + if you need something else. */
  5302. + op->u.insert.d->coord = coord;
  5303. + node = reiser4_carry_real(op->node);
  5304. + intra_node = node_plugin_by_node(node)->lookup
  5305. + (node, op->u.insert.d->key, FIND_EXACT,
  5306. + op->u.insert.d->coord);
  5307. + if ((intra_node != NS_FOUND) && (intra_node != NS_NOT_FOUND)) {
  5308. + warning("nikita-1715", "Intra node lookup failure: %i",
  5309. + intra_node);
  5310. + return intra_node;
  5311. + }
  5312. + } else if (op->u.insert.type == COPT_CHILD) {
  5313. + /* if we are asked to insert pointer to the child into
  5314. + internal node, first convert pointer to the child into
  5315. + coord within parent node.
  5316. + */
  5317. + znode *child;
  5318. + int result;
  5319. +
  5320. + op->u.insert.d = cdata;
  5321. + op->u.insert.d->coord = coord;
  5322. + op->u.insert.d->data = data;
  5323. + op->u.insert.d->coord->node = reiser4_carry_real(op->node);
  5324. + result = find_new_child_coord(op);
  5325. + child = reiser4_carry_real(op->u.insert.child);
  5326. + if (result != NS_NOT_FOUND) {
  5327. + warning("nikita-993",
  5328. + "Cannot find a place for child pointer: %i",
  5329. + result);
  5330. + return result;
  5331. + }
  5332. + /* This only happens when we did multiple insertions at
  5333. + the previous level, trying to insert single item and
  5334. + it so happened, that insertion of pointers to all new
  5335. + nodes before this one already caused parent node to
  5336. + split (may be several times).
  5337. +
  5338. + I am going to come up with better solution.
  5339. +
  5340. + You are not expected to understand this.
  5341. + -- v6root/usr/sys/ken/slp.c
  5342. +
  5343. + Basically, what happens here is the following: carry came
  5344. + to the parent level and is about to insert internal item
  5345. + pointing to the child node that it just inserted in the
  5346. + level below. Position where internal item is to be inserted
  5347. + was found by find_new_child_coord() above, but node of the
  5348. + current carry operation (that is, parent node of child
  5349. + inserted on the previous level), was determined earlier in
  5350. + the lock_carry_level/lock_carry_node. It could so happen
  5351. + that other carry operations already performed on the parent
  5352. + level already split parent node, so that insertion point
  5353. + moved into another node. Handle this by creating new carry
  5354. + node for insertion point if necessary.
  5355. + */
  5356. + if (reiser4_carry_real(op->node) !=
  5357. + op->u.insert.d->coord->node) {
  5358. + pool_ordering direction;
  5359. + znode *z1;
  5360. + znode *z2;
  5361. + reiser4_key k1;
  5362. + reiser4_key k2;
  5363. +
  5364. + /*
  5365. + * determine in what direction insertion point
  5366. + * moved. Do this by comparing delimiting keys.
  5367. + */
  5368. + z1 = op->u.insert.d->coord->node;
  5369. + z2 = reiser4_carry_real(op->node);
  5370. + if (keyle(leftmost_key_in_node(z1, &k1),
  5371. + leftmost_key_in_node(z2, &k2)))
  5372. + /* insertion point moved to the left */
  5373. + direction = POOLO_BEFORE;
  5374. + else
  5375. + /* insertion point moved to the right */
  5376. + direction = POOLO_AFTER;
  5377. +
  5378. + op->node = reiser4_add_carry_skip(doing,
  5379. + direction, op->node);
  5380. + if (IS_ERR(op->node))
  5381. + return PTR_ERR(op->node);
  5382. + op->node->node = op->u.insert.d->coord->node;
  5383. + op->node->free = 1;
  5384. + result = lock_carry_node(doing, op->node);
  5385. + if (result != 0)
  5386. + return result;
  5387. + }
  5388. +
  5389. + /*
  5390. + * set up key of an item being inserted: we are inserting
  5391. + * internal item and its key is (by the very definition of
  5392. + * search tree) is leftmost key in the child node.
  5393. + */
  5394. + write_lock_dk(znode_get_tree(child));
  5395. + op->u.insert.d->key = leftmost_key_in_node(child,
  5396. + znode_get_ld_key(child));
  5397. + write_unlock_dk(znode_get_tree(child));
  5398. + op->u.insert.d->data->arg = op->u.insert.brother;
  5399. + } else {
  5400. + assert("vs-243", op->u.insert.d->coord != NULL);
  5401. + op->u.insert.d->coord->node = reiser4_carry_real(op->node);
  5402. + }
  5403. +
  5404. + /* find free space. */
  5405. + return make_space(op, doing, todo);
  5406. +}
  5407. +
  5408. +/* handle carry COP_INSERT operation.
  5409. +
  5410. + Insert new item into node. New item can be given in one of two ways:
  5411. +
  5412. + - by passing &tree_coord and &reiser4_item_data as part of @op. This is
  5413. + only applicable at the leaf/twig level.
  5414. +
  5415. + - by passing a child node pointer to which is to be inserted by this
  5416. + operation.
  5417. +
  5418. +*/
  5419. +static int carry_insert(carry_op * op /* operation to perform */ ,
  5420. + carry_level * doing /* queue of operations @op
  5421. + * is part of */ ,
  5422. + carry_level * todo /* queue where new operations
  5423. + * are accumulated */ )
  5424. +{
  5425. + znode *node;
  5426. + carry_insert_data cdata;
  5427. + coord_t coord;
  5428. + reiser4_item_data data;
  5429. + carry_plugin_info info;
  5430. + int result;
  5431. +
  5432. + assert("nikita-1036", op != NULL);
  5433. + assert("nikita-1037", todo != NULL);
  5434. + assert("nikita-1038", op->op == COP_INSERT);
  5435. +
  5436. + coord_init_zero(&coord);
  5437. +
  5438. + /* perform common functionality of insert and paste. */
  5439. + result = insert_paste_common(op, doing, todo, &cdata, &coord, &data);
  5440. + if (result != 0)
  5441. + return result;
  5442. +
  5443. + node = op->u.insert.d->coord->node;
  5444. + assert("nikita-1039", node != NULL);
  5445. + assert("nikita-1040", node_plugin_by_node(node) != NULL);
  5446. +
  5447. + assert("nikita-949",
  5448. + space_needed_for_op(node, op) <= znode_free_space(node));
  5449. +
  5450. + /* ask node layout to create new item. */
  5451. + info.doing = doing;
  5452. + info.todo = todo;
  5453. + result = node_plugin_by_node(node)->create_item
  5454. + (op->u.insert.d->coord, op->u.insert.d->key, op->u.insert.d->data,
  5455. + &info);
  5456. + doing->restartable = 0;
  5457. + znode_make_dirty(node);
  5458. +
  5459. + return result;
  5460. +}
  5461. +
  5462. +/*
  5463. + * Flow insertion code. COP_INSERT_FLOW is special tree operation that is
  5464. + * supplied with a "flow" (that is, a stream of data) and inserts it into tree
  5465. + * by slicing into multiple items.
  5466. + */
  5467. +
  5468. +#define flow_insert_point(op) ((op)->u.insert_flow.insert_point)
  5469. +#define flow_insert_flow(op) ((op)->u.insert_flow.flow)
  5470. +#define flow_insert_data(op) ((op)->u.insert_flow.data)
  5471. +
  5472. +static size_t item_data_overhead(carry_op * op)
  5473. +{
  5474. + if (flow_insert_data(op)->iplug->b.estimate == NULL)
  5475. + return 0;
  5476. + return (flow_insert_data(op)->iplug->b.
  5477. + estimate(NULL /* estimate insertion */ , flow_insert_data(op)) -
  5478. + flow_insert_data(op)->length);
  5479. +}
  5480. +
  5481. +/* FIXME-VS: this is called several times during one make_flow_for_insertion
  5482. + and it will always return the same result. Some optimization could be made
  5483. + by calculating this value once at the beginning and passing it around. That
  5484. + would reduce some flexibility in future changes
  5485. +*/
  5486. +static int can_paste(coord_t *, const reiser4_key *, const reiser4_item_data *);
  5487. +static size_t flow_insertion_overhead(carry_op * op)
  5488. +{
  5489. + znode *node;
  5490. + size_t insertion_overhead;
  5491. +
  5492. + node = flow_insert_point(op)->node;
  5493. + insertion_overhead = 0;
  5494. + if (node->nplug->item_overhead &&
  5495. + !can_paste(flow_insert_point(op), &flow_insert_flow(op)->key,
  5496. + flow_insert_data(op)))
  5497. + insertion_overhead =
  5498. + node->nplug->item_overhead(node, NULL) +
  5499. + item_data_overhead(op);
  5500. + return insertion_overhead;
  5501. +}
  5502. +
  5503. +/* how many bytes of flow does fit to the node */
  5504. +static int what_can_fit_into_node(carry_op * op)
  5505. +{
  5506. + size_t free, overhead;
  5507. +
  5508. + overhead = flow_insertion_overhead(op);
  5509. + free = znode_free_space(flow_insert_point(op)->node);
  5510. + if (free <= overhead)
  5511. + return 0;
  5512. + free -= overhead;
  5513. + /* FIXME: flow->length is loff_t only to not get overflowed in case of
  5514. + expanding truncate */
  5515. + if (free < op->u.insert_flow.flow->length)
  5516. + return free;
  5517. + return (int)op->u.insert_flow.flow->length;
  5518. +}
  5519. +
  5520. +/* in make_space_for_flow_insertion we need to check either whether whole flow
  5521. + fits into a node or whether minimal fraction of flow fits into a node */
  5522. +static int enough_space_for_whole_flow(carry_op * op)
  5523. +{
  5524. + return (unsigned)what_can_fit_into_node(op) ==
  5525. + op->u.insert_flow.flow->length;
  5526. +}
  5527. +
  5528. +#define MIN_FLOW_FRACTION 1
  5529. +static int enough_space_for_min_flow_fraction(carry_op * op)
  5530. +{
  5531. + //assert("vs-902", coord_is_after_rightmost(flow_insert_point(op)));
  5532. +
  5533. + return what_can_fit_into_node(op) >= MIN_FLOW_FRACTION;
  5534. +}
  5535. +
  5536. +/* this returns 0 if left neighbor was obtained successfully and everything
  5537. + up to insertion point including it were shifted and left neighbor still has
  5538. + some free space to put minimal fraction of flow into it */
  5539. +static int
  5540. +make_space_by_shift_left(carry_op * op, carry_level * doing, carry_level * todo)
  5541. +{
  5542. + carry_node *left;
  5543. + znode *orig;
  5544. +
  5545. + left = find_left_neighbor(op, doing);
  5546. + if (unlikely(IS_ERR(left))) {
  5547. + warning("vs-899",
  5548. + "make_space_by_shift_left: "
  5549. + "error accessing left neighbor: %li", PTR_ERR(left));
  5550. + return 1;
  5551. + }
  5552. + if (left == NULL)
  5553. + /* left neighbor either does not exist or is unformatted
  5554. + node */
  5555. + return 1;
  5556. +
  5557. + orig = flow_insert_point(op)->node;
  5558. + /* try to shift content of node @orig from its head up to insert point
  5559. + including insertion point into the left neighbor */
  5560. + carry_shift_data(LEFT_SIDE, flow_insert_point(op),
  5561. + reiser4_carry_real(left), doing, todo,
  5562. + 1/* including insert point */);
  5563. + if (reiser4_carry_real(left) != flow_insert_point(op)->node) {
  5564. + /* insertion point did not move */
  5565. + return 1;
  5566. + }
  5567. +
  5568. + /* insertion point is set after last item in the node */
  5569. + assert("vs-900", coord_is_after_rightmost(flow_insert_point(op)));
  5570. +
  5571. + if (!enough_space_for_min_flow_fraction(op)) {
  5572. + /* insertion point node does not have enough free space to put
  5573. + even minimal portion of flow into it, therefore, move
  5574. + insertion point back to orig node (before first item) */
  5575. + coord_init_before_first_item(flow_insert_point(op), orig);
  5576. + return 1;
  5577. + }
  5578. +
  5579. + /* part of flow is to be written to the end of node */
  5580. + op->node = left;
  5581. + return 0;
  5582. +}
  5583. +
  5584. +/* this returns 0 if right neighbor was obtained successfully and everything to
  5585. + the right of insertion point was shifted to it and node got enough free
  5586. + space to put minimal fraction of flow into it */
  5587. +static int
  5588. +make_space_by_shift_right(carry_op * op, carry_level * doing,
  5589. + carry_level * todo)
  5590. +{
  5591. + carry_node *right;
  5592. +
  5593. + right = find_right_neighbor(op, doing);
  5594. + if (unlikely(IS_ERR(right))) {
  5595. + warning("nikita-1065", "shift_right_excluding_insert_point: "
  5596. + "error accessing right neighbor: %li", PTR_ERR(right));
  5597. + return 1;
  5598. + }
  5599. + if (right) {
  5600. + /* shift everything possible on the right of but excluding
  5601. + insertion coord into the right neighbor */
  5602. + carry_shift_data(RIGHT_SIDE, flow_insert_point(op),
  5603. + reiser4_carry_real(right), doing, todo,
  5604. + 0/* not including insert point */);
  5605. + } else {
  5606. + /* right neighbor either does not exist or is unformatted
  5607. + node */
  5608. + ;
  5609. + }
  5610. + if (coord_is_after_rightmost(flow_insert_point(op))) {
  5611. + if (enough_space_for_min_flow_fraction(op)) {
  5612. + /* part of flow is to be written to the end of node */
  5613. + return 0;
  5614. + }
  5615. + }
  5616. +
  5617. + /* new node is to be added if insert point node did not get enough
  5618. + space for whole flow */
  5619. + return 1;
  5620. +}
  5621. +
  5622. +/* this returns 0 when insert coord is set at the node end and fraction of flow
  5623. + fits into that node */
  5624. +static int
  5625. +make_space_by_new_nodes(carry_op * op, carry_level * doing, carry_level * todo)
  5626. +{
  5627. + int result;
  5628. + znode *node;
  5629. + carry_node *new;
  5630. +
  5631. + node = flow_insert_point(op)->node;
  5632. +
  5633. + if (op->u.insert_flow.new_nodes == CARRY_FLOW_NEW_NODES_LIMIT)
  5634. + return RETERR(-E_NODE_FULL);
  5635. + /* add new node after insert point node */
  5636. + new = add_new_znode(node, op->node, doing, todo);
  5637. + if (unlikely(IS_ERR(new)))
  5638. + return PTR_ERR(new);
  5639. + result = lock_carry_node(doing, new);
  5640. + zput(reiser4_carry_real(new));
  5641. + if (unlikely(result))
  5642. + return result;
  5643. + op->u.insert_flow.new_nodes++;
  5644. + if (!coord_is_after_rightmost(flow_insert_point(op))) {
  5645. + carry_shift_data(RIGHT_SIDE, flow_insert_point(op),
  5646. + reiser4_carry_real(new), doing, todo,
  5647. + 0/* not including insert point */);
  5648. + assert("vs-901",
  5649. + coord_is_after_rightmost(flow_insert_point(op)));
  5650. +
  5651. + if (enough_space_for_min_flow_fraction(op))
  5652. + return 0;
  5653. + if (op->u.insert_flow.new_nodes == CARRY_FLOW_NEW_NODES_LIMIT)
  5654. + return RETERR(-E_NODE_FULL);
  5655. +
  5656. + /* add one more new node */
  5657. + new = add_new_znode(node, op->node, doing, todo);
  5658. + if (unlikely(IS_ERR(new)))
  5659. + return PTR_ERR(new);
  5660. + result = lock_carry_node(doing, new);
  5661. + zput(reiser4_carry_real(new));
  5662. + if (unlikely(result))
  5663. + return result;
  5664. + op->u.insert_flow.new_nodes++;
  5665. + }
  5666. +
  5667. + /* move insertion point to new node */
  5668. + coord_init_before_first_item(flow_insert_point(op),
  5669. + reiser4_carry_real(new));
  5670. + op->node = new;
  5671. + return 0;
  5672. +}
  5673. +
  5674. +static int
  5675. +make_space_for_flow_insertion(carry_op * op, carry_level * doing,
  5676. + carry_level * todo)
  5677. +{
  5678. + __u32 flags = op->u.insert_flow.flags;
  5679. +
  5680. + if (enough_space_for_whole_flow(op)) {
  5681. + /* whole flow fits into insert point node */
  5682. + return 0;
  5683. + }
  5684. + if ((flags & COPI_SWEEP) &&
  5685. + enough_space_for_min_flow_fraction(op))
  5686. + /* use the rest of space in the current node */
  5687. + return 0;
  5688. +
  5689. + if (!(flags & COPI_DONT_SHIFT_LEFT)
  5690. + && (make_space_by_shift_left(op, doing, todo) == 0)) {
  5691. + /* insert point is shifted to left neighbor of original insert
  5692. + point node and is set after last unit in that node. It has
  5693. + enough space to fit at least minimal fraction of flow. */
  5694. + return 0;
  5695. + }
  5696. +
  5697. + if (enough_space_for_whole_flow(op)) {
  5698. + /* whole flow fits into insert point node */
  5699. + return 0;
  5700. + }
  5701. +
  5702. + if (!(flags & COPI_DONT_SHIFT_RIGHT)
  5703. + && (make_space_by_shift_right(op, doing, todo) == 0)) {
  5704. + /* insert point is still set to the same node, but there is
  5705. + nothing to the right of insert point. */
  5706. + return 0;
  5707. + }
  5708. +
  5709. + if (enough_space_for_whole_flow(op)) {
  5710. + /* whole flow fits into insert point node */
  5711. + return 0;
  5712. + }
  5713. +
  5714. + return make_space_by_new_nodes(op, doing, todo);
  5715. +}
  5716. +
  5717. +/* implements COP_INSERT_FLOW operation */
  5718. +static int
  5719. +carry_insert_flow(carry_op * op, carry_level * doing, carry_level * todo)
  5720. +{
  5721. + int result;
  5722. + flow_t *f;
  5723. + coord_t *insert_point;
  5724. + node_plugin *nplug;
  5725. + carry_plugin_info info;
  5726. + znode *orig_node;
  5727. + lock_handle *orig_lh;
  5728. +
  5729. + f = op->u.insert_flow.flow;
  5730. + result = 0;
  5731. +
  5732. + /* carry system needs this to work */
  5733. + info.doing = doing;
  5734. + info.todo = todo;
  5735. +
  5736. + orig_node = flow_insert_point(op)->node;
  5737. + orig_lh = doing->tracked;
  5738. +
  5739. + while (f->length) {
  5740. + result = make_space_for_flow_insertion(op, doing, todo);
  5741. + if (result)
  5742. + break;
  5743. +
  5744. + insert_point = flow_insert_point(op);
  5745. + nplug = node_plugin_by_node(insert_point->node);
  5746. +
  5747. + /* compose item data for insertion/pasting */
  5748. + flow_insert_data(op)->data = f->data;
  5749. + flow_insert_data(op)->length = what_can_fit_into_node(op);
  5750. +
  5751. + if (can_paste(insert_point, &f->key, flow_insert_data(op))) {
  5752. + /* insert point is set to item of file we are writing to
  5753. + and we have to append to it */
  5754. + assert("vs-903", insert_point->between == AFTER_UNIT);
  5755. + nplug->change_item_size(insert_point,
  5756. + flow_insert_data(op)->length);
  5757. + flow_insert_data(op)->iplug->b.paste(insert_point,
  5758. + flow_insert_data
  5759. + (op), &info);
  5760. + } else {
  5761. + /* new item must be inserted */
  5762. + pos_in_node_t new_pos;
  5763. + flow_insert_data(op)->length += item_data_overhead(op);
  5764. +
  5765. + /* FIXME-VS: this is because node40_create_item changes
  5766. + insert_point for obscure reasons */
  5767. + switch (insert_point->between) {
  5768. + case AFTER_ITEM:
  5769. + new_pos = insert_point->item_pos + 1;
  5770. + break;
  5771. + case EMPTY_NODE:
  5772. + new_pos = 0;
  5773. + break;
  5774. + case BEFORE_ITEM:
  5775. + assert("vs-905", insert_point->item_pos == 0);
  5776. + new_pos = 0;
  5777. + break;
  5778. + default:
  5779. + impossible("vs-906",
  5780. + "carry_insert_flow: invalid coord");
  5781. + new_pos = 0;
  5782. + break;
  5783. + }
  5784. +
  5785. + nplug->create_item(insert_point, &f->key,
  5786. + flow_insert_data(op), &info);
  5787. + coord_set_item_pos(insert_point, new_pos);
  5788. + }
  5789. + coord_init_after_item_end(insert_point);
  5790. + doing->restartable = 0;
  5791. + znode_make_dirty(insert_point->node);
  5792. +
  5793. + move_flow_forward(f, (unsigned)flow_insert_data(op)->length);
  5794. + }
  5795. +
  5796. + if (orig_node != flow_insert_point(op)->node) {
  5797. + /* move lock to new insert point */
  5798. + done_lh(orig_lh);
  5799. + init_lh(orig_lh);
  5800. + result =
  5801. + longterm_lock_znode(orig_lh, flow_insert_point(op)->node,
  5802. + ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI);
  5803. + }
  5804. +
  5805. + return result;
  5806. +}
  5807. +
  5808. +/* implements COP_DELETE operation
  5809. +
  5810. + Remove pointer to @op -> u.delete.child from it's parent.
  5811. +
  5812. + This function also handles killing of a tree root if the last pointer from it
  5813. + was removed. This is complicated by our handling of "twig" level: root on
  5814. + twig level is never killed.
  5815. +
  5816. +*/
  5817. +static int carry_delete(carry_op * op /* operation to be performed */ ,
  5818. + carry_level * doing UNUSED_ARG /* current carry
  5819. + * level */ ,
  5820. + carry_level * todo/* next carry level */)
  5821. +{
  5822. + int result;
  5823. + coord_t coord;
  5824. + coord_t coord2;
  5825. + znode *parent;
  5826. + znode *child;
  5827. + carry_plugin_info info;
  5828. + reiser4_tree *tree;
  5829. +
  5830. + /*
  5831. + * This operation is called to delete internal item pointing to the
  5832. + * child node that was removed by carry from the tree on the previous
  5833. + * tree level.
  5834. + */
  5835. +
  5836. + assert("nikita-893", op != NULL);
  5837. + assert("nikita-894", todo != NULL);
  5838. + assert("nikita-895", op->op == COP_DELETE);
  5839. +
  5840. + coord_init_zero(&coord);
  5841. + coord_init_zero(&coord2);
  5842. +
  5843. + parent = reiser4_carry_real(op->node);
  5844. + child = op->u.delete.child ?
  5845. + reiser4_carry_real(op->u.delete.child) : op->node->node;
  5846. + tree = znode_get_tree(child);
  5847. + read_lock_tree(tree);
  5848. +
  5849. + /*
  5850. + * @parent was determined when carry entered parent level
  5851. + * (lock_carry_level/lock_carry_node). Since then, actual parent of
  5852. + * @child node could change due to other carry operations performed on
  5853. + * the parent level. Check for this.
  5854. + */
  5855. +
  5856. + if (znode_parent(child) != parent) {
  5857. + /* NOTE-NIKITA add stat counter for this. */
  5858. + parent = znode_parent(child);
  5859. + assert("nikita-2581", find_carry_node(doing, parent));
  5860. + }
  5861. + read_unlock_tree(tree);
  5862. +
  5863. + assert("nikita-1213", znode_get_level(parent) > LEAF_LEVEL);
  5864. +
  5865. + /* Twig level horrors: tree should be of height at least 2. So, last
  5866. + pointer from the root at twig level is preserved even if child is
  5867. + empty. This is ugly, but so it was architectured.
  5868. + */
  5869. +
  5870. + if (znode_is_root(parent) &&
  5871. + znode_get_level(parent) <= REISER4_MIN_TREE_HEIGHT &&
  5872. + node_num_items(parent) == 1) {
  5873. + /* Delimiting key manipulations. */
  5874. + write_lock_dk(tree);
  5875. + znode_set_ld_key(child, znode_set_ld_key(parent, reiser4_min_key()));
  5876. + znode_set_rd_key(child, znode_set_rd_key(parent, reiser4_max_key()));
  5877. + ZF_SET(child, JNODE_DKSET);
  5878. + write_unlock_dk(tree);
  5879. +
  5880. + /* @child escaped imminent death! */
  5881. + ZF_CLR(child, JNODE_HEARD_BANSHEE);
  5882. + return 0;
  5883. + }
  5884. +
  5885. + /* convert child pointer to the coord_t */
  5886. + result = find_child_ptr(parent, child, &coord);
  5887. + if (result != NS_FOUND) {
  5888. + warning("nikita-994", "Cannot find child pointer: %i", result);
  5889. + print_coord_content("coord", &coord);
  5890. + return result;
  5891. + }
  5892. +
  5893. + coord_dup(&coord2, &coord);
  5894. + info.doing = doing;
  5895. + info.todo = todo;
  5896. + {
  5897. + /*
  5898. + * Actually kill internal item: prepare structure with
  5899. + * arguments for ->cut_and_kill() method...
  5900. + */
  5901. +
  5902. + struct carry_kill_data kdata;
  5903. + kdata.params.from = &coord;
  5904. + kdata.params.to = &coord2;
  5905. + kdata.params.from_key = NULL;
  5906. + kdata.params.to_key = NULL;
  5907. + kdata.params.smallest_removed = NULL;
  5908. + kdata.params.truncate = 1;
  5909. + kdata.flags = op->u.delete.flags;
  5910. + kdata.inode = NULL;
  5911. + kdata.left = NULL;
  5912. + kdata.right = NULL;
  5913. + kdata.buf = NULL;
  5914. + /* ... and call it. */
  5915. + result = node_plugin_by_node(parent)->cut_and_kill(&kdata,
  5916. + &info);
  5917. + }
  5918. + doing->restartable = 0;
  5919. +
  5920. + /* check whether root should be killed violently */
  5921. + if (znode_is_root(parent) &&
  5922. + /* don't kill roots at and lower than twig level */
  5923. + znode_get_level(parent) > REISER4_MIN_TREE_HEIGHT &&
  5924. + node_num_items(parent) == 1)
  5925. + result = reiser4_kill_tree_root(coord.node);
  5926. +
  5927. + return result < 0 ? result : 0;
  5928. +}
  5929. +
  5930. +/* implements COP_CUT operation
  5931. +
  5932. + Cuts part or whole content of node.
  5933. +
  5934. +*/
  5935. +static int carry_cut(carry_op * op /* operation to be performed */ ,
  5936. + carry_level * doing /* current carry level */ ,
  5937. + carry_level * todo/* next carry level */)
  5938. +{
  5939. + int result;
  5940. + carry_plugin_info info;
  5941. + node_plugin *nplug;
  5942. +
  5943. + assert("nikita-896", op != NULL);
  5944. + assert("nikita-897", todo != NULL);
  5945. + assert("nikita-898", op->op == COP_CUT);
  5946. +
  5947. + info.doing = doing;
  5948. + info.todo = todo;
  5949. +
  5950. + nplug = node_plugin_by_node(reiser4_carry_real(op->node));
  5951. + if (op->u.cut_or_kill.is_cut)
  5952. + result = nplug->cut(op->u.cut_or_kill.u.cut, &info);
  5953. + else
  5954. + result = nplug->cut_and_kill(op->u.cut_or_kill.u.kill, &info);
  5955. +
  5956. + doing->restartable = 0;
  5957. + return result < 0 ? result : 0;
  5958. +}
  5959. +
  5960. +/* helper function for carry_paste(): returns true if @op can be continued as
  5961. + paste */
  5962. +static int
  5963. +can_paste(coord_t *icoord, const reiser4_key * key,
  5964. + const reiser4_item_data * data)
  5965. +{
  5966. + coord_t circa;
  5967. + item_plugin *new_iplug;
  5968. + item_plugin *old_iplug;
  5969. + int result = 0; /* to keep gcc shut */
  5970. +
  5971. + assert("", icoord->between != AT_UNIT);
  5972. +
  5973. + /* obviously, one cannot paste when node is empty---there is nothing
  5974. + to paste into. */
  5975. + if (node_is_empty(icoord->node))
  5976. + return 0;
  5977. + /* if insertion point is at the middle of the item, then paste */
  5978. + if (!coord_is_between_items(icoord))
  5979. + return 1;
  5980. + coord_dup(&circa, icoord);
  5981. + circa.between = AT_UNIT;
  5982. +
  5983. + old_iplug = item_plugin_by_coord(&circa);
  5984. + new_iplug = data->iplug;
  5985. +
  5986. + /* check whether we can paste to the item @icoord is "at" when we
  5987. + ignore ->between field */
  5988. + if (old_iplug == new_iplug && item_can_contain_key(&circa, key, data))
  5989. + result = 1;
  5990. + else if (icoord->between == BEFORE_UNIT
  5991. + || icoord->between == BEFORE_ITEM) {
  5992. + /* otherwise, try to glue to the item at the left, if any */
  5993. + coord_dup(&circa, icoord);
  5994. + if (coord_set_to_left(&circa)) {
  5995. + result = 0;
  5996. + coord_init_before_item(icoord);
  5997. + } else {
  5998. + old_iplug = item_plugin_by_coord(&circa);
  5999. + result = (old_iplug == new_iplug)
  6000. + && item_can_contain_key(icoord, key, data);
  6001. + if (result) {
  6002. + coord_dup(icoord, &circa);
  6003. + icoord->between = AFTER_UNIT;
  6004. + }
  6005. + }
  6006. + } else if (icoord->between == AFTER_UNIT
  6007. + || icoord->between == AFTER_ITEM) {
  6008. + coord_dup(&circa, icoord);
  6009. + /* otherwise, try to glue to the item at the right, if any */
  6010. + if (coord_set_to_right(&circa)) {
  6011. + result = 0;
  6012. + coord_init_after_item(icoord);
  6013. + } else {
  6014. + int (*cck) (const coord_t *, const reiser4_key *,
  6015. + const reiser4_item_data *);
  6016. +
  6017. + old_iplug = item_plugin_by_coord(&circa);
  6018. +
  6019. + cck = old_iplug->b.can_contain_key;
  6020. + if (cck == NULL)
  6021. + /* item doesn't define ->can_contain_key
  6022. + method? So it is not expandable. */
  6023. + result = 0;
  6024. + else {
  6025. + result = (old_iplug == new_iplug)
  6026. + && cck(&circa /*icoord */ , key, data);
  6027. + if (result) {
  6028. + coord_dup(icoord, &circa);
  6029. + icoord->between = BEFORE_UNIT;
  6030. + }
  6031. + }
  6032. + }
  6033. + } else
  6034. + impossible("nikita-2513", "Nothing works");
  6035. + if (result) {
  6036. + if (icoord->between == BEFORE_ITEM) {
  6037. + assert("vs-912", icoord->unit_pos == 0);
  6038. + icoord->between = BEFORE_UNIT;
  6039. + } else if (icoord->between == AFTER_ITEM) {
  6040. + coord_init_after_item_end(icoord);
  6041. + }
  6042. + }
  6043. + return result;
  6044. +}
  6045. +
  6046. +/* implements COP_PASTE operation
  6047. +
  6048. + Paste data into existing item. This is complicated by the fact that after
  6049. + we shifted something to the left or right neighbors trying to free some
  6050. + space, item we were supposed to paste into can be in different node than
  6051. + insertion coord. If so, we are no longer doing paste, but insert. See
  6052. + comments in insert_paste_common().
  6053. +
  6054. +*/
  6055. +static int carry_paste(carry_op * op /* operation to be performed */ ,
  6056. + carry_level * doing UNUSED_ARG /* current carry
  6057. + * level */ ,
  6058. + carry_level * todo/* next carry level */)
  6059. +{
  6060. + znode *node;
  6061. + carry_insert_data cdata;
  6062. + coord_t dcoord;
  6063. + reiser4_item_data data;
  6064. + int result;
  6065. + int real_size;
  6066. + item_plugin *iplug;
  6067. + carry_plugin_info info;
  6068. + coord_t *coord;
  6069. +
  6070. + assert("nikita-982", op != NULL);
  6071. + assert("nikita-983", todo != NULL);
  6072. + assert("nikita-984", op->op == COP_PASTE);
  6073. +
  6074. + coord_init_zero(&dcoord);
  6075. +
  6076. + result = insert_paste_common(op, doing, todo, &cdata, &dcoord, &data);
  6077. + if (result != 0)
  6078. + return result;
  6079. +
  6080. + coord = op->u.insert.d->coord;
  6081. +
  6082. + /* handle case when op -> u.insert.coord doesn't point to the item
  6083. + of required type. restart as insert. */
  6084. + if (!can_paste(coord, op->u.insert.d->key, op->u.insert.d->data)) {
  6085. + op->op = COP_INSERT;
  6086. + op->u.insert.type = COPT_PASTE_RESTARTED;
  6087. + result = op_dispatch_table[COP_INSERT].handler(op, doing, todo);
  6088. +
  6089. + return result;
  6090. + }
  6091. +
  6092. + node = coord->node;
  6093. + iplug = item_plugin_by_coord(coord);
  6094. + assert("nikita-992", iplug != NULL);
  6095. +
  6096. + assert("nikita-985", node != NULL);
  6097. + assert("nikita-986", node_plugin_by_node(node) != NULL);
  6098. +
  6099. + assert("nikita-987",
  6100. + space_needed_for_op(node, op) <= znode_free_space(node));
  6101. +
  6102. + assert("nikita-1286", coord_is_existing_item(coord));
  6103. +
  6104. + /*
  6105. + * if item is expanded as a result of this operation, we should first
  6106. + * change item size, than call ->b.paste item method. If item is
  6107. + * shrunk, it should be done other way around: first call ->b.paste
  6108. + * method, then reduce item size.
  6109. + */
  6110. +
  6111. + real_size = space_needed_for_op(node, op);
  6112. + if (real_size > 0)
  6113. + node->nplug->change_item_size(coord, real_size);
  6114. +
  6115. + doing->restartable = 0;
  6116. + info.doing = doing;
  6117. + info.todo = todo;
  6118. +
  6119. + result = iplug->b.paste(coord, op->u.insert.d->data, &info);
  6120. +
  6121. + if (real_size < 0)
  6122. + node->nplug->change_item_size(coord, real_size);
  6123. +
  6124. + /* if we pasted at the beginning of the item, update item's key. */
  6125. + if (coord->unit_pos == 0 && coord->between != AFTER_UNIT)
  6126. + node->nplug->update_item_key(coord, op->u.insert.d->key, &info);
  6127. +
  6128. + znode_make_dirty(node);
  6129. + return result;
  6130. +}
  6131. +
  6132. +/* handle carry COP_EXTENT operation. */
  6133. +static int carry_extent(carry_op * op /* operation to perform */ ,
  6134. + carry_level * doing /* queue of operations @op
  6135. + * is part of */ ,
  6136. + carry_level * todo /* queue where new operations
  6137. + * are accumulated */ )
  6138. +{
  6139. + znode *node;
  6140. + carry_insert_data cdata;
  6141. + coord_t coord;
  6142. + reiser4_item_data data;
  6143. + carry_op *delete_dummy;
  6144. + carry_op *insert_extent;
  6145. + int result;
  6146. + carry_plugin_info info;
  6147. +
  6148. + assert("nikita-1751", op != NULL);
  6149. + assert("nikita-1752", todo != NULL);
  6150. + assert("nikita-1753", op->op == COP_EXTENT);
  6151. +
  6152. + /* extent insertion overview:
  6153. +
  6154. + extents live on the TWIG LEVEL, which is level one above the leaf
  6155. + one. This complicates extent insertion logic somewhat: it may
  6156. + happen (and going to happen all the time) that in logical key
  6157. + ordering extent has to be placed between items I1 and I2, located
  6158. + at the leaf level, but I1 and I2 are in the same formatted leaf
  6159. + node N1. To insert extent one has to
  6160. +
  6161. + (1) reach node N1 and shift data between N1, its neighbors and
  6162. + possibly newly allocated nodes until I1 and I2 fall into different
  6163. + nodes. Since I1 and I2 are still neighboring items in logical key
  6164. + order, they will be necessary utmost items in their respective
  6165. + nodes.
  6166. +
  6167. + (2) After this new extent item is inserted into node on the twig
  6168. + level.
  6169. +
  6170. + Fortunately this process can reuse almost all code from standard
  6171. + insertion procedure (viz. make_space() and insert_paste_common()),
  6172. + due to the following observation: make_space() only shifts data up
  6173. + to and excluding or including insertion point. It never
  6174. + "over-moves" through insertion point. Thus, one can use
  6175. + make_space() to perform step (1). All required for this is just to
  6176. + instruct free_space_shortage() to keep make_space() shifting data
  6177. + until insertion point is at the node border.
  6178. +
  6179. + */
  6180. +
  6181. + /* perform common functionality of insert and paste. */
  6182. + result = insert_paste_common(op, doing, todo, &cdata, &coord, &data);
  6183. + if (result != 0)
  6184. + return result;
  6185. +
  6186. + node = op->u.extent.d->coord->node;
  6187. + assert("nikita-1754", node != NULL);
  6188. + assert("nikita-1755", node_plugin_by_node(node) != NULL);
  6189. + assert("nikita-1700", coord_wrt(op->u.extent.d->coord) != COORD_INSIDE);
  6190. +
  6191. + /* NOTE-NIKITA add some checks here. Not assertions, -EIO. Check that
  6192. + extent fits between items. */
  6193. +
  6194. + info.doing = doing;
  6195. + info.todo = todo;
  6196. +
  6197. + /* there is another complication due to placement of extents on the
  6198. + twig level: extents are "rigid" in the sense that key-range
  6199. + occupied by extent cannot grow indefinitely to the right as it is
  6200. + for the formatted leaf nodes. Because of this when search finds two
  6201. + adjacent extents on the twig level, it has to "drill" to the leaf
  6202. + level, creating new node. Here we are removing this node.
  6203. + */
  6204. + if (node_is_empty(node)) {
  6205. + delete_dummy = node_post_carry(&info, COP_DELETE, node, 1);
  6206. + if (IS_ERR(delete_dummy))
  6207. + return PTR_ERR(delete_dummy);
  6208. + delete_dummy->u.delete.child = NULL;
  6209. + delete_dummy->u.delete.flags = DELETE_RETAIN_EMPTY;
  6210. + ZF_SET(node, JNODE_HEARD_BANSHEE);
  6211. + }
  6212. +
  6213. + /* proceed with inserting extent item into parent. We are definitely
  6214. + inserting rather than pasting if we get that far. */
  6215. + insert_extent = node_post_carry(&info, COP_INSERT, node, 1);
  6216. + if (IS_ERR(insert_extent))
  6217. + /* @delete_dummy will be automatically destroyed on the level
  6218. + exiting */
  6219. + return PTR_ERR(insert_extent);
  6220. + /* NOTE-NIKITA insertion by key is simplest option here. Another
  6221. + possibility is to insert on the left or right of already existing
  6222. + item.
  6223. + */
  6224. + insert_extent->u.insert.type = COPT_KEY;
  6225. + insert_extent->u.insert.d = op->u.extent.d;
  6226. + assert("nikita-1719", op->u.extent.d->key != NULL);
  6227. + insert_extent->u.insert.d->data->arg = op->u.extent.d->coord;
  6228. + insert_extent->u.insert.flags =
  6229. + znode_get_tree(node)->carry.new_extent_flags;
  6230. +
  6231. + /*
  6232. + * if carry was asked to track lock handle we should actually track
  6233. + * lock handle on the twig node rather than on the leaf where
  6234. + * operation was started from. Transfer tracked lock handle.
  6235. + */
  6236. + if (doing->track_type) {
  6237. + assert("nikita-3242", doing->tracked != NULL);
  6238. + assert("nikita-3244", todo->tracked == NULL);
  6239. + todo->tracked = doing->tracked;
  6240. + todo->track_type = CARRY_TRACK_NODE;
  6241. + doing->tracked = NULL;
  6242. + doing->track_type = 0;
  6243. + }
  6244. +
  6245. + return 0;
  6246. +}
  6247. +
  6248. +/* update key in @parent between pointers to @left and @right.
  6249. +
  6250. + Find coords of @left and @right and update delimiting key between them.
  6251. + This is helper function called by carry_update(). Finds position of
  6252. + internal item involved. Updates item key. Updates delimiting keys of child
  6253. + nodes involved.
  6254. +*/
  6255. +static int update_delimiting_key(znode * parent /* node key is updated
  6256. + * in */ ,
  6257. + znode * left /* child of @parent */ ,
  6258. + znode * right /* child of @parent */ ,
  6259. + carry_level * doing /* current carry
  6260. + * level */ ,
  6261. + carry_level * todo /* parent carry
  6262. + * level */ ,
  6263. + const char **error_msg /* place to
  6264. + * store error
  6265. + * message */ )
  6266. +{
  6267. + coord_t left_pos;
  6268. + coord_t right_pos;
  6269. + int result;
  6270. + reiser4_key ldkey;
  6271. + carry_plugin_info info;
  6272. +
  6273. + assert("nikita-1177", right != NULL);
  6274. + /* find position of the right child in a parent */
  6275. + result = find_child_ptr(parent, right, &right_pos);
  6276. + if (result != NS_FOUND) {
  6277. + *error_msg = "Cannot find position of right child";
  6278. + return result;
  6279. + }
  6280. +
  6281. + if ((left != NULL) && !coord_is_leftmost_unit(&right_pos)) {
  6282. + /* find position of the left child in a parent */
  6283. + result = find_child_ptr(parent, left, &left_pos);
  6284. + if (result != NS_FOUND) {
  6285. + *error_msg = "Cannot find position of left child";
  6286. + return result;
  6287. + }
  6288. + assert("nikita-1355", left_pos.node != NULL);
  6289. + } else
  6290. + left_pos.node = NULL;
  6291. +
  6292. + /* check that they are separated by exactly one key and are basically
  6293. + sane */
  6294. + if (REISER4_DEBUG) {
  6295. + if ((left_pos.node != NULL)
  6296. + && !coord_is_existing_unit(&left_pos)) {
  6297. + *error_msg = "Left child is bastard";
  6298. + return RETERR(-EIO);
  6299. + }
  6300. + if (!coord_is_existing_unit(&right_pos)) {
  6301. + *error_msg = "Right child is bastard";
  6302. + return RETERR(-EIO);
  6303. + }
  6304. + if (left_pos.node != NULL &&
  6305. + !coord_are_neighbors(&left_pos, &right_pos)) {
  6306. + *error_msg = "Children are not direct siblings";
  6307. + return RETERR(-EIO);
  6308. + }
  6309. + }
  6310. + *error_msg = NULL;
  6311. +
  6312. + info.doing = doing;
  6313. + info.todo = todo;
  6314. +
  6315. + /*
  6316. + * If child node is not empty, new key of internal item is a key of
  6317. + * leftmost item in the child node. If the child is empty, take its
  6318. + * right delimiting key as a new key of the internal item. Precise key
  6319. + * in the latter case is not important per se, because the child (and
  6320. + * the internal item) are going to be killed shortly anyway, but we
  6321. + * have to preserve correct order of keys in the parent node.
  6322. + */
  6323. +
  6324. + if (!ZF_ISSET(right, JNODE_HEARD_BANSHEE))
  6325. + leftmost_key_in_node(right, &ldkey);
  6326. + else {
  6327. + read_lock_dk(znode_get_tree(parent));
  6328. + ldkey = *znode_get_rd_key(right);
  6329. + read_unlock_dk(znode_get_tree(parent));
  6330. + }
  6331. + node_plugin_by_node(parent)->update_item_key(&right_pos, &ldkey, &info);
  6332. + doing->restartable = 0;
  6333. + znode_make_dirty(parent);
  6334. + return 0;
  6335. +}
  6336. +
  6337. +/* implements COP_UPDATE operation
  6338. +
  6339. + Update delimiting keys.
  6340. +
  6341. +*/
  6342. +static int carry_update(carry_op * op /* operation to be performed */ ,
  6343. + carry_level * doing /* current carry level */ ,
  6344. + carry_level * todo/* next carry level */)
  6345. +{
  6346. + int result;
  6347. + carry_node *missing UNUSED_ARG;
  6348. + znode *left;
  6349. + znode *right;
  6350. + carry_node *lchild;
  6351. + carry_node *rchild;
  6352. + const char *error_msg;
  6353. + reiser4_tree *tree;
  6354. +
  6355. + /*
  6356. + * This operation is called to update key of internal item. This is
  6357. + * necessary when carry shifted or cut data on the child
  6358. + * level. Arguments of this operation are:
  6359. + *
  6360. + * @right --- child node. Operation should update key of internal
  6361. + * item pointing to @right.
  6362. + *
  6363. + * @left --- left neighbor of @right. This parameter is optional.
  6364. + */
  6365. +
  6366. + assert("nikita-902", op != NULL);
  6367. + assert("nikita-903", todo != NULL);
  6368. + assert("nikita-904", op->op == COP_UPDATE);
  6369. +
  6370. + lchild = op->u.update.left;
  6371. + rchild = op->node;
  6372. +
  6373. + if (lchild != NULL) {
  6374. + assert("nikita-1001", lchild->parent);
  6375. + assert("nikita-1003", !lchild->left);
  6376. + left = reiser4_carry_real(lchild);
  6377. + } else
  6378. + left = NULL;
  6379. +
  6380. + tree = znode_get_tree(rchild->node);
  6381. + read_lock_tree(tree);
  6382. + right = znode_parent(rchild->node);
  6383. + read_unlock_tree(tree);
  6384. +
  6385. + if (right != NULL) {
  6386. + result = update_delimiting_key(right,
  6387. + lchild ? lchild->node : NULL,
  6388. + rchild->node,
  6389. + doing, todo, &error_msg);
  6390. + } else {
  6391. + error_msg = "Cannot find node to update key in";
  6392. + result = RETERR(-EIO);
  6393. + }
  6394. + /* operation will be reposted to the next level by the
  6395. + ->update_item_key() method of node plugin, if necessary. */
  6396. +
  6397. + if (result != 0) {
  6398. + warning("nikita-999", "Error updating delimiting key: %s (%i)",
  6399. + error_msg ? : "", result);
  6400. + }
  6401. + return result;
  6402. +}
  6403. +
  6404. +/* move items from @node during carry */
  6405. +static int carry_shift_data(sideof side /* in what direction to move data */ ,
  6406. + coord_t *insert_coord /* coord where new item
  6407. + * is to be inserted */,
  6408. + znode * node /* node which data are moved from */ ,
  6409. + carry_level * doing /* active carry queue */ ,
  6410. + carry_level * todo /* carry queue where new
  6411. + * operations are to be put
  6412. + * in */ ,
  6413. + unsigned int including_insert_coord_p
  6414. + /* true if @insertion_coord can be moved */ )
  6415. +{
  6416. + int result;
  6417. + znode *source;
  6418. + carry_plugin_info info;
  6419. + node_plugin *nplug;
  6420. +
  6421. + source = insert_coord->node;
  6422. +
  6423. + info.doing = doing;
  6424. + info.todo = todo;
  6425. +
  6426. + nplug = node_plugin_by_node(node);
  6427. + result = nplug->shift(insert_coord, node,
  6428. + (side == LEFT_SIDE) ? SHIFT_LEFT : SHIFT_RIGHT, 0,
  6429. + (int)including_insert_coord_p, &info);
  6430. + /* the only error ->shift() method of node plugin can return is
  6431. + -ENOMEM due to carry node/operation allocation. */
  6432. + assert("nikita-915", result >= 0 || result == -ENOMEM);
  6433. + if (result > 0) {
  6434. + /*
  6435. + * if some number of bytes was actually shifted, mark nodes
  6436. + * dirty, and carry level as non-restartable.
  6437. + */
  6438. + doing->restartable = 0;
  6439. + znode_make_dirty(source);
  6440. + znode_make_dirty(node);
  6441. + }
  6442. +
  6443. + assert("nikita-2077", coord_check(insert_coord));
  6444. + return 0;
  6445. +}
  6446. +
  6447. +typedef carry_node *(*carry_iterator) (carry_node * node);
  6448. +static carry_node *find_dir_carry(carry_node * node, carry_level * level,
  6449. + carry_iterator iterator);
  6450. +
  6451. +static carry_node *pool_level_list_prev(carry_node *node)
  6452. +{
  6453. + return list_entry(node->header.level_linkage.prev, carry_node, header.level_linkage);
  6454. +}
  6455. +
  6456. +/* look for the left neighbor of given carry node in a carry queue.
  6457. +
  6458. + This is used by find_left_neighbor(), but I am not sure that this
  6459. + really gives any advantage. More statistics required.
  6460. +
  6461. +*/
  6462. +carry_node *find_left_carry(carry_node * node /* node to find left neighbor
  6463. + * of */ ,
  6464. + carry_level * level/* level to scan */)
  6465. +{
  6466. + return find_dir_carry(node, level,
  6467. + (carry_iterator) pool_level_list_prev);
  6468. +}
  6469. +
  6470. +static carry_node *pool_level_list_next(carry_node *node)
  6471. +{
  6472. + return list_entry(node->header.level_linkage.next, carry_node, header.level_linkage);
  6473. +}
  6474. +
  6475. +/* look for the right neighbor of given carry node in a
  6476. + carry queue.
  6477. +
  6478. + This is used by find_right_neighbor(), but I am not sure that this
  6479. + really gives any advantage. More statistics required.
  6480. +
  6481. +*/
  6482. +carry_node *find_right_carry(carry_node * node /* node to find right neighbor
  6483. + * of */ ,
  6484. + carry_level * level/* level to scan */)
  6485. +{
  6486. + return find_dir_carry(node, level,
  6487. + (carry_iterator) pool_level_list_next);
  6488. +}
  6489. +
  6490. +/* look for the left or right neighbor of given carry node in a carry
  6491. + queue.
  6492. +
  6493. + Helper function used by find_{left|right}_carry().
  6494. +*/
  6495. +static carry_node *find_dir_carry(carry_node * node /* node to start
  6496. + * scanning from */ ,
  6497. + carry_level * level /* level to scan */ ,
  6498. + carry_iterator iterator /* operation to
  6499. + * move to the
  6500. + * next node */)
  6501. +{
  6502. + carry_node *neighbor;
  6503. +
  6504. + assert("nikita-1059", node != NULL);
  6505. + assert("nikita-1060", level != NULL);
  6506. +
  6507. + /* scan list of carry nodes on this list dir-ward, skipping all
  6508. + carry nodes referencing the same znode. */
  6509. + neighbor = node;
  6510. + while (1) {
  6511. + neighbor = iterator(neighbor);
  6512. + if (carry_node_end(level, neighbor))
  6513. + /* list head is reached */
  6514. + return NULL;
  6515. + if (reiser4_carry_real(neighbor) != reiser4_carry_real(node))
  6516. + return neighbor;
  6517. + }
  6518. +}
  6519. +
  6520. +/*
  6521. + * Memory reservation estimation.
  6522. + *
  6523. + * Carry process proceeds through tree levels upwards. Carry assumes that it
  6524. + * takes tree in consistent state (e.g., that search tree invariants hold),
  6525. + * and leaves tree consistent after it finishes. This means that when some
  6526. + * error occurs carry cannot simply return if there are pending carry
  6527. + * operations. Generic solution for this problem is carry-undo either as
  6528. + * transaction manager feature (requiring checkpoints and isolation), or
  6529. + * through some carry specific mechanism.
  6530. + *
  6531. + * Our current approach is to panic if carry hits an error while tree is
  6532. + * inconsistent. Unfortunately -ENOMEM can easily be triggered. To work around
  6533. + * this "memory reservation" mechanism was added.
  6534. + *
  6535. + * Memory reservation is implemented by perthread-pages.diff patch from
  6536. + * core-patches. Its API is defined in <linux/gfp.h>
  6537. + *
  6538. + * int perthread_pages_reserve(int nrpages, gfp_t gfp);
  6539. + * void perthread_pages_release(int nrpages);
  6540. + * int perthread_pages_count(void);
  6541. + *
  6542. + * carry estimates its worst case memory requirements at the entry, reserves
  6543. + * enough memory, and releases unused pages before returning.
  6544. + *
  6545. + * Code below estimates worst case memory requirements for a given carry
  6546. + * queue. This is done by summing worst case memory requirements for each
  6547. + * operation in the queue.
  6548. + *
  6549. + */
  6550. +
  6551. +/*
  6552. + * Memory requirements of many operations depend on the tree
  6553. + * height. For example, item insertion requires new node to be inserted at
  6554. + * each tree level in the worst case. What tree height should be used for
  6555. + * estimation? Current tree height is wrong, because tree height can change
  6556. + * between the time when estimation was done and the time when operation is
  6557. + * actually performed. Maximal possible tree height (REISER4_MAX_ZTREE_HEIGHT)
  6558. + * is also not desirable, because it would lead to huge over-estimation
  6559. + * all the time. Plausible solution is "capped tree height": if current tree
  6560. + * height is less than some TREE_HEIGHT_CAP constant, capped tree height is
  6561. + * TREE_HEIGHT_CAP, otherwise it's current tree height. Idea behind this is
  6562. + * that if tree height is TREE_HEIGHT_CAP or larger, it's extremely unlikely
  6563. + * to be increased even more during short interval of time.
  6564. + */
  6565. +#define TREE_HEIGHT_CAP (5)
  6566. +
  6567. +/* return capped tree height for the @tree. See comment above. */
  6568. +static int cap_tree_height(reiser4_tree * tree)
  6569. +{
  6570. + return max_t(int, tree->height, TREE_HEIGHT_CAP);
  6571. +}
  6572. +
  6573. +/* return capped tree height for the current tree. */
  6574. +static int capped_height(void)
  6575. +{
  6576. + return cap_tree_height(current_tree);
  6577. +}
  6578. +
  6579. +/* return number of pages required to store given number of bytes */
  6580. +static int bytes_to_pages(int bytes)
  6581. +{
  6582. + return (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
  6583. +}
  6584. +
  6585. +/* how many pages are required to allocate znodes during item insertion. */
  6586. +static int carry_estimate_znodes(void)
  6587. +{
  6588. + /*
  6589. + * Note that we have a problem here: there is no way to
  6590. + * reserve pages specifically for the given slab. This means that
  6591. + * these pages can be hijacked for some other end.
  6592. + */
  6593. +
  6594. + /* in the worst case we need 3 new znodes on each tree level */
  6595. + return bytes_to_pages(capped_height() * sizeof(znode) * 3);
  6596. +}
  6597. +
  6598. +/*
  6599. + * how many pages are required to load bitmaps. One bitmap per level.
  6600. + */
  6601. +static int carry_estimate_bitmaps(void)
  6602. +{
  6603. + if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DONT_LOAD_BITMAP)) {
  6604. + int bytes;
  6605. +
  6606. + bytes = capped_height() * (0 + /* bnode should be added, but
  6607. + * it is private to bitmap.c,
  6608. + * skip for now. */
  6609. + 2 * sizeof(jnode));
  6610. + /* working and commit jnodes */
  6611. + return bytes_to_pages(bytes) + 2; /* and their contents */
  6612. + } else
  6613. + /* bitmaps were pre-loaded during mount */
  6614. + return 0;
  6615. +}
  6616. +
  6617. +/* worst case item insertion memory requirements */
  6618. +static int carry_estimate_insert(carry_op * op, carry_level * level)
  6619. +{
  6620. + return carry_estimate_bitmaps() + carry_estimate_znodes() + 1 +
  6621. + /* new atom */
  6622. + capped_height() + /* new block on each level */
  6623. + 1 + /* and possibly extra new block at the leaf level */
  6624. + 3; /* loading of leaves into memory */
  6625. +}
  6626. +
  6627. +/* worst case item deletion memory requirements */
  6628. +static int carry_estimate_delete(carry_op * op, carry_level * level)
  6629. +{
  6630. + return carry_estimate_bitmaps() + carry_estimate_znodes() + 1 +
  6631. + /* new atom */
  6632. + 3; /* loading of leaves into memory */
  6633. +}
  6634. +
  6635. +/* worst case tree cut memory requirements */
  6636. +static int carry_estimate_cut(carry_op * op, carry_level * level)
  6637. +{
  6638. + return carry_estimate_bitmaps() + carry_estimate_znodes() + 1 +
  6639. + /* new atom */
  6640. + 3; /* loading of leaves into memory */
  6641. +}
  6642. +
  6643. +/* worst case memory requirements of pasting into item */
  6644. +static int carry_estimate_paste(carry_op * op, carry_level * level)
  6645. +{
  6646. + return carry_estimate_bitmaps() + carry_estimate_znodes() + 1 +
  6647. + /* new atom */
  6648. + capped_height() + /* new block on each level */
  6649. + 1 + /* and possibly extra new block at the leaf level */
  6650. + 3; /* loading of leaves into memory */
  6651. +}
  6652. +
  6653. +/* worst case memory requirements of extent insertion */
  6654. +static int carry_estimate_extent(carry_op * op, carry_level * level)
  6655. +{
  6656. + return carry_estimate_insert(op, level) + /* insert extent */
  6657. + carry_estimate_delete(op, level); /* kill leaf */
  6658. +}
  6659. +
  6660. +/* worst case memory requirements of key update */
  6661. +static int carry_estimate_update(carry_op * op, carry_level * level)
  6662. +{
  6663. + return 0;
  6664. +}
  6665. +
  6666. +/* worst case memory requirements of flow insertion */
  6667. +static int carry_estimate_insert_flow(carry_op * op, carry_level * level)
  6668. +{
  6669. + int newnodes;
  6670. +
  6671. + newnodes = min(bytes_to_pages(op->u.insert_flow.flow->length),
  6672. + CARRY_FLOW_NEW_NODES_LIMIT);
  6673. + /*
  6674. + * roughly estimate insert_flow as a sequence of insertions.
  6675. + */
  6676. + return newnodes * carry_estimate_insert(op, level);
  6677. +}
  6678. +
  6679. +/* This is dispatch table for carry operations. It can be trivially
  6680. + abstracted into useful plugin: tunable balancing policy is a good
  6681. + thing. */
  6682. +carry_op_handler op_dispatch_table[COP_LAST_OP] = {
  6683. + [COP_INSERT] = {
  6684. + .handler = carry_insert,
  6685. + .estimate = carry_estimate_insert}
  6686. + ,
  6687. + [COP_DELETE] = {
  6688. + .handler = carry_delete,
  6689. + .estimate = carry_estimate_delete}
  6690. + ,
  6691. + [COP_CUT] = {
  6692. + .handler = carry_cut,
  6693. + .estimate = carry_estimate_cut}
  6694. + ,
  6695. + [COP_PASTE] = {
  6696. + .handler = carry_paste,
  6697. + .estimate = carry_estimate_paste}
  6698. + ,
  6699. + [COP_EXTENT] = {
  6700. + .handler = carry_extent,
  6701. + .estimate = carry_estimate_extent}
  6702. + ,
  6703. + [COP_UPDATE] = {
  6704. + .handler = carry_update,
  6705. + .estimate = carry_estimate_update}
  6706. + ,
  6707. + [COP_INSERT_FLOW] = {
  6708. + .handler = carry_insert_flow,
  6709. + .estimate = carry_estimate_insert_flow}
  6710. +};
  6711. +
  6712. +/* Make Linus happy.
  6713. + Local variables:
  6714. + c-indentation-style: "K&R"
  6715. + mode-name: "LC"
  6716. + c-basic-offset: 8
  6717. + tab-width: 8
  6718. + fill-column: 120
  6719. + scroll-step: 1
  6720. + End:
  6721. +*/
  6722. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/carry_ops.h linux-5.16.14/fs/reiser4/carry_ops.h
  6723. --- linux-5.16.14.orig/fs/reiser4/carry_ops.h 1970-01-01 01:00:00.000000000 +0100
  6724. +++ linux-5.16.14/fs/reiser4/carry_ops.h 2022-03-12 13:26:19.643892707 +0100
  6725. @@ -0,0 +1,43 @@
  6726. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  6727. + reiser4/README */
  6728. +
  6729. +/* implementation of carry operations. See carry_ops.c for details. */
  6730. +
  6731. +#if !defined(__CARRY_OPS_H__)
  6732. +#define __CARRY_OPS_H__
  6733. +
  6734. +#include "forward.h"
  6735. +#include "znode.h"
  6736. +#include "carry.h"
  6737. +
  6738. +/* carry operation handlers */
  6739. +typedef struct carry_op_handler {
  6740. + /* perform operation */
  6741. + int (*handler) (carry_op * op, carry_level * doing, carry_level * todo);
  6742. + /* estimate memory requirements for @op */
  6743. + int (*estimate) (carry_op * op, carry_level * level);
  6744. +} carry_op_handler;
  6745. +
  6746. +/* This is dispatch table for carry operations. It can be trivially
  6747. + abstracted into useful plugin: tunable balancing policy is a good
  6748. + thing. */
  6749. +extern carry_op_handler op_dispatch_table[COP_LAST_OP];
  6750. +
  6751. +unsigned int space_needed(const znode * node, const coord_t *coord,
  6752. + const reiser4_item_data * data, int inserting);
  6753. +extern carry_node *find_left_carry(carry_node * node, carry_level * level);
  6754. +extern carry_node *find_right_carry(carry_node * node, carry_level * level);
  6755. +
  6756. +/* __CARRY_OPS_H__ */
  6757. +#endif
  6758. +
  6759. +/* Make Linus happy.
  6760. + Local variables:
  6761. + c-indentation-style: "K&R"
  6762. + mode-name: "LC"
  6763. + c-basic-offset: 8
  6764. + tab-width: 8
  6765. + fill-column: 120
  6766. + scroll-step: 1
  6767. + End:
  6768. +*/
  6769. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/checksum.c linux-5.16.14/fs/reiser4/checksum.c
  6770. --- linux-5.16.14.orig/fs/reiser4/checksum.c 1970-01-01 01:00:00.000000000 +0100
  6771. +++ linux-5.16.14/fs/reiser4/checksum.c 2022-03-12 13:26:19.643892707 +0100
  6772. @@ -0,0 +1,33 @@
  6773. +#include <linux/err.h>
  6774. +#include "debug.h"
  6775. +#include "checksum.h"
  6776. +
  6777. +int reiser4_init_csum_tfm(struct crypto_shash **tfm)
  6778. +{
  6779. + struct crypto_shash *new_tfm;
  6780. +
  6781. + new_tfm = crypto_alloc_shash("crc32c", 0, 0);
  6782. + if (IS_ERR(new_tfm)) {
  6783. + warning("intelfx-81", "Could not load crc32c driver");
  6784. + return PTR_ERR(new_tfm);
  6785. + }
  6786. +
  6787. + *tfm = new_tfm;
  6788. + return 0;
  6789. +}
  6790. +
  6791. +void reiser4_done_csum_tfm(struct crypto_shash *tfm)
  6792. +{
  6793. + crypto_free_shash(tfm);
  6794. +}
  6795. +
  6796. +/*
  6797. + Local variables:
  6798. + c-indentation-style: "K&R"
  6799. + mode-name: "LC"
  6800. + c-basic-offset: 8
  6801. + tab-width: 8
  6802. + fill-column: 120
  6803. + scroll-step: 1
  6804. + End:
  6805. +*/
  6806. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/checksum.h linux-5.16.14/fs/reiser4/checksum.h
  6807. --- linux-5.16.14.orig/fs/reiser4/checksum.h 1970-01-01 01:00:00.000000000 +0100
  6808. +++ linux-5.16.14/fs/reiser4/checksum.h 2022-03-12 13:26:19.643892707 +0100
  6809. @@ -0,0 +1,38 @@
  6810. +#ifndef __CHECKSUM__
  6811. +#define __CHECKSUM__
  6812. +
  6813. +#include <crypto/hash.h>
  6814. +
  6815. +int reiser4_init_csum_tfm(struct crypto_shash **tfm);
  6816. +void reiser4_done_csum_tfm(struct crypto_shash *tfm);
  6817. +u32 static inline reiser4_crc32c(struct crypto_shash *tfm,
  6818. + u32 crc, const void *address,
  6819. + unsigned int length)
  6820. +{
  6821. + struct {
  6822. + struct shash_desc shash;
  6823. + char ctx[4];
  6824. + } desc;
  6825. + int err;
  6826. +
  6827. + desc.shash.tfm = tfm;
  6828. + *(u32 *)desc.ctx = crc;
  6829. +
  6830. + err = crypto_shash_update(&desc.shash, address, length);
  6831. + BUG_ON(err);
  6832. + return *(u32 *)desc.ctx;
  6833. +}
  6834. +
  6835. +#endif /* __CHECKSUM__ */
  6836. +
  6837. +/*
  6838. + Local variables:
  6839. + c-indentation-style: "K&R"
  6840. + mode-name: "LC"
  6841. + c-basic-offset: 8
  6842. + tab-width: 8
  6843. + fill-column: 120
  6844. + scroll-step: 1
  6845. + End:
  6846. +*/
  6847. +
  6848. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/context.c linux-5.16.14/fs/reiser4/context.c
  6849. --- linux-5.16.14.orig/fs/reiser4/context.c 1970-01-01 01:00:00.000000000 +0100
  6850. +++ linux-5.16.14/fs/reiser4/context.c 2022-03-12 13:26:19.643892707 +0100
  6851. @@ -0,0 +1,288 @@
  6852. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  6853. +
  6854. +/* Manipulation of reiser4_context */
  6855. +
  6856. +/*
  6857. + * global context used during system call. Variable of this type is allocated
  6858. + * on the stack at the beginning of the reiser4 part of the system call and
  6859. + * pointer to it is stored in the current->fs_context. This allows us to avoid
  6860. + * passing pointer to current transaction and current lockstack (both in
  6861. + * one-to-one mapping with threads) all over the call chain.
  6862. + *
  6863. + * It's kind of like those global variables the prof used to tell you not to
  6864. + * use in CS1, except thread specific.;-) Nikita, this was a good idea.
  6865. + *
  6866. + * In some situations it is desirable to have ability to enter reiser4_context
  6867. + * more than once for the same thread (nested contexts). For example, there
  6868. + * are some functions that can be called either directly from VFS/VM or from
  6869. + * already active reiser4 context (->writepage, for example).
  6870. + *
  6871. + * In such situations "child" context acts like dummy: all activity is
  6872. + * actually performed in the top level context, and get_current_context()
  6873. + * always returns top level context.
  6874. + * Of course, reiser4_init_context()/reiser4_done_context() have to be properly
  6875. + * nested any way.
  6876. + *
  6877. + * Note that there is an important difference between reiser4 uses
  6878. + * ->fs_context and the way other file systems use it. Other file systems
  6879. + * (ext3 and reiserfs) use ->fs_context only for the duration of _transaction_
  6880. + * (this is why ->fs_context was initially called ->journal_info). This means,
  6881. + * that when ext3 or reiserfs finds that ->fs_context is not NULL on the entry
  6882. + * to the file system, they assume that some transaction is already underway,
  6883. + * and usually bail out, because starting nested transaction would most likely
  6884. + * lead to the deadlock. This gives false positives with reiser4, because we
  6885. + * set ->fs_context before starting transaction.
  6886. + */
  6887. +
  6888. +#include "debug.h"
  6889. +#include "super.h"
  6890. +#include "context.h"
  6891. +#include "vfs_ops.h" /* for reiser4_throttle_write() */
  6892. +
  6893. +#include <linux/writeback.h> /* for current_is_pdflush() */
  6894. +#include <linux/hardirq.h>
  6895. +
  6896. +static void _reiser4_init_context(reiser4_context * context,
  6897. + struct super_block *super)
  6898. +{
  6899. + memset(context, 0, sizeof(*context));
  6900. +
  6901. + context->super = super;
  6902. + context->magic = context_magic;
  6903. + context->outer = current->journal_info;
  6904. + current->journal_info = (void *)context;
  6905. + context->nr_children = 0;
  6906. + context->gfp_mask = GFP_KERNEL;
  6907. +
  6908. + init_lock_stack(&context->stack);
  6909. +
  6910. + reiser4_txn_begin(context);
  6911. +
  6912. + /* initialize head of tap list */
  6913. + INIT_LIST_HEAD(&context->taps);
  6914. +#if REISER4_DEBUG
  6915. + context->task = current;
  6916. +#endif
  6917. + grab_space_enable();
  6918. +}
  6919. +
  6920. +/* initialize context and bind it to the current thread
  6921. +
  6922. + This function should be called at the beginning of reiser4 part of
  6923. + syscall.
  6924. +*/
  6925. +reiser4_context * reiser4_init_context(struct super_block *super)
  6926. +{
  6927. + reiser4_context *context;
  6928. +
  6929. + assert("nikita-2662", !in_interrupt() && !in_irq());
  6930. + assert("nikita-3357", super != NULL);
  6931. + assert("nikita-3358", super->s_op == NULL || is_reiser4_super(super));
  6932. +
  6933. + context = get_current_context_check();
  6934. + if (context && context->super == super) {
  6935. + context = (reiser4_context *) current->journal_info;
  6936. + context->nr_children++;
  6937. + return context;
  6938. + }
  6939. +
  6940. + context = kmalloc(sizeof(*context), GFP_KERNEL);
  6941. + if (context == NULL)
  6942. + return ERR_PTR(RETERR(-ENOMEM));
  6943. +
  6944. + _reiser4_init_context(context, super);
  6945. + return context;
  6946. +}
  6947. +
  6948. +/* this is used in scan_mgr which is called with spinlock held and in
  6949. + reiser4_fill_super magic */
  6950. +void init_stack_context(reiser4_context *context, struct super_block *super)
  6951. +{
  6952. + assert("nikita-2662", !in_interrupt() && !in_irq());
  6953. + assert("nikita-3357", super != NULL);
  6954. + assert("nikita-3358", super->s_op == NULL || is_reiser4_super(super));
  6955. + assert("vs-12", !is_in_reiser4_context());
  6956. +
  6957. + _reiser4_init_context(context, super);
  6958. + context->on_stack = 1;
  6959. + return;
  6960. +}
  6961. +
  6962. +/* cast lock stack embedded into reiser4 context up to its container */
  6963. +reiser4_context *get_context_by_lock_stack(lock_stack * owner)
  6964. +{
  6965. + return container_of(owner, reiser4_context, stack);
  6966. +}
  6967. +
  6968. +/* true if there is already _any_ reiser4 context for the current thread */
  6969. +int is_in_reiser4_context(void)
  6970. +{
  6971. + reiser4_context *ctx;
  6972. +
  6973. + ctx = current->journal_info;
  6974. + return ctx != NULL && ((unsigned long)ctx->magic) == context_magic;
  6975. +}
  6976. +
  6977. +/*
  6978. + * call balance dirty pages for the current context.
  6979. + *
  6980. + * File system is expected to call balance_dirty_pages_ratelimited() whenever
  6981. + * it dirties a page. reiser4 does this for unformatted nodes (that is, during
  6982. + * write---this covers vast majority of all dirty traffic), but we cannot do
  6983. + * this immediately when formatted node is dirtied, because long term lock is
  6984. + * usually held at that time. To work around this, dirtying of formatted node
  6985. + * simply increases ->nr_marked_dirty counter in the current reiser4
  6986. + * context. When we are about to leave this context,
  6987. + * balance_dirty_pages_ratelimited() is called, if necessary.
  6988. + *
  6989. + * This introduces another problem: sometimes we do not want to run
  6990. + * balance_dirty_pages_ratelimited() when leaving a context, for example
  6991. + * because some important lock (like ->i_mutex on the parent directory) is
  6992. + * held. To achieve this, ->nobalance flag can be set in the current context.
  6993. + */
  6994. +static void reiser4_throttle_write_at(reiser4_context *context)
  6995. +{
  6996. + reiser4_super_info_data *sbinfo = get_super_private(context->super);
  6997. +
  6998. + /*
  6999. + * call balance_dirty_pages_ratelimited() to process formatted nodes
  7000. + * dirtied during this system call. Do that only if we are not in mount
  7001. + * and there were nodes dirtied in this context and we are not in
  7002. + * writepage (to avoid deadlock) and not in pdflush
  7003. + */
  7004. + if (sbinfo != NULL && sbinfo->fake != NULL &&
  7005. + context->nr_marked_dirty != 0 &&
  7006. + !(current->flags & PF_MEMALLOC) &&
  7007. + !context->flush_bd_task)
  7008. + reiser4_throttle_write(sbinfo->fake);
  7009. +}
  7010. +
  7011. +/* release resources associated with context.
  7012. +
  7013. + This function should be called at the end of "session" with reiser4,
  7014. + typically just before leaving reiser4 driver back to VFS.
  7015. +
  7016. + This is a good place to put some debugging consistency checks, like that
  7017. + thread released all locks and closed transcrash etc.
  7018. +
  7019. +*/
  7020. +static void reiser4_done_context(reiser4_context * context)
  7021. + /* context being released */
  7022. +{
  7023. + assert("nikita-860", context != NULL);
  7024. + assert("nikita-859", context->magic == context_magic);
  7025. + assert("vs-646", (reiser4_context *) current->journal_info == context);
  7026. + assert("zam-686", !in_interrupt() && !in_irq());
  7027. +
  7028. + /* only do anything when leaving top-level reiser4 context. All nested
  7029. + * contexts are just dummies. */
  7030. + if (context->nr_children == 0) {
  7031. + assert("jmacd-673", context->trans == NULL);
  7032. + assert("jmacd-1002", lock_stack_isclean(&context->stack));
  7033. + assert("nikita-1936", reiser4_no_counters_are_held());
  7034. + assert("nikita-2626", list_empty_careful(reiser4_taps_list()));
  7035. + assert("zam-1004", ergo(get_super_private(context->super),
  7036. + get_super_private(context->super)->delete_mutex_owner !=
  7037. + current));
  7038. +
  7039. + /* release all grabbed but as yet unused blocks */
  7040. + if (context->grabbed_blocks != 0)
  7041. + all_grabbed2free();
  7042. +
  7043. + /*
  7044. + * synchronize against longterm_unlock_znode():
  7045. + * wake_up_requestor() wakes up requestors without holding
  7046. + * zlock (otherwise they will immediately bump into that lock
  7047. + * after wake up on another CPU). To work around (rare)
  7048. + * situation where requestor has been woken up asynchronously
  7049. + * and managed to run until completion (and destroy its
  7050. + * context and lock stack) before wake_up_requestor() called
  7051. + * wake_up() on it, wake_up_requestor() synchronize on lock
  7052. + * stack spin lock. It has actually been observed that spin
  7053. + * lock _was_ locked at this point, because
  7054. + * wake_up_requestor() took interrupt.
  7055. + */
  7056. + spin_lock_stack(&context->stack);
  7057. + spin_unlock_stack(&context->stack);
  7058. +
  7059. + assert("zam-684", context->nr_children == 0);
  7060. + /* restore original ->fs_context value */
  7061. + current->journal_info = context->outer;
  7062. + if (context->on_stack == 0)
  7063. + kfree(context);
  7064. + } else {
  7065. + context->nr_children--;
  7066. +#if REISER4_DEBUG
  7067. + assert("zam-685", context->nr_children >= 0);
  7068. +#endif
  7069. + }
  7070. +}
  7071. +
  7072. +/*
  7073. + * exit reiser4 context. Call reiser4_throttle_write_at() if necessary. Close
  7074. + * transaction. Call done_context() to do context related book-keeping.
  7075. + */
  7076. +void reiser4_exit_context(reiser4_context * context)
  7077. +{
  7078. + assert("nikita-3021", reiser4_schedulable());
  7079. +
  7080. + if (context->nr_children == 0) {
  7081. + if (!context->nobalance)
  7082. + reiser4_throttle_write_at(context);
  7083. +
  7084. + /* if filesystem is mounted with -o sync or -o dirsync - commit
  7085. + transaction. FIXME: TXNH_DONT_COMMIT is used to avoid
  7086. + committing on exit_context when inode semaphore is held and
  7087. + to have ktxnmgrd to do commit instead to get better
  7088. + concurrent filesystem accesses. But, when one mounts with -o
  7089. + sync, he cares more about reliability than about
  7090. + performance. So, for now we have this simple mount -o sync
  7091. + support. */
  7092. + if (context->super->s_flags & (SB_SYNCHRONOUS | SB_DIRSYNC)) {
  7093. + txn_atom *atom;
  7094. +
  7095. + atom = get_current_atom_locked_nocheck();
  7096. + if (atom) {
  7097. + atom->flags |= ATOM_FORCE_COMMIT;
  7098. + context->trans->flags &= ~TXNH_DONT_COMMIT;
  7099. + spin_unlock_atom(atom);
  7100. + }
  7101. + }
  7102. + reiser4_txn_end(context);
  7103. + }
  7104. + reiser4_done_context(context);
  7105. +}
  7106. +
  7107. +void reiser4_ctx_gfp_mask_set(void)
  7108. +{
  7109. + reiser4_context *ctx;
  7110. +
  7111. + ctx = get_current_context();
  7112. + if (ctx->entd == 0 &&
  7113. + list_empty(&ctx->stack.locks) &&
  7114. + ctx->trans->atom == NULL)
  7115. + ctx->gfp_mask = GFP_KERNEL;
  7116. + else
  7117. + ctx->gfp_mask = GFP_NOFS;
  7118. +}
  7119. +
  7120. +void reiser4_ctx_gfp_mask_force(gfp_t mask)
  7121. +{
  7122. + reiser4_context *ctx;
  7123. + ctx = get_current_context();
  7124. +
  7125. + assert("edward-1454", ctx != NULL);
  7126. +
  7127. + ctx->gfp_mask = mask;
  7128. +}
  7129. +
  7130. +/*
  7131. + * Local variables:
  7132. + * c-indentation-style: "K&R"
  7133. + * mode-name: "LC"
  7134. + * c-basic-offset: 8
  7135. + * tab-width: 8
  7136. + * fill-column: 120
  7137. + * scroll-step: 1
  7138. + * End:
  7139. + */
  7140. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/context.h linux-5.16.14/fs/reiser4/context.h
  7141. --- linux-5.16.14.orig/fs/reiser4/context.h 1970-01-01 01:00:00.000000000 +0100
  7142. +++ linux-5.16.14/fs/reiser4/context.h 2022-03-12 13:26:19.644892709 +0100
  7143. @@ -0,0 +1,237 @@
  7144. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  7145. + * reiser4/README */
  7146. +
  7147. +/* Reiser4 context. See context.c for details. */
  7148. +
  7149. +#if !defined( __REISER4_CONTEXT_H__ )
  7150. +#define __REISER4_CONTEXT_H__
  7151. +
  7152. +#include "forward.h"
  7153. +#include "debug.h"
  7154. +#include "dformat.h"
  7155. +#include "tap.h"
  7156. +#include "lock.h"
  7157. +
  7158. +#include <linux/types.h> /* for __u?? */
  7159. +#include <linux/fs.h> /* for struct super_block */
  7160. +#include <linux/spinlock.h>
  7161. +#include <linux/sched.h> /* for struct task_struct */
  7162. +
  7163. +/* reiser4 per-thread context */
  7164. +struct reiser4_context {
  7165. + /* magic constant. For identification of reiser4 contexts. */
  7166. + __u32 magic;
  7167. +
  7168. + /* current lock stack. See lock.[ch]. This is where list of all
  7169. + locks taken by current thread is kept. This is also used in
  7170. + deadlock detection. */
  7171. + lock_stack stack;
  7172. +
  7173. + /* current transcrash. */
  7174. + txn_handle *trans;
  7175. + /* transaction handle embedded into reiser4_context. ->trans points
  7176. + * here by default. */
  7177. + txn_handle trans_in_ctx;
  7178. +
  7179. + /* super block we are working with. To get the current tree
  7180. + use &get_super_private (reiser4_get_current_sb ())->tree. */
  7181. + struct super_block *super;
  7182. +
  7183. + /* parent fs activation */
  7184. + struct fs_activation *outer;
  7185. +
  7186. + /* per-thread grabbed (for further allocation) blocks counter */
  7187. + reiser4_block_nr grabbed_blocks;
  7188. +
  7189. + /* list of taps currently monitored. See tap.c */
  7190. + struct list_head taps;
  7191. +
  7192. + /* grabbing space is enabled */
  7193. + unsigned int grab_enabled:1;
  7194. + /* should be set when we are write dirty nodes to disk in jnode_flush or
  7195. + * reiser4_write_logs() */
  7196. + unsigned int writeout_mode:1;
  7197. + /* true, if current thread is an ent thread */
  7198. + unsigned int entd:1;
  7199. + /* true, if balance_dirty_pages() should not be run when leaving this
  7200. + * context. This is used to avoid lengthy balance_dirty_pages()
  7201. + * operation when holding some important resource, like directory
  7202. + * ->i_mutex */
  7203. + unsigned int nobalance:1;
  7204. +
  7205. + /* this bit is used on reiser4_done_context to decide whether context is
  7206. + kmalloc-ed and has to be kfree-ed */
  7207. + unsigned int on_stack:1;
  7208. + /* file system is read-only */
  7209. + unsigned int ro:1;
  7210. + /* replacement of PF_FLUSHER */
  7211. + unsigned int flush_bd_task:1;
  7212. +
  7213. + /* count non-trivial jnode_set_dirty() calls */
  7214. + unsigned long nr_marked_dirty;
  7215. + /*
  7216. + * reiser4_writeback_inodes calls (via generic_writeback_sb_inodes)
  7217. + * reiser4_writepages_dispatch for each of dirty inodes.
  7218. + * Reiser4_writepages_dispatch captures pages. When number of pages
  7219. + * captured in one reiser4_writeback_inodes reaches some threshold -
  7220. + * some atoms get flushed
  7221. + */
  7222. + int nr_captured;
  7223. + int nr_children; /* number of child contexts */
  7224. + struct page *locked_page; /* page that should be unlocked in
  7225. + * reiser4_dirty_inode() before taking
  7226. + * a longterm lock (to not violate
  7227. + * reiser4 lock ordering) */
  7228. +#if REISER4_DEBUG
  7229. + /* debugging information about reiser4 locks held by the current
  7230. + * thread */
  7231. + reiser4_lock_cnt_info locks;
  7232. + struct task_struct *task; /* so we can easily find owner of the stack */
  7233. +
  7234. + /*
  7235. + * disk space grabbing debugging support
  7236. + */
  7237. + /* how many disk blocks were grabbed by the first call to
  7238. + * reiser4_grab_space() in this context */
  7239. + reiser4_block_nr grabbed_initially;
  7240. +
  7241. + /* list of all threads doing flush currently */
  7242. + struct list_head flushers_link;
  7243. + /* information about last error encountered by reiser4 */
  7244. + err_site err;
  7245. +#endif
  7246. + void *vp;
  7247. + gfp_t gfp_mask;
  7248. +};
  7249. +
  7250. +extern reiser4_context *get_context_by_lock_stack(lock_stack *);
  7251. +
  7252. +/* Debugging helps. */
  7253. +#if REISER4_DEBUG
  7254. +extern void print_contexts(void);
  7255. +#endif
  7256. +
  7257. +#define current_tree (&(get_super_private(reiser4_get_current_sb())->tree))
  7258. +#define current_blocksize reiser4_get_current_sb()->s_blocksize
  7259. +#define current_blocksize_bits reiser4_get_current_sb()->s_blocksize_bits
  7260. +
  7261. +extern reiser4_context *reiser4_init_context(struct super_block *);
  7262. +extern void init_stack_context(reiser4_context *, struct super_block *);
  7263. +extern void reiser4_exit_context(reiser4_context *);
  7264. +
  7265. +/* magic constant we store in reiser4_context allocated at the stack. Used to
  7266. + catch accesses to stale or uninitialized contexts. */
  7267. +#define context_magic ((__u32) 0x4b1b5d0b)
  7268. +
  7269. +extern int is_in_reiser4_context(void);
  7270. +
  7271. +/*
  7272. + * return reiser4_context for the thread @tsk
  7273. + */
  7274. +static inline reiser4_context *get_context(const struct task_struct *tsk)
  7275. +{
  7276. + assert("vs-1682",
  7277. + ((reiser4_context *) tsk->journal_info)->magic == context_magic);
  7278. + return (reiser4_context *) tsk->journal_info;
  7279. +}
  7280. +
  7281. +/*
  7282. + * return reiser4 context of the current thread, or NULL if there is none.
  7283. + */
  7284. +static inline reiser4_context *get_current_context_check(void)
  7285. +{
  7286. + if (is_in_reiser4_context())
  7287. + return get_context(current);
  7288. + else
  7289. + return NULL;
  7290. +}
  7291. +
  7292. +static inline reiser4_context *get_current_context(void); /* __attribute__((const)); */
  7293. +
  7294. +/* return context associated with current thread */
  7295. +static inline reiser4_context *get_current_context(void)
  7296. +{
  7297. + return get_context(current);
  7298. +}
  7299. +
  7300. +static inline gfp_t reiser4_ctx_gfp_mask_get(void)
  7301. +{
  7302. + reiser4_context *ctx;
  7303. +
  7304. + ctx = get_current_context_check();
  7305. + return (ctx == NULL) ? GFP_KERNEL : ctx->gfp_mask;
  7306. +}
  7307. +
  7308. +void reiser4_ctx_gfp_mask_set(void);
  7309. +void reiser4_ctx_gfp_mask_force (gfp_t mask);
  7310. +
  7311. +/*
  7312. + * true if current thread is in the write-out mode. Thread enters write-out
  7313. + * mode during jnode_flush and reiser4_write_logs().
  7314. + */
  7315. +static inline int is_writeout_mode(void)
  7316. +{
  7317. + return get_current_context()->writeout_mode;
  7318. +}
  7319. +
  7320. +/*
  7321. + * enter write-out mode
  7322. + */
  7323. +static inline void writeout_mode_enable(void)
  7324. +{
  7325. + assert("zam-941", !get_current_context()->writeout_mode);
  7326. + get_current_context()->writeout_mode = 1;
  7327. +}
  7328. +
  7329. +/*
  7330. + * leave write-out mode
  7331. + */
  7332. +static inline void writeout_mode_disable(void)
  7333. +{
  7334. + assert("zam-942", get_current_context()->writeout_mode);
  7335. + get_current_context()->writeout_mode = 0;
  7336. +}
  7337. +
  7338. +static inline void grab_space_enable(void)
  7339. +{
  7340. + get_current_context()->grab_enabled = 1;
  7341. +}
  7342. +
  7343. +static inline void grab_space_disable(void)
  7344. +{
  7345. + get_current_context()->grab_enabled = 0;
  7346. +}
  7347. +
  7348. +static inline void grab_space_set_enabled(int enabled)
  7349. +{
  7350. + get_current_context()->grab_enabled = enabled;
  7351. +}
  7352. +
  7353. +static inline int is_grab_enabled(reiser4_context * ctx)
  7354. +{
  7355. + return ctx->grab_enabled;
  7356. +}
  7357. +
  7358. +/* mark transaction handle in @ctx as TXNH_DONT_COMMIT, so that no commit or
  7359. + * flush would be performed when it is closed. This is necessary when handle
  7360. + * has to be closed under some coarse semaphore, like i_mutex of
  7361. + * directory. Commit will be performed by ktxnmgrd. */
  7362. +static inline void context_set_commit_async(reiser4_context * context)
  7363. +{
  7364. + context->nobalance = 1;
  7365. + context->trans->flags |= TXNH_DONT_COMMIT;
  7366. +}
  7367. +
  7368. +/* __REISER4_CONTEXT_H__ */
  7369. +#endif
  7370. +
  7371. +/* Make Linus happy.
  7372. + Local variables:
  7373. + c-indentation-style: "K&R"
  7374. + mode-name: "LC"
  7375. + c-basic-offset: 8
  7376. + tab-width: 8
  7377. + fill-column: 120
  7378. + scroll-step: 1
  7379. + End:
  7380. +*/
  7381. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/coord.c linux-5.16.14/fs/reiser4/coord.c
  7382. --- linux-5.16.14.orig/fs/reiser4/coord.c 1970-01-01 01:00:00.000000000 +0100
  7383. +++ linux-5.16.14/fs/reiser4/coord.c 2022-03-12 13:26:19.644892709 +0100
  7384. @@ -0,0 +1,928 @@
  7385. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  7386. + reiser4/README */
  7387. +
  7388. +#include "forward.h"
  7389. +#include "debug.h"
  7390. +#include "dformat.h"
  7391. +#include "tree.h"
  7392. +#include "plugin/item/item.h"
  7393. +#include "znode.h"
  7394. +#include "coord.h"
  7395. +
  7396. +/* Internal constructor. */
  7397. +static inline void
  7398. +coord_init_values(coord_t *coord, const znode * node, pos_in_node_t item_pos,
  7399. + pos_in_node_t unit_pos, between_enum between)
  7400. +{
  7401. + coord->node = (znode *) node;
  7402. + coord_set_item_pos(coord, item_pos);
  7403. + coord->unit_pos = unit_pos;
  7404. + coord->between = between;
  7405. + ON_DEBUG(coord->plug_v = 0);
  7406. + ON_DEBUG(coord->body_v = 0);
  7407. +
  7408. + /*ON_TRACE (TRACE_COORDS, "init coord %p node %p: %u %u %s\n", coord,
  7409. + node, item_pos, unit_pos, coord_tween_tostring (between)); */
  7410. +}
  7411. +
  7412. +/* after shifting of node content, coord previously set properly may become
  7413. + invalid, try to "normalize" it. */
  7414. +void coord_normalize(coord_t *coord)
  7415. +{
  7416. + znode *node;
  7417. +
  7418. + node = coord->node;
  7419. + assert("vs-683", node);
  7420. +
  7421. + coord_clear_iplug(coord);
  7422. +
  7423. + if (node_is_empty(node)) {
  7424. + coord_init_first_unit(coord, node);
  7425. + } else if ((coord->between == AFTER_ITEM)
  7426. + || (coord->between == AFTER_UNIT)) {
  7427. + return;
  7428. + } else if (coord->item_pos == coord_num_items(coord)
  7429. + && coord->between == BEFORE_ITEM) {
  7430. + coord_dec_item_pos(coord);
  7431. + coord->between = AFTER_ITEM;
  7432. + } else if (coord->unit_pos == coord_num_units(coord)
  7433. + && coord->between == BEFORE_UNIT) {
  7434. + coord->unit_pos--;
  7435. + coord->between = AFTER_UNIT;
  7436. + } else if (coord->item_pos == coord_num_items(coord)
  7437. + && coord->unit_pos == 0 && coord->between == BEFORE_UNIT) {
  7438. + coord_dec_item_pos(coord);
  7439. + coord->unit_pos = 0;
  7440. + coord->between = AFTER_ITEM;
  7441. + }
  7442. +}
  7443. +
  7444. +/* Copy a coordinate. */
  7445. +void coord_dup(coord_t *coord, const coord_t *old_coord)
  7446. +{
  7447. + assert("jmacd-9800", coord_check(old_coord));
  7448. + coord_dup_nocheck(coord, old_coord);
  7449. +}
  7450. +
  7451. +/* Copy a coordinate without check. Useful when old_coord->node is not
  7452. + loaded. As in cbk_tree_lookup -> connect_znode -> connect_one_side */
  7453. +void coord_dup_nocheck(coord_t *coord, const coord_t *old_coord)
  7454. +{
  7455. + coord->node = old_coord->node;
  7456. + coord_set_item_pos(coord, old_coord->item_pos);
  7457. + coord->unit_pos = old_coord->unit_pos;
  7458. + coord->between = old_coord->between;
  7459. + coord->iplugid = old_coord->iplugid;
  7460. + ON_DEBUG(coord->plug_v = old_coord->plug_v);
  7461. + ON_DEBUG(coord->body_v = old_coord->body_v);
  7462. +}
  7463. +
  7464. +/* Initialize an invalid coordinate. */
  7465. +void coord_init_invalid(coord_t *coord, const znode * node)
  7466. +{
  7467. + coord_init_values(coord, node, 0, 0, INVALID_COORD);
  7468. +}
  7469. +
  7470. +void coord_init_first_unit_nocheck(coord_t *coord, const znode * node)
  7471. +{
  7472. + coord_init_values(coord, node, 0, 0, AT_UNIT);
  7473. +}
  7474. +
  7475. +/* Initialize a coordinate to point at the first unit of the first item. If the
  7476. + node is empty, it is positioned at the EMPTY_NODE. */
  7477. +void coord_init_first_unit(coord_t *coord, const znode * node)
  7478. +{
  7479. + int is_empty = node_is_empty(node);
  7480. +
  7481. + coord_init_values(coord, node, 0, 0, (is_empty ? EMPTY_NODE : AT_UNIT));
  7482. +
  7483. + assert("jmacd-9801", coord_check(coord));
  7484. +}
  7485. +
  7486. +/* Initialize a coordinate to point at the last unit of the last item. If the
  7487. + node is empty, it is positioned at the EMPTY_NODE. */
  7488. +void coord_init_last_unit(coord_t *coord, const znode * node)
  7489. +{
  7490. + int is_empty = node_is_empty(node);
  7491. +
  7492. + coord_init_values(coord, node,
  7493. + (is_empty ? 0 : node_num_items(node) - 1), 0,
  7494. + (is_empty ? EMPTY_NODE : AT_UNIT));
  7495. + if (!is_empty)
  7496. + coord->unit_pos = coord_last_unit_pos(coord);
  7497. + assert("jmacd-9802", coord_check(coord));
  7498. +}
  7499. +
  7500. +/* Initialize a coordinate to before the first item. If the node is empty, it is
  7501. + positioned at the EMPTY_NODE. */
  7502. +void coord_init_before_first_item(coord_t *coord, const znode * node)
  7503. +{
  7504. + int is_empty = node_is_empty(node);
  7505. +
  7506. + coord_init_values(coord, node, 0, 0,
  7507. + (is_empty ? EMPTY_NODE : BEFORE_UNIT));
  7508. +
  7509. + assert("jmacd-9803", coord_check(coord));
  7510. +}
  7511. +
  7512. +/* Initialize a coordinate to after the last item. If the node is empty, it is
  7513. + positioned at the EMPTY_NODE. */
  7514. +void coord_init_after_last_item(coord_t *coord, const znode * node)
  7515. +{
  7516. + int is_empty = node_is_empty(node);
  7517. +
  7518. + coord_init_values(coord, node,
  7519. + (is_empty ? 0 : node_num_items(node) - 1), 0,
  7520. + (is_empty ? EMPTY_NODE : AFTER_ITEM));
  7521. +
  7522. + assert("jmacd-9804", coord_check(coord));
  7523. +}
  7524. +
  7525. +/* Initialize a coordinate to after last unit in the item. Coord must be set
  7526. + already to existing item */
  7527. +void coord_init_after_item_end(coord_t *coord)
  7528. +{
  7529. + coord->between = AFTER_UNIT;
  7530. + coord->unit_pos = coord_last_unit_pos(coord);
  7531. +}
  7532. +
  7533. +/* Initialize a coordinate to before the item. Coord must be set already to
  7534. + existing item */
  7535. +void coord_init_before_item(coord_t *coord)
  7536. +{
  7537. + coord->unit_pos = 0;
  7538. + coord->between = BEFORE_ITEM;
  7539. +}
  7540. +
  7541. +/* Initialize a coordinate to after the item. Coord must be set already to
  7542. + existing item */
  7543. +void coord_init_after_item(coord_t *coord)
  7544. +{
  7545. + coord->unit_pos = 0;
  7546. + coord->between = AFTER_ITEM;
  7547. +}
  7548. +
  7549. +/* Initialize a coordinate by 0s. Used in places where init_coord was used and
  7550. + it was not clear how actually */
  7551. +void coord_init_zero(coord_t *coord)
  7552. +{
  7553. + memset(coord, 0, sizeof(*coord));
  7554. +}
  7555. +
  7556. +/* Return the number of units at the present item.
  7557. + Asserts coord_is_existing_item(). */
  7558. +unsigned coord_num_units(const coord_t *coord)
  7559. +{
  7560. + assert("jmacd-9806", coord_is_existing_item(coord));
  7561. +
  7562. + return item_plugin_by_coord(coord)->b.nr_units(coord);
  7563. +}
  7564. +
  7565. +/* Returns true if the coord was initialized by coord_init_invalid (). */
  7566. +/* Audited by: green(2002.06.15) */
  7567. +int coord_is_invalid(const coord_t *coord)
  7568. +{
  7569. + return coord->between == INVALID_COORD;
  7570. +}
  7571. +
  7572. +/* Returns true if the coordinate is positioned at an existing item, not before
  7573. + or after an item. It may be placed at, before, or after any unit within the
  7574. + item, whether existing or not. */
  7575. +int coord_is_existing_item(const coord_t *coord)
  7576. +{
  7577. + switch (coord->between) {
  7578. + case EMPTY_NODE:
  7579. + case BEFORE_ITEM:
  7580. + case AFTER_ITEM:
  7581. + case INVALID_COORD:
  7582. + return 0;
  7583. +
  7584. + case BEFORE_UNIT:
  7585. + case AT_UNIT:
  7586. + case AFTER_UNIT:
  7587. + return coord->item_pos < coord_num_items(coord);
  7588. + }
  7589. +
  7590. + impossible("jmacd-9900", "unreachable coord: %p", coord);
  7591. + return 0;
  7592. +}
  7593. +
  7594. +/* Returns true if the coordinate is positioned at an existing unit, not before
  7595. + or after a unit. */
  7596. +/* Audited by: green(2002.06.15) */
  7597. +int coord_is_existing_unit(const coord_t *coord)
  7598. +{
  7599. + switch (coord->between) {
  7600. + case EMPTY_NODE:
  7601. + case BEFORE_UNIT:
  7602. + case AFTER_UNIT:
  7603. + case BEFORE_ITEM:
  7604. + case AFTER_ITEM:
  7605. + case INVALID_COORD:
  7606. + return 0;
  7607. +
  7608. + case AT_UNIT:
  7609. + return (coord->item_pos < coord_num_items(coord)
  7610. + && coord->unit_pos < coord_num_units(coord));
  7611. + }
  7612. +
  7613. + impossible("jmacd-9902", "unreachable");
  7614. + return 0;
  7615. +}
  7616. +
  7617. +/* Returns true if the coordinate is positioned at the first unit of the first
  7618. + item. Not true for empty nodes nor coordinates positioned before the first
  7619. + item. */
  7620. +/* Audited by: green(2002.06.15) */
  7621. +int coord_is_leftmost_unit(const coord_t *coord)
  7622. +{
  7623. + return (coord->between == AT_UNIT && coord->item_pos == 0
  7624. + && coord->unit_pos == 0);
  7625. +}
  7626. +
  7627. +#if REISER4_DEBUG
  7628. +/* For assertions only, checks for a valid coordinate. */
  7629. +int coord_check(const coord_t *coord)
  7630. +{
  7631. + if (coord->node == NULL)
  7632. + return 0;
  7633. + if (znode_above_root(coord->node))
  7634. + return 1;
  7635. +
  7636. + switch (coord->between) {
  7637. + default:
  7638. + case INVALID_COORD:
  7639. + return 0;
  7640. + case EMPTY_NODE:
  7641. + if (!node_is_empty(coord->node))
  7642. + return 0;
  7643. + return coord->item_pos == 0 && coord->unit_pos == 0;
  7644. +
  7645. + case BEFORE_UNIT:
  7646. + case AFTER_UNIT:
  7647. + if (node_is_empty(coord->node) && (coord->item_pos == 0)
  7648. + && (coord->unit_pos == 0))
  7649. + return 1;
  7650. + case AT_UNIT:
  7651. + break;
  7652. + case AFTER_ITEM:
  7653. + case BEFORE_ITEM:
  7654. + /* before/after item should not set unit_pos. */
  7655. + if (coord->unit_pos != 0)
  7656. + return 0;
  7657. + break;
  7658. + }
  7659. +
  7660. + if (coord->item_pos >= node_num_items(coord->node))
  7661. + return 0;
  7662. +
  7663. + /* FIXME-VS: we are going to check unit_pos. This makes no sense when
  7664. + between is set either AFTER_ITEM or BEFORE_ITEM */
  7665. + if (coord->between == AFTER_ITEM || coord->between == BEFORE_ITEM)
  7666. + return 1;
  7667. +
  7668. + if (coord_is_iplug_set(coord) &&
  7669. + coord->unit_pos >
  7670. + item_plugin_by_coord(coord)->b.nr_units(coord) - 1)
  7671. + return 0;
  7672. + return 1;
  7673. +}
  7674. +#endif
  7675. +
  7676. +/* Adjust coordinate boundaries based on the number of items prior to
  7677. + coord_next/prev. Returns 1 if the new position does not exist. */
  7678. +static int coord_adjust_items(coord_t *coord, unsigned items, int is_next)
  7679. +{
  7680. + /* If the node is invalid, leave it. */
  7681. + if (coord->between == INVALID_COORD)
  7682. + return 1;
  7683. +
  7684. + /* If the node is empty, set it appropriately. */
  7685. + if (items == 0) {
  7686. + coord->between = EMPTY_NODE;
  7687. + coord_set_item_pos(coord, 0);
  7688. + coord->unit_pos = 0;
  7689. + return 1;
  7690. + }
  7691. +
  7692. + /* If it was empty and it no longer is, set to BEFORE/AFTER_ITEM. */
  7693. + if (coord->between == EMPTY_NODE) {
  7694. + coord->between = (is_next ? BEFORE_ITEM : AFTER_ITEM);
  7695. + coord_set_item_pos(coord, 0);
  7696. + coord->unit_pos = 0;
  7697. + return 0;
  7698. + }
  7699. +
  7700. + /* If the item_pos is out-of-range, set it appropriately. */
  7701. + if (coord->item_pos >= items) {
  7702. + coord->between = AFTER_ITEM;
  7703. + coord_set_item_pos(coord, items - 1);
  7704. + coord->unit_pos = 0;
  7705. + /* If is_next, return 1 (can't go any further). */
  7706. + return is_next;
  7707. + }
  7708. +
  7709. + return 0;
  7710. +}
  7711. +
  7712. +/* Advances the coordinate by one unit to the right. If empty, no change. If
  7713. + coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new
  7714. + position is an existing unit. */
  7715. +int coord_next_unit(coord_t *coord)
  7716. +{
  7717. + unsigned items = coord_num_items(coord);
  7718. +
  7719. + if (coord_adjust_items(coord, items, 1) == 1)
  7720. + return 1;
  7721. +
  7722. + switch (coord->between) {
  7723. + case BEFORE_UNIT:
  7724. + /* Now it is positioned at the same unit. */
  7725. + coord->between = AT_UNIT;
  7726. + return 0;
  7727. +
  7728. + case AFTER_UNIT:
  7729. + case AT_UNIT:
  7730. + /* If it was at or after a unit and there are more units in this
  7731. + item, advance to the next one. */
  7732. + if (coord->unit_pos < coord_last_unit_pos(coord)) {
  7733. + coord->unit_pos += 1;
  7734. + coord->between = AT_UNIT;
  7735. + return 0;
  7736. + }
  7737. +
  7738. + /* Otherwise, it is crossing an item boundary and treated as if
  7739. + it was after the current item. */
  7740. + coord->between = AFTER_ITEM;
  7741. + coord->unit_pos = 0;
  7742. + fallthrough;
  7743. +
  7744. + case AFTER_ITEM:
  7745. + /* Check for end-of-node. */
  7746. + if (coord->item_pos == items - 1)
  7747. + return 1;
  7748. +
  7749. + coord_inc_item_pos(coord);
  7750. + coord->unit_pos = 0;
  7751. + coord->between = AT_UNIT;
  7752. + return 0;
  7753. +
  7754. + case BEFORE_ITEM:
  7755. + /* The adjust_items checks ensure that we are valid here. */
  7756. + coord->unit_pos = 0;
  7757. + coord->between = AT_UNIT;
  7758. + return 0;
  7759. +
  7760. + case INVALID_COORD:
  7761. + case EMPTY_NODE:
  7762. + /* Handled in coord_adjust_items(). */
  7763. + break;
  7764. + }
  7765. +
  7766. + impossible("jmacd-9902", "unreachable");
  7767. + return 0;
  7768. +}
  7769. +
  7770. +/* Advances the coordinate by one item to the right. If empty, no change. If
  7771. + coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new
  7772. + position is an existing item. */
  7773. +int coord_next_item(coord_t *coord)
  7774. +{
  7775. + unsigned items = coord_num_items(coord);
  7776. +
  7777. + if (coord_adjust_items(coord, items, 1) == 1)
  7778. + return 1;
  7779. +
  7780. + switch (coord->between) {
  7781. + case AFTER_UNIT:
  7782. + case AT_UNIT:
  7783. + case BEFORE_UNIT:
  7784. + case AFTER_ITEM:
  7785. + /* Check for end-of-node. */
  7786. + if (coord->item_pos == items - 1) {
  7787. + coord->between = AFTER_ITEM;
  7788. + coord->unit_pos = 0;
  7789. + coord_clear_iplug(coord);
  7790. + return 1;
  7791. + }
  7792. +
  7793. + /* Anywhere in an item, go to the next one. */
  7794. + coord->between = AT_UNIT;
  7795. + coord_inc_item_pos(coord);
  7796. + coord->unit_pos = 0;
  7797. + return 0;
  7798. +
  7799. + case BEFORE_ITEM:
  7800. + /* The out-of-range check ensures that we are valid here. */
  7801. + coord->unit_pos = 0;
  7802. + coord->between = AT_UNIT;
  7803. + return 0;
  7804. + case INVALID_COORD:
  7805. + case EMPTY_NODE:
  7806. + /* Handled in coord_adjust_items(). */
  7807. + break;
  7808. + }
  7809. +
  7810. + impossible("jmacd-9903", "unreachable");
  7811. + return 0;
  7812. +}
  7813. +
  7814. +/* Advances the coordinate by one unit to the left. If empty, no change. If
  7815. + coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new
  7816. + position is an existing unit. */
  7817. +int coord_prev_unit(coord_t *coord)
  7818. +{
  7819. + unsigned items = coord_num_items(coord);
  7820. +
  7821. + if (coord_adjust_items(coord, items, 0) == 1)
  7822. + return 1;
  7823. +
  7824. + switch (coord->between) {
  7825. + case AT_UNIT:
  7826. + case BEFORE_UNIT:
  7827. + if (coord->unit_pos > 0) {
  7828. + coord->unit_pos -= 1;
  7829. + coord->between = AT_UNIT;
  7830. + return 0;
  7831. + }
  7832. +
  7833. + if (coord->item_pos == 0) {
  7834. + coord->between = BEFORE_ITEM;
  7835. + return 1;
  7836. + }
  7837. +
  7838. + coord_dec_item_pos(coord);
  7839. + coord->unit_pos = coord_last_unit_pos(coord);
  7840. + coord->between = AT_UNIT;
  7841. + return 0;
  7842. +
  7843. + case AFTER_UNIT:
  7844. + /* What if unit_pos is out-of-range? */
  7845. + assert("jmacd-5442",
  7846. + coord->unit_pos <= coord_last_unit_pos(coord));
  7847. + coord->between = AT_UNIT;
  7848. + return 0;
  7849. +
  7850. + case BEFORE_ITEM:
  7851. + if (coord->item_pos == 0)
  7852. + return 1;
  7853. +
  7854. + coord_dec_item_pos(coord);
  7855. + fallthrough;
  7856. +
  7857. + case AFTER_ITEM:
  7858. + coord->between = AT_UNIT;
  7859. + coord->unit_pos = coord_last_unit_pos(coord);
  7860. + return 0;
  7861. +
  7862. + case INVALID_COORD:
  7863. + case EMPTY_NODE:
  7864. + break;
  7865. + }
  7866. +
  7867. + impossible("jmacd-9904", "unreachable");
  7868. + return 0;
  7869. +}
  7870. +
  7871. +/* Advances the coordinate by one item to the left. If empty, no change. If
  7872. + coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new
  7873. + position is an existing item. */
  7874. +int coord_prev_item(coord_t *coord)
  7875. +{
  7876. + unsigned items = coord_num_items(coord);
  7877. +
  7878. + if (coord_adjust_items(coord, items, 0) == 1)
  7879. + return 1;
  7880. +
  7881. + switch (coord->between) {
  7882. + case AT_UNIT:
  7883. + case AFTER_UNIT:
  7884. + case BEFORE_UNIT:
  7885. + case BEFORE_ITEM:
  7886. +
  7887. + if (coord->item_pos == 0) {
  7888. + coord->between = BEFORE_ITEM;
  7889. + coord->unit_pos = 0;
  7890. + return 1;
  7891. + }
  7892. +
  7893. + coord_dec_item_pos(coord);
  7894. + coord->unit_pos = 0;
  7895. + coord->between = AT_UNIT;
  7896. + return 0;
  7897. +
  7898. + case AFTER_ITEM:
  7899. + coord->between = AT_UNIT;
  7900. + coord->unit_pos = 0;
  7901. + return 0;
  7902. +
  7903. + case INVALID_COORD:
  7904. + case EMPTY_NODE:
  7905. + break;
  7906. + }
  7907. +
  7908. + impossible("jmacd-9905", "unreachable");
  7909. + return 0;
  7910. +}
  7911. +
  7912. +/* Calls either coord_init_first_unit or coord_init_last_unit depending on
  7913. + sideof argument. */
  7914. +void coord_init_sideof_unit(coord_t *coord, const znode * node, sideof dir)
  7915. +{
  7916. + assert("jmacd-9821", dir == LEFT_SIDE || dir == RIGHT_SIDE);
  7917. + if (dir == LEFT_SIDE) {
  7918. + coord_init_first_unit(coord, node);
  7919. + } else {
  7920. + coord_init_last_unit(coord, node);
  7921. + }
  7922. +}
  7923. +
  7924. +/* Calls either coord_is_before_leftmost or coord_is_after_rightmost depending
  7925. + on sideof argument. */
  7926. +/* Audited by: green(2002.06.15) */
  7927. +int coord_is_after_sideof_unit(coord_t *coord, sideof dir)
  7928. +{
  7929. + assert("jmacd-9822", dir == LEFT_SIDE || dir == RIGHT_SIDE);
  7930. + if (dir == LEFT_SIDE) {
  7931. + return coord_is_before_leftmost(coord);
  7932. + } else {
  7933. + return coord_is_after_rightmost(coord);
  7934. + }
  7935. +}
  7936. +
  7937. +/* Calls either coord_next_unit or coord_prev_unit depending on sideof argument.
  7938. + */
  7939. +/* Audited by: green(2002.06.15) */
  7940. +int coord_sideof_unit(coord_t *coord, sideof dir)
  7941. +{
  7942. + assert("jmacd-9823", dir == LEFT_SIDE || dir == RIGHT_SIDE);
  7943. + if (dir == LEFT_SIDE) {
  7944. + return coord_prev_unit(coord);
  7945. + } else {
  7946. + return coord_next_unit(coord);
  7947. + }
  7948. +}
  7949. +
  7950. +#if REISER4_DEBUG
  7951. +int coords_equal(const coord_t *c1, const coord_t *c2)
  7952. +{
  7953. + assert("nikita-2840", c1 != NULL);
  7954. + assert("nikita-2841", c2 != NULL);
  7955. +
  7956. + return
  7957. + c1->node == c2->node &&
  7958. + c1->item_pos == c2->item_pos &&
  7959. + c1->unit_pos == c2->unit_pos && c1->between == c2->between;
  7960. +}
  7961. +#endif /* REISER4_DEBUG */
  7962. +
  7963. +/* If coord_is_after_rightmost return NCOORD_ON_THE_RIGHT, if
  7964. + coord_is_after_leftmost return NCOORD_ON_THE_LEFT, otherwise return
  7965. + NCOORD_INSIDE. */
  7966. +/* Audited by: green(2002.06.15) */
  7967. +coord_wrt_node coord_wrt(const coord_t *coord)
  7968. +{
  7969. + if (coord_is_before_leftmost(coord))
  7970. + return COORD_ON_THE_LEFT;
  7971. +
  7972. + if (coord_is_after_rightmost(coord))
  7973. + return COORD_ON_THE_RIGHT;
  7974. +
  7975. + return COORD_INSIDE;
  7976. +}
  7977. +
  7978. +/* Returns true if the coordinate is positioned after the last item or after the
  7979. + last unit of the last item or it is an empty node. */
  7980. +/* Audited by: green(2002.06.15) */
  7981. +int coord_is_after_rightmost(const coord_t *coord)
  7982. +{
  7983. + assert("jmacd-7313", coord_check(coord));
  7984. +
  7985. + switch (coord->between) {
  7986. + case INVALID_COORD:
  7987. + case AT_UNIT:
  7988. + case BEFORE_UNIT:
  7989. + case BEFORE_ITEM:
  7990. + return 0;
  7991. +
  7992. + case EMPTY_NODE:
  7993. + return 1;
  7994. +
  7995. + case AFTER_ITEM:
  7996. + return (coord->item_pos == node_num_items(coord->node) - 1);
  7997. +
  7998. + case AFTER_UNIT:
  7999. + return ((coord->item_pos == node_num_items(coord->node) - 1) &&
  8000. + coord->unit_pos == coord_last_unit_pos(coord));
  8001. + }
  8002. +
  8003. + impossible("jmacd-9908", "unreachable");
  8004. + return 0;
  8005. +}
  8006. +
  8007. +/* Returns true if the coordinate is positioned before the first item or it is
  8008. + an empty node. */
  8009. +int coord_is_before_leftmost(const coord_t *coord)
  8010. +{
  8011. + /* FIXME-VS: coord_check requires node to be loaded whereas it is not
  8012. + necessary to check if coord is set before leftmost
  8013. + assert ("jmacd-7313", coord_check (coord)); */
  8014. + switch (coord->between) {
  8015. + case INVALID_COORD:
  8016. + case AT_UNIT:
  8017. + case AFTER_ITEM:
  8018. + case AFTER_UNIT:
  8019. + return 0;
  8020. +
  8021. + case EMPTY_NODE:
  8022. + return 1;
  8023. +
  8024. + case BEFORE_ITEM:
  8025. + case BEFORE_UNIT:
  8026. + return (coord->item_pos == 0) && (coord->unit_pos == 0);
  8027. + }
  8028. +
  8029. + impossible("jmacd-9908", "unreachable");
  8030. + return 0;
  8031. +}
  8032. +
  8033. +/* Returns true if the coordinate is positioned after an item, before an item,
  8034. + after the last unit of an item, before the first unit of an item, or at an
  8035. + empty node. */
  8036. +/* Audited by: green(2002.06.15) */
  8037. +int coord_is_between_items(const coord_t *coord)
  8038. +{
  8039. + assert("jmacd-7313", coord_check(coord));
  8040. +
  8041. + switch (coord->between) {
  8042. + case INVALID_COORD:
  8043. + case AT_UNIT:
  8044. + return 0;
  8045. +
  8046. + case AFTER_ITEM:
  8047. + case BEFORE_ITEM:
  8048. + case EMPTY_NODE:
  8049. + return 1;
  8050. +
  8051. + case BEFORE_UNIT:
  8052. + return coord->unit_pos == 0;
  8053. +
  8054. + case AFTER_UNIT:
  8055. + return coord->unit_pos == coord_last_unit_pos(coord);
  8056. + }
  8057. +
  8058. + impossible("jmacd-9908", "unreachable");
  8059. + return 0;
  8060. +}
  8061. +
  8062. +#if REISER4_DEBUG
  8063. +/* Returns true if the coordinates are positioned at adjacent units, regardless
  8064. + of before-after or item boundaries. */
  8065. +int coord_are_neighbors(coord_t *c1, coord_t *c2)
  8066. +{
  8067. + coord_t *left;
  8068. + coord_t *right;
  8069. +
  8070. + assert("nikita-1241", c1 != NULL);
  8071. + assert("nikita-1242", c2 != NULL);
  8072. + assert("nikita-1243", c1->node == c2->node);
  8073. + assert("nikita-1244", coord_is_existing_unit(c1));
  8074. + assert("nikita-1245", coord_is_existing_unit(c2));
  8075. +
  8076. + left = right = NULL;
  8077. + switch (coord_compare(c1, c2)) {
  8078. + case COORD_CMP_ON_LEFT:
  8079. + left = c1;
  8080. + right = c2;
  8081. + break;
  8082. + case COORD_CMP_ON_RIGHT:
  8083. + left = c2;
  8084. + right = c1;
  8085. + break;
  8086. + case COORD_CMP_SAME:
  8087. + return 0;
  8088. + default:
  8089. + wrong_return_value("nikita-1246", "compare_coords()");
  8090. + }
  8091. + assert("vs-731", left && right);
  8092. + if (left->item_pos == right->item_pos) {
  8093. + return left->unit_pos + 1 == right->unit_pos;
  8094. + } else if (left->item_pos + 1 == right->item_pos) {
  8095. + return (left->unit_pos == coord_last_unit_pos(left))
  8096. + && (right->unit_pos == 0);
  8097. + } else {
  8098. + return 0;
  8099. + }
  8100. +}
  8101. +#endif /* REISER4_DEBUG */
  8102. +
  8103. +/* Assuming two coordinates are positioned in the same node, return
  8104. + COORD_CMP_ON_RIGHT, COORD_CMP_ON_LEFT, or COORD_CMP_SAME depending on c1's
  8105. + position relative to c2. */
  8106. +/* Audited by: green(2002.06.15) */
  8107. +coord_cmp coord_compare(coord_t *c1, coord_t *c2)
  8108. +{
  8109. + assert("vs-209", c1->node == c2->node);
  8110. + assert("vs-194", coord_is_existing_unit(c1)
  8111. + && coord_is_existing_unit(c2));
  8112. +
  8113. + if (c1->item_pos > c2->item_pos)
  8114. + return COORD_CMP_ON_RIGHT;
  8115. + if (c1->item_pos < c2->item_pos)
  8116. + return COORD_CMP_ON_LEFT;
  8117. + if (c1->unit_pos > c2->unit_pos)
  8118. + return COORD_CMP_ON_RIGHT;
  8119. + if (c1->unit_pos < c2->unit_pos)
  8120. + return COORD_CMP_ON_LEFT;
  8121. + return COORD_CMP_SAME;
  8122. +}
  8123. +
  8124. +/* If the coordinate is between items, shifts it to the right. Returns 0 on
  8125. + success and non-zero if there is no position to the right. */
  8126. +int coord_set_to_right(coord_t *coord)
  8127. +{
  8128. + unsigned items = coord_num_items(coord);
  8129. +
  8130. + if (coord_adjust_items(coord, items, 1) == 1)
  8131. + return 1;
  8132. +
  8133. + switch (coord->between) {
  8134. + case AT_UNIT:
  8135. + return 0;
  8136. +
  8137. + case BEFORE_ITEM:
  8138. + case BEFORE_UNIT:
  8139. + coord->between = AT_UNIT;
  8140. + return 0;
  8141. +
  8142. + case AFTER_UNIT:
  8143. + if (coord->unit_pos < coord_last_unit_pos(coord)) {
  8144. + coord->unit_pos += 1;
  8145. + coord->between = AT_UNIT;
  8146. + return 0;
  8147. + } else {
  8148. +
  8149. + coord->unit_pos = 0;
  8150. +
  8151. + if (coord->item_pos == items - 1) {
  8152. + coord->between = AFTER_ITEM;
  8153. + return 1;
  8154. + }
  8155. +
  8156. + coord_inc_item_pos(coord);
  8157. + coord->between = AT_UNIT;
  8158. + return 0;
  8159. + }
  8160. +
  8161. + case AFTER_ITEM:
  8162. + if (coord->item_pos == items - 1)
  8163. + return 1;
  8164. +
  8165. + coord_inc_item_pos(coord);
  8166. + coord->unit_pos = 0;
  8167. + coord->between = AT_UNIT;
  8168. + return 0;
  8169. +
  8170. + case EMPTY_NODE:
  8171. + return 1;
  8172. +
  8173. + case INVALID_COORD:
  8174. + break;
  8175. + }
  8176. +
  8177. + impossible("jmacd-9920", "unreachable");
  8178. + return 0;
  8179. +}
  8180. +
  8181. +/* If the coordinate is between items, shifts it to the left. Returns 0 on
  8182. + success and non-zero if there is no position to the left. */
  8183. +int coord_set_to_left(coord_t *coord)
  8184. +{
  8185. + unsigned items = coord_num_items(coord);
  8186. +
  8187. + if (coord_adjust_items(coord, items, 0) == 1)
  8188. + return 1;
  8189. +
  8190. + switch (coord->between) {
  8191. + case AT_UNIT:
  8192. + return 0;
  8193. +
  8194. + case AFTER_UNIT:
  8195. + coord->between = AT_UNIT;
  8196. + return 0;
  8197. +
  8198. + case AFTER_ITEM:
  8199. + coord->between = AT_UNIT;
  8200. + coord->unit_pos = coord_last_unit_pos(coord);
  8201. + return 0;
  8202. +
  8203. + case BEFORE_UNIT:
  8204. + if (coord->unit_pos > 0) {
  8205. + coord->unit_pos -= 1;
  8206. + coord->between = AT_UNIT;
  8207. + return 0;
  8208. + } else {
  8209. +
  8210. + if (coord->item_pos == 0) {
  8211. + coord->between = BEFORE_ITEM;
  8212. + return 1;
  8213. + }
  8214. +
  8215. + coord->unit_pos = coord_last_unit_pos(coord);
  8216. + coord_dec_item_pos(coord);
  8217. + coord->between = AT_UNIT;
  8218. + return 0;
  8219. + }
  8220. +
  8221. + case BEFORE_ITEM:
  8222. + if (coord->item_pos == 0)
  8223. + return 1;
  8224. +
  8225. + coord_dec_item_pos(coord);
  8226. + coord->unit_pos = coord_last_unit_pos(coord);
  8227. + coord->between = AT_UNIT;
  8228. + return 0;
  8229. +
  8230. + case EMPTY_NODE:
  8231. + return 1;
  8232. +
  8233. + case INVALID_COORD:
  8234. + break;
  8235. + }
  8236. +
  8237. + impossible("jmacd-9920", "unreachable");
  8238. + return 0;
  8239. +}
  8240. +
  8241. +static const char *coord_tween_tostring(between_enum n)
  8242. +{
  8243. + switch (n) {
  8244. + case BEFORE_UNIT:
  8245. + return "before unit";
  8246. + case BEFORE_ITEM:
  8247. + return "before item";
  8248. + case AT_UNIT:
  8249. + return "at unit";
  8250. + case AFTER_UNIT:
  8251. + return "after unit";
  8252. + case AFTER_ITEM:
  8253. + return "after item";
  8254. + case EMPTY_NODE:
  8255. + return "empty node";
  8256. + case INVALID_COORD:
  8257. + return "invalid";
  8258. + default:
  8259. + {
  8260. + static char buf[30];
  8261. +
  8262. + sprintf(buf, "unknown: %i", n);
  8263. + return buf;
  8264. + }
  8265. + }
  8266. +}
  8267. +
  8268. +void print_coord(const char *mes, const coord_t *coord, int node)
  8269. +{
  8270. + if (coord == NULL) {
  8271. + printk("%s: null\n", mes);
  8272. + return;
  8273. + }
  8274. + printk("%s: item_pos = %d, unit_pos %d, tween=%s, iplug=%d\n",
  8275. + mes, coord->item_pos, coord->unit_pos,
  8276. + coord_tween_tostring(coord->between), coord->iplugid);
  8277. +}
  8278. +
  8279. +int
  8280. +item_utmost_child_real_block(const coord_t *coord, sideof side,
  8281. + reiser4_block_nr * blk)
  8282. +{
  8283. + return item_plugin_by_coord(coord)->f.utmost_child_real_block(coord,
  8284. + side,
  8285. + blk);
  8286. +}
  8287. +
  8288. +int item_utmost_child(const coord_t *coord, sideof side, jnode ** child)
  8289. +{
  8290. + return item_plugin_by_coord(coord)->f.utmost_child(coord, side, child);
  8291. +}
  8292. +
  8293. +/* @count bytes of flow @f got written, update correspondingly f->length,
  8294. + f->data and f->key */
  8295. +void move_flow_forward(flow_t *f, unsigned count)
  8296. +{
  8297. + if (f->data)
  8298. + f->data += count;
  8299. + f->length -= count;
  8300. + set_key_offset(&f->key, get_key_offset(&f->key) + count);
  8301. +}
  8302. +
  8303. +/*
  8304. + Local variables:
  8305. + c-indentation-style: "K&R"
  8306. + mode-name: "LC"
  8307. + c-basic-offset: 8
  8308. + tab-width: 8
  8309. + fill-column: 120
  8310. + scroll-step: 1
  8311. + End:
  8312. +*/
  8313. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/coord.h linux-5.16.14/fs/reiser4/coord.h
  8314. --- linux-5.16.14.orig/fs/reiser4/coord.h 1970-01-01 01:00:00.000000000 +0100
  8315. +++ linux-5.16.14/fs/reiser4/coord.h 2022-03-12 13:26:19.644892709 +0100
  8316. @@ -0,0 +1,399 @@
  8317. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  8318. + reiser4/README */
  8319. +
  8320. +/* Coords */
  8321. +
  8322. +#if !defined(__REISER4_COORD_H__)
  8323. +#define __REISER4_COORD_H__
  8324. +
  8325. +#include "forward.h"
  8326. +#include "debug.h"
  8327. +#include "dformat.h"
  8328. +#include "key.h"
  8329. +
  8330. +/* insertions happen between coords in the tree, so we need some means
  8331. + of specifying the sense of betweenness. */
  8332. +typedef enum {
  8333. + BEFORE_UNIT, /* Note: we/init_coord depends on this value being zero. */
  8334. + AT_UNIT,
  8335. + AFTER_UNIT,
  8336. + BEFORE_ITEM,
  8337. + AFTER_ITEM,
  8338. + INVALID_COORD,
  8339. + EMPTY_NODE,
  8340. +} between_enum;
  8341. +
  8342. +/* location of coord w.r.t. its node */
  8343. +typedef enum {
  8344. + COORD_ON_THE_LEFT = -1,
  8345. + COORD_ON_THE_RIGHT = +1,
  8346. + COORD_INSIDE = 0
  8347. +} coord_wrt_node;
  8348. +
  8349. +typedef enum {
  8350. + COORD_CMP_SAME = 0, COORD_CMP_ON_LEFT = -1, COORD_CMP_ON_RIGHT = +1
  8351. +} coord_cmp;
  8352. +
  8353. +struct coord {
  8354. + /* node in a tree */
  8355. + /* 0 */ znode *node;
  8356. +
  8357. + /* position of item within node */
  8358. + /* 4 */ pos_in_node_t item_pos;
  8359. + /* position of unit within item */
  8360. + /* 6 */ pos_in_node_t unit_pos;
  8361. + /* optimization: plugin of item is stored in coord_t. Until this was
  8362. + implemented, item_plugin_by_coord() was major CPU consumer. ->iplugid
  8363. + is invalidated (set to 0xff) on each modification of ->item_pos,
  8364. + and all such modifications are funneled through coord_*_item_pos()
  8365. + functions below.
  8366. + */
  8367. + /* 8 */ char iplugid;
  8368. + /* position of coord w.r.t. to neighboring items and/or units.
  8369. + Values are taken from &between_enum above.
  8370. + */
  8371. + /* 9 */ char between;
  8372. + /* padding. It will be added by the compiler anyway to conform to the
  8373. + * C language alignment requirements. We keep it here to be on the
  8374. + * safe side and to have a clear picture of the memory layout of this
  8375. + * structure. */
  8376. + /* 10 */ __u16 pad;
  8377. + /* 12 */ int offset;
  8378. +#if REISER4_DEBUG
  8379. + unsigned long plug_v;
  8380. + unsigned long body_v;
  8381. +#endif
  8382. +};
  8383. +
  8384. +#define INVALID_PLUGID ((char)((1 << 8) - 1))
  8385. +#define INVALID_OFFSET -1
  8386. +
  8387. +static inline void coord_clear_iplug(coord_t *coord)
  8388. +{
  8389. + assert("nikita-2835", coord != NULL);
  8390. + coord->iplugid = INVALID_PLUGID;
  8391. + coord->offset = INVALID_OFFSET;
  8392. +}
  8393. +
  8394. +static inline int coord_is_iplug_set(const coord_t *coord)
  8395. +{
  8396. + assert("nikita-2836", coord != NULL);
  8397. + return coord->iplugid != INVALID_PLUGID;
  8398. +}
  8399. +
  8400. +static inline void coord_set_item_pos(coord_t *coord, pos_in_node_t pos)
  8401. +{
  8402. + assert("nikita-2478", coord != NULL);
  8403. + coord->item_pos = pos;
  8404. + coord_clear_iplug(coord);
  8405. +}
  8406. +
  8407. +static inline void coord_dec_item_pos(coord_t *coord)
  8408. +{
  8409. + assert("nikita-2480", coord != NULL);
  8410. + --coord->item_pos;
  8411. + coord_clear_iplug(coord);
  8412. +}
  8413. +
  8414. +static inline void coord_inc_item_pos(coord_t *coord)
  8415. +{
  8416. + assert("nikita-2481", coord != NULL);
  8417. + ++coord->item_pos;
  8418. + coord_clear_iplug(coord);
  8419. +}
  8420. +
  8421. +static inline void coord_add_item_pos(coord_t *coord, int delta)
  8422. +{
  8423. + assert("nikita-2482", coord != NULL);
  8424. + coord->item_pos += delta;
  8425. + coord_clear_iplug(coord);
  8426. +}
  8427. +
  8428. +static inline void coord_invalid_item_pos(coord_t *coord)
  8429. +{
  8430. + assert("nikita-2832", coord != NULL);
  8431. + coord->item_pos = (unsigned short)~0;
  8432. + coord_clear_iplug(coord);
  8433. +}
  8434. +
  8435. +/* Reverse a direction. */
  8436. +static inline sideof sideof_reverse(sideof side)
  8437. +{
  8438. + return side == LEFT_SIDE ? RIGHT_SIDE : LEFT_SIDE;
  8439. +}
  8440. +
  8441. +/* NOTE: There is a somewhat odd mixture of the following opposed terms:
  8442. +
  8443. + "first" and "last"
  8444. + "next" and "prev"
  8445. + "before" and "after"
  8446. + "leftmost" and "rightmost"
  8447. +
  8448. + But I think the chosen names are decent the way they are.
  8449. +*/
  8450. +
  8451. +/* COORD INITIALIZERS */
  8452. +
  8453. +/* Initialize an invalid coordinate. */
  8454. +extern void coord_init_invalid(coord_t *coord, const znode * node);
  8455. +
  8456. +extern void coord_init_first_unit_nocheck(coord_t *coord, const znode * node);
  8457. +
  8458. +/* Initialize a coordinate to point at the first unit of the first item. If the
  8459. + node is empty, it is positioned at the EMPTY_NODE. */
  8460. +extern void coord_init_first_unit(coord_t *coord, const znode * node);
  8461. +
  8462. +/* Initialize a coordinate to point at the last unit of the last item. If the
  8463. + node is empty, it is positioned at the EMPTY_NODE. */
  8464. +extern void coord_init_last_unit(coord_t *coord, const znode * node);
  8465. +
  8466. +/* Initialize a coordinate to before the first item. If the node is empty, it is
  8467. + positioned at the EMPTY_NODE. */
  8468. +extern void coord_init_before_first_item(coord_t *coord, const znode * node);
  8469. +
  8470. +/* Initialize a coordinate to after the last item. If the node is empty, it is
  8471. + positioned at the EMPTY_NODE. */
  8472. +extern void coord_init_after_last_item(coord_t *coord, const znode * node);
  8473. +
  8474. +/* Initialize a coordinate to after last unit in the item. Coord must be set
  8475. + already to existing item */
  8476. +void coord_init_after_item_end(coord_t *coord);
  8477. +
  8478. +/* Initialize a coordinate to before the item. Coord must be set already to
  8479. + existing item */
  8480. +void coord_init_before_item(coord_t *);
  8481. +/* Initialize a coordinate to after the item. Coord must be set already to
  8482. + existing item */
  8483. +void coord_init_after_item(coord_t *);
  8484. +
  8485. +/* Calls either coord_init_first_unit or coord_init_last_unit depending on
  8486. + sideof argument. */
  8487. +extern void coord_init_sideof_unit(coord_t *coord, const znode * node,
  8488. + sideof dir);
  8489. +
  8490. +/* Initialize a coordinate by 0s. Used in places where init_coord was used and
  8491. + it was not clear how actually
  8492. + FIXME-VS: added by vs (2002, june, 8) */
  8493. +extern void coord_init_zero(coord_t *coord);
  8494. +
  8495. +/* COORD METHODS */
  8496. +
  8497. +/* after shifting of node content, coord previously set properly may become
  8498. + invalid, try to "normalize" it. */
  8499. +void coord_normalize(coord_t *coord);
  8500. +
  8501. +/* Copy a coordinate. */
  8502. +extern void coord_dup(coord_t *coord, const coord_t *old_coord);
  8503. +
  8504. +/* Copy a coordinate without check. */
  8505. +void coord_dup_nocheck(coord_t *coord, const coord_t *old_coord);
  8506. +
  8507. +unsigned coord_num_units(const coord_t *coord);
  8508. +
  8509. +/* Return the last valid unit number at the present item (i.e.,
  8510. + coord_num_units() - 1). */
  8511. +static inline unsigned coord_last_unit_pos(const coord_t *coord)
  8512. +{
  8513. + return coord_num_units(coord) - 1;
  8514. +}
  8515. +
  8516. +#if REISER4_DEBUG
  8517. +/* For assertions only, checks for a valid coordinate. */
  8518. +extern int coord_check(const coord_t *coord);
  8519. +
  8520. +extern unsigned long znode_times_locked(const znode * z);
  8521. +
  8522. +static inline void coord_update_v(coord_t *coord)
  8523. +{
  8524. + coord->plug_v = coord->body_v = znode_times_locked(coord->node);
  8525. +}
  8526. +#endif
  8527. +
  8528. +extern int coords_equal(const coord_t *c1, const coord_t *c2);
  8529. +
  8530. +extern void print_coord(const char *mes, const coord_t *coord, int print_node);
  8531. +
  8532. +/* If coord_is_after_rightmost return COORD_ON_THE_RIGHT, if
  8533. + coord_is_before_leftmost return COORD_ON_THE_LEFT, otherwise return
  8534. + COORD_INSIDE. */
  8535. +extern coord_wrt_node coord_wrt(const coord_t *coord);
  8536. +
  8537. +/* Returns true if the coordinates are positioned at adjacent units, regardless
  8538. + of before-after or item boundaries. */
  8539. +extern int coord_are_neighbors(coord_t *c1, coord_t *c2);
  8540. +
  8541. +/* Assuming two coordinates are positioned in the same node, return
  8542. + COORD_CMP_ON_RIGHT, COORD_CMP_ON_LEFT, or COORD_CMP_SAME depending on c1's
  8543. + position relative to c2. */
  8544. +extern coord_cmp coord_compare(coord_t *c1, coord_t *c2);
  8545. +
  8546. +/* COORD PREDICATES */
  8547. +
  8548. +/* Returns true if the coord was initialized by coord_init_invalid(). */
  8549. +extern int coord_is_invalid(const coord_t *coord);
  8550. +
  8551. +/* Returns true if the coordinate is positioned at an existing item, not before
  8552. + or after an item. It may be placed at, before, or after any unit within the
  8553. + item, whether existing or not. If this is true you can call methods of the
  8554. + item plugin. */
  8555. +extern int coord_is_existing_item(const coord_t *coord);
  8556. +
  8557. +/* Returns true if the coordinate is positioned after a item, before a item,
  8558. + after the last unit of an item, before the first unit of an item, or at an
  8559. + empty node. */
  8560. +extern int coord_is_between_items(const coord_t *coord);
  8561. +
  8562. +/* Returns true if the coordinate is positioned at an existing unit, not before
  8563. + or after a unit. */
  8564. +extern int coord_is_existing_unit(const coord_t *coord);
  8565. +
  8566. +/* Returns true if the coordinate is positioned at an empty node. */
  8567. +extern int coord_is_empty(const coord_t *coord);
  8568. +
  8569. +/* Returns true if the coordinate is positioned at the first unit of the first
  8570. + item. Not true for empty nodes nor coordinates positioned before the first
  8571. + item. */
  8572. +extern int coord_is_leftmost_unit(const coord_t *coord);
  8573. +
  8574. +/* Returns true if the coordinate is positioned after the last item or after the
  8575. + last unit of the last item or it is an empty node. */
  8576. +extern int coord_is_after_rightmost(const coord_t *coord);
  8577. +
  8578. +/* Returns true if the coordinate is positioned before the first item or it is
  8579. + an empty node. */
  8580. +extern int coord_is_before_leftmost(const coord_t *coord);
  8581. +
  8582. +/* Calls either coord_is_before_leftmost or coord_is_after_rightmost depending
  8583. + on sideof argument. */
  8584. +extern int coord_is_after_sideof_unit(coord_t *coord, sideof dir);
  8585. +
  8586. +/* COORD MODIFIERS */
  8587. +
  8588. +/* Advances the coordinate by one unit to the right. If empty, no change. If
  8589. + coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new
  8590. + position is an existing unit. */
  8591. +extern int coord_next_unit(coord_t *coord);
  8592. +
  8593. +/* Advances the coordinate by one item to the right. If empty, no change. If
  8594. + coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new
  8595. + position is an existing item. */
  8596. +extern int coord_next_item(coord_t *coord);
  8597. +
  8598. +/* Advances the coordinate by one unit to the left. If empty, no change. If
  8599. + coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new
  8600. + position is an existing unit. */
  8601. +extern int coord_prev_unit(coord_t *coord);
  8602. +
  8603. +/* Advances the coordinate by one item to the left. If empty, no change. If
  8604. + coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new
  8605. + position is an existing item. */
  8606. +extern int coord_prev_item(coord_t *coord);
  8607. +
  8608. +/* If the coordinate is between items, shifts it to the right. Returns 0 on
  8609. + success and non-zero if there is no position to the right. */
  8610. +extern int coord_set_to_right(coord_t *coord);
  8611. +
  8612. +/* If the coordinate is between items, shifts it to the left. Returns 0 on
  8613. + success and non-zero if there is no position to the left. */
  8614. +extern int coord_set_to_left(coord_t *coord);
  8615. +
  8616. +/* If the coordinate is at an existing unit, set to after that unit. Returns 0
  8617. + on success and non-zero if the unit did not exist. */
  8618. +extern int coord_set_after_unit(coord_t *coord);
  8619. +
  8620. +/* Calls either coord_next_unit or coord_prev_unit depending on sideof
  8621. + argument. */
  8622. +extern int coord_sideof_unit(coord_t *coord, sideof dir);
  8623. +
  8624. +/* iterate over all units in @node */
  8625. +#define for_all_units(coord, node) \
  8626. + for (coord_init_before_first_item((coord), (node)) ; \
  8627. + coord_next_unit(coord) == 0 ;)
  8628. +
  8629. +/* iterate over all items in @node */
  8630. +#define for_all_items(coord, node) \
  8631. + for (coord_init_before_first_item((coord), (node)) ; \
  8632. + coord_next_item(coord) == 0 ;)
  8633. +
  8634. +/* COORD/ITEM METHODS */
  8635. +
  8636. +extern int item_utmost_child_real_block(const coord_t *coord, sideof side,
  8637. + reiser4_block_nr * blk);
  8638. +extern int item_utmost_child(const coord_t *coord, sideof side,
  8639. + jnode ** child);
  8640. +
  8641. +/* a flow is a sequence of bytes being written to or read from the tree. The
  8642. + tree will slice the flow into items while storing it into nodes, but all of
  8643. + that is hidden from anything outside the tree. */
  8644. +
  8645. +struct flow {
  8646. + reiser4_key key; /* key of start of flow's sequence of bytes */
  8647. + loff_t length; /* length of flow's sequence of bytes */
  8648. + char *data; /* start of flow's sequence of bytes */
  8649. + int user; /* if 1 data is user space, 0 - kernel space */
  8650. + rw_op op; /* NIKITA-FIXME-HANS: comment is where? */
  8651. +};
  8652. +
  8653. +void move_flow_forward(flow_t *f, unsigned count);
  8654. +
  8655. +/* &reiser4_item_data - description of data to be inserted or pasted
  8656. +
  8657. + Q: articulate the reasons for the difference between this and flow.
  8658. +
  8659. + A: Besides flow we insert into tree other things: stat data, directory
  8660. + entry, etc. To insert them into tree one has to provide this structure. If
  8661. + one is going to insert flow - he can use insert_flow, where this structure
  8662. + does not have to be created
  8663. +*/
  8664. +struct reiser4_item_data {
  8665. + /* actual data to be inserted. If NULL, ->create_item() will not
  8666. + do xmemcpy itself, leaving this up to the caller. This can
  8667. + save some amount of unnecessary memory copying, for example,
  8668. + during insertion of stat data.
  8669. +
  8670. + */
  8671. + char *data;
  8672. + /* 1 if 'char * data' contains pointer to user space and 0 if it is
  8673. + kernel space */
  8674. + int user;
  8675. + /* amount of data we are going to insert or paste */
  8676. + int length;
  8677. + /* "Arg" is opaque data that is passed down to the
  8678. + ->create_item() method of node layout, which in turn
  8679. + hands it to the ->create_hook() of item being created. This
  8680. + arg is currently used by:
  8681. +
  8682. + . ->create_hook() of internal item
  8683. + (fs/reiser4/plugin/item/internal.c:internal_create_hook()),
  8684. + . ->paste() method of directory item.
  8685. + . ->create_hook() of extent item
  8686. +
  8687. + For internal item, this is left "brother" of new node being
  8688. + inserted and it is used to add new node into sibling list
  8689. + after parent to it was just inserted into parent.
  8690. +
  8691. + While ->arg does look like a somewhat unnecessary complication,
  8692. + it actually saves a lot of headache in many places, because
  8693. + all data necessary to insert or paste new data into tree are
  8694. + collected in one place, and this eliminates a lot of extra
  8695. + argument passing and storing everywhere.
  8696. +
  8697. + */
  8698. + void *arg;
  8699. + /* plugin of item we are inserting */
  8700. + item_plugin *iplug;
  8701. +};
  8702. +
  8703. +/* __REISER4_COORD_H__ */
  8704. +#endif
  8705. +
  8706. +/* Make Linus happy.
  8707. + Local variables:
  8708. + c-indentation-style: "K&R"
  8709. + mode-name: "LC"
  8710. + c-basic-offset: 8
  8711. + tab-width: 8
  8712. + fill-column: 120
  8713. + scroll-step: 1
  8714. + End:
  8715. +*/
  8716. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/debug.c linux-5.16.14/fs/reiser4/debug.c
  8717. --- linux-5.16.14.orig/fs/reiser4/debug.c 1970-01-01 01:00:00.000000000 +0100
  8718. +++ linux-5.16.14/fs/reiser4/debug.c 2022-03-12 13:26:19.645892712 +0100
  8719. @@ -0,0 +1,309 @@
  8720. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  8721. + * reiser4/README */
  8722. +
  8723. +/* Debugging facilities. */
  8724. +
  8725. +/*
  8726. + * This file contains generic debugging functions used by reiser4. Roughly
  8727. + * following:
  8728. + *
  8729. + * panicking: reiser4_do_panic(), reiser4_print_prefix().
  8730. + *
  8731. + * locking:
  8732. + * reiser4_schedulable(), reiser4_lock_counters(), print_lock_counters(),
  8733. + * reiser4_no_counters_are_held(), reiser4_commit_check_locks()
  8734. + *
  8735. + * error code monitoring (see comment before RETERR macro):
  8736. + * reiser4_return_err(), reiser4_report_err().
  8737. + *
  8738. + * stack back-tracing: fill_backtrace()
  8739. + *
  8740. + * miscellaneous: reiser4_preempt_point(), call_on_each_assert(),
  8741. + * reiser4_debugtrap().
  8742. + *
  8743. + */
  8744. +
  8745. +#include "reiser4.h"
  8746. +#include "context.h"
  8747. +#include "super.h"
  8748. +#include "txnmgr.h"
  8749. +#include "znode.h"
  8750. +
  8751. +#include <linux/sysfs.h>
  8752. +#include <linux/slab.h>
  8753. +#include <linux/types.h>
  8754. +#include <linux/fs.h>
  8755. +#include <linux/spinlock.h>
  8756. +#include <linux/kallsyms.h>
  8757. +#include <linux/vmalloc.h>
  8758. +#include <linux/ctype.h>
  8759. +#include <linux/sysctl.h>
  8760. +#include <linux/hardirq.h>
  8761. +#include <linux/sched/signal.h> /* signal_pending() */
  8762. +
  8763. +#if 0
  8764. +#if REISER4_DEBUG
  8765. +static void reiser4_report_err(void);
  8766. +#else
  8767. +#define reiser4_report_err() noop
  8768. +#endif
  8769. +#endif /* 0 */
  8770. +
  8771. +/*
  8772. + * global buffer where message given to reiser4_panic is formatted.
  8773. + */
  8774. +static char panic_buf[REISER4_PANIC_MSG_BUFFER_SIZE];
  8775. +
  8776. +/*
  8777. + * lock protecting consistency of panic_buf under concurrent panics
  8778. + */
  8779. +static DEFINE_SPINLOCK(panic_guard);
  8780. +
  8781. +/* Your best friend. Call it on each occasion. This is called by
  8782. + fs/reiser4/debug.h:reiser4_panic(). */
  8783. +void reiser4_do_panic(const char *format/* format string */ , ... /* rest */)
  8784. +{
  8785. + static int in_panic = 0;
  8786. + va_list args;
  8787. +
  8788. + /*
  8789. + * check for recursive panic.
  8790. + */
  8791. + if (in_panic == 0) {
  8792. + in_panic = 1;
  8793. +
  8794. + spin_lock(&panic_guard);
  8795. + va_start(args, format);
  8796. + vsnprintf(panic_buf, sizeof(panic_buf), format, args);
  8797. + va_end(args);
  8798. + printk(KERN_EMERG "reiser4 panicked cowardly: %s", panic_buf);
  8799. + spin_unlock(&panic_guard);
  8800. +
  8801. + /*
  8802. + * if kernel debugger is configured---drop in. Early dropping
  8803. + * into kgdb is not always convenient, because panic message
  8804. + * is not yet printed most of the times. But:
  8805. + *
  8806. + * (1) message can be extracted from printk_buf[]
  8807. + * (declared static inside of printk()), and
  8808. + *
  8809. + * (2) sometimes serial/kgdb combo dies while printing
  8810. + * long panic message, so it's more prudent to break into
  8811. + * debugger earlier.
  8812. + *
  8813. + */
  8814. + DEBUGON(1);
  8815. + }
  8816. + /* to make gcc happy about noreturn attribute */
  8817. + panic("%s", panic_buf);
  8818. +}
  8819. +
  8820. +#if 0
  8821. +void
  8822. +reiser4_print_prefix(const char *level, int reperr, const char *mid,
  8823. + const char *function, const char *file, int lineno)
  8824. +{
  8825. + const char *comm;
  8826. + int pid;
  8827. +
  8828. + if (unlikely(in_interrupt() || in_irq())) {
  8829. + comm = "interrupt";
  8830. + pid = 0;
  8831. + } else {
  8832. + comm = current->comm;
  8833. + pid = current->pid;
  8834. + }
  8835. + printk("%sreiser4[%.16s(%i)]: %s (%s:%i)[%s]:\n",
  8836. + level, comm, pid, function, file, lineno, mid);
  8837. + if (reperr)
  8838. + reiser4_report_err();
  8839. +}
  8840. +#endif /* 0 */
  8841. +
  8842. +/* Preemption point: this should be called periodically during long running
  8843. + operations (carry, allocate, and squeeze are best examples) */
  8844. +int reiser4_preempt_point(void)
  8845. +{
  8846. + assert("nikita-3008", reiser4_schedulable());
  8847. + cond_resched();
  8848. + return signal_pending(current);
  8849. +}
  8850. +
  8851. +#if REISER4_DEBUG
  8852. +/* Debugging aid: return struct where information about locks taken by current
  8853. + thread is accumulated. This can be used to formulate lock ordering
  8854. + constraints and various assertions.
  8855. +
  8856. +*/
  8857. +reiser4_lock_cnt_info *reiser4_lock_counters(void)
  8858. +{
  8859. + reiser4_context *ctx = get_current_context();
  8860. + assert("jmacd-1123", ctx != NULL);
  8861. + return &ctx->locks;
  8862. +}
  8863. +
  8864. +/*
  8865. + * print human readable information about locks held by the reiser4 context.
  8866. + */
  8867. +static void print_lock_counters(const char *prefix,
  8868. + const reiser4_lock_cnt_info * info)
  8869. +{
  8870. + printk("%s: jnode: %i, tree: %i (r:%i,w:%i), dk: %i (r:%i,w:%i)\n"
  8871. + "jload: %i, "
  8872. + "txnh: %i, atom: %i, stack: %i, txnmgr: %i, "
  8873. + "ktxnmgrd: %i, fq: %i\n"
  8874. + "inode: %i, "
  8875. + "cbk_cache: %i (r:%i,w%i), "
  8876. + "eflush: %i, "
  8877. + "zlock: %i,\n"
  8878. + "spin: %i, long: %i inode_sem: (r:%i,w:%i)\n"
  8879. + "d: %i, x: %i, t: %i\n", prefix,
  8880. + info->spin_locked_jnode,
  8881. + info->rw_locked_tree, info->read_locked_tree,
  8882. + info->write_locked_tree,
  8883. + info->rw_locked_dk, info->read_locked_dk, info->write_locked_dk,
  8884. + info->spin_locked_jload,
  8885. + info->spin_locked_txnh,
  8886. + info->spin_locked_atom, info->spin_locked_stack,
  8887. + info->spin_locked_txnmgr, info->spin_locked_ktxnmgrd,
  8888. + info->spin_locked_fq,
  8889. + info->spin_locked_inode,
  8890. + info->rw_locked_cbk_cache,
  8891. + info->read_locked_cbk_cache,
  8892. + info->write_locked_cbk_cache,
  8893. + info->spin_locked_super_eflush,
  8894. + info->spin_locked_zlock,
  8895. + info->spin_locked,
  8896. + info->long_term_locked_znode,
  8897. + info->inode_sem_r, info->inode_sem_w,
  8898. + info->d_refs, info->x_refs, info->t_refs);
  8899. +}
  8900. +
  8901. +/* check that no spinlocks are held */
  8902. +int reiser4_schedulable(void)
  8903. +{
  8904. + if (get_current_context_check() != NULL) {
  8905. + if (!LOCK_CNT_NIL(spin_locked)) {
  8906. + print_lock_counters("in atomic", reiser4_lock_counters());
  8907. + return 0;
  8908. + }
  8909. + }
  8910. + might_sleep();
  8911. + return 1;
  8912. +}
  8913. +/*
  8914. + * return true, iff no locks are held.
  8915. + */
  8916. +int reiser4_no_counters_are_held(void)
  8917. +{
  8918. + reiser4_lock_cnt_info *counters;
  8919. +
  8920. + counters = reiser4_lock_counters();
  8921. + return
  8922. + (counters->spin_locked_zlock == 0) &&
  8923. + (counters->spin_locked_jnode == 0) &&
  8924. + (counters->rw_locked_tree == 0) &&
  8925. + (counters->read_locked_tree == 0) &&
  8926. + (counters->write_locked_tree == 0) &&
  8927. + (counters->rw_locked_dk == 0) &&
  8928. + (counters->read_locked_dk == 0) &&
  8929. + (counters->write_locked_dk == 0) &&
  8930. + (counters->spin_locked_txnh == 0) &&
  8931. + (counters->spin_locked_atom == 0) &&
  8932. + (counters->spin_locked_stack == 0) &&
  8933. + (counters->spin_locked_txnmgr == 0) &&
  8934. + (counters->spin_locked_inode == 0) &&
  8935. + (counters->spin_locked == 0) &&
  8936. + (counters->long_term_locked_znode == 0) &&
  8937. + (counters->inode_sem_r == 0) &&
  8938. + (counters->inode_sem_w == 0) && (counters->d_refs == 0);
  8939. +}
  8940. +
  8941. +/*
  8942. + * return true, iff transaction commit can be done under locks held by the
  8943. + * current thread.
  8944. + */
  8945. +int reiser4_commit_check_locks(void)
  8946. +{
  8947. + reiser4_lock_cnt_info *counters;
  8948. + int inode_sem_r;
  8949. + int inode_sem_w;
  8950. + int result;
  8951. +
  8952. + /*
  8953. + * inode's read/write semaphore is the only reiser4 lock that can be
  8954. + * held during commit.
  8955. + */
  8956. +
  8957. + counters = reiser4_lock_counters();
  8958. + inode_sem_r = counters->inode_sem_r;
  8959. + inode_sem_w = counters->inode_sem_w;
  8960. +
  8961. + counters->inode_sem_r = counters->inode_sem_w = 0;
  8962. + result = reiser4_no_counters_are_held();
  8963. + counters->inode_sem_r = inode_sem_r;
  8964. + counters->inode_sem_w = inode_sem_w;
  8965. + return result;
  8966. +}
  8967. +
  8968. +/*
  8969. + * fill "error site" in the current reiser4 context. See comment before RETERR
  8970. + * macro for more details.
  8971. + */
  8972. +void reiser4_return_err(int code, const char *file, int line)
  8973. +{
  8974. + if (code < 0 && is_in_reiser4_context()) {
  8975. + reiser4_context *ctx = get_current_context();
  8976. +
  8977. + if (ctx != NULL) {
  8978. + ctx->err.code = code;
  8979. + ctx->err.file = file;
  8980. + ctx->err.line = line;
  8981. + }
  8982. + }
  8983. +}
  8984. +
  8985. +#if 0
  8986. +/*
  8987. + * report error information recorded by reiser4_return_err().
  8988. + */
  8989. +static void reiser4_report_err(void)
  8990. +{
  8991. + reiser4_context *ctx = get_current_context_check();
  8992. +
  8993. + if (ctx != NULL) {
  8994. + if (ctx->err.code != 0) {
  8995. + printk("code: %i at %s:%i\n",
  8996. + ctx->err.code, ctx->err.file, ctx->err.line);
  8997. + }
  8998. + }
  8999. +}
  9000. +#endif /* 0 */
  9001. +
  9002. +#endif /* REISER4_DEBUG */
  9003. +
  9004. +#if KERNEL_DEBUGGER
  9005. +
  9006. +/*
  9007. + * this functions just drops into kernel debugger. It is a convenient place to
  9008. + * put breakpoint in.
  9009. + */
  9010. +void reiser4_debugtrap(void)
  9011. +{
  9012. + /* do nothing. Put break point here. */
  9013. +#if defined(CONFIG_KGDB) && !defined(CONFIG_REISER4_FS_MODULE)
  9014. + extern void kgdb_breakpoint(void);
  9015. + //kgdb_breakpoint();
  9016. +#endif
  9017. +}
  9018. +#endif
  9019. +
  9020. +/* Make Linus happy.
  9021. + Local variables:
  9022. + c-indentation-style: "K&R"
  9023. + mode-name: "LC"
  9024. + c-basic-offset: 8
  9025. + tab-width: 8
  9026. + fill-column: 120
  9027. + End:
  9028. +*/
  9029. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/debug.h linux-5.16.14/fs/reiser4/debug.h
  9030. --- linux-5.16.14.orig/fs/reiser4/debug.h 1970-01-01 01:00:00.000000000 +0100
  9031. +++ linux-5.16.14/fs/reiser4/debug.h 2022-03-12 13:26:19.645892712 +0100
  9032. @@ -0,0 +1,344 @@
  9033. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  9034. + reiser4/README */
  9035. +
  9036. +/* Declarations of debug macros. */
  9037. +
  9038. +#if !defined(__FS_REISER4_DEBUG_H__)
  9039. +#define __FS_REISER4_DEBUG_H__
  9040. +
  9041. +#include "forward.h"
  9042. +#include "reiser4.h"
  9043. +
  9044. +/**
  9045. + * generic function to produce formatted output, decorating it with
  9046. + * whatever standard prefixes/postfixes we want. "Fun" is a function
  9047. + * that will be actually called, can be printk, panic etc.
  9048. + * This is for use by other debugging macros, not by users.
  9049. + */
  9050. +#define DCALL(lev, fun, reperr, label, format, ...) \
  9051. +({ \
  9052. + fun(lev "reiser4[%.16s(%i)]: %s (%s:%i)[%s]:\n" format "\n" , \
  9053. + current->comm, current->pid, __FUNCTION__, \
  9054. + __FILE__, __LINE__, label, ## __VA_ARGS__); \
  9055. +})
  9056. +
  9057. +/*
  9058. + * cause kernel to crash
  9059. + */
  9060. +#define reiser4_panic(mid, format, ...) \
  9061. + DCALL("", reiser4_do_panic, 1, mid, format , ## __VA_ARGS__)
  9062. +
  9063. +/* print message with indication of current process, file, line and
  9064. + function */
  9065. +#define reiser4_log(label, format, ...) \
  9066. + DCALL(KERN_DEBUG, printk, 0, label, format , ## __VA_ARGS__)
  9067. +
  9068. +#define noop do {; } while (0)
  9069. +
  9070. +#if REISER4_DEBUG
  9071. +/* version of info that only actually prints anything when _d_ebugging
  9072. + is on */
  9073. +#define dinfo(format, ...) printk(format , ## __VA_ARGS__)
  9074. +/* macro to catch logical errors. Put it into `default' clause of
  9075. + switch() statement. */
  9076. +#define impossible(label, format, ...) \
  9077. + reiser4_panic(label, "impossible: " format , ## __VA_ARGS__)
  9078. +/* assert assures that @cond is true. If it is not, reiser4_panic() is
  9079. + called. Use this for checking logical consistency and _never_ call
  9080. + this to check correctness of external data: disk blocks and user-input . */
  9081. +#define assert(label, cond) \
  9082. +({ \
  9083. + /* call_on_each_assert(); */ \
  9084. + if (cond) { \
  9085. + /* put negated check to avoid using !(cond) that would lose \
  9086. + * warnings for things like assert(a = b); */ \
  9087. + ; \
  9088. + } else { \
  9089. + DEBUGON(1); \
  9090. + reiser4_panic(label, "assertion failed: %s", #cond); \
  9091. + } \
  9092. +})
  9093. +
  9094. +/* like assertion, but @expr is evaluated even if REISER4_DEBUG is off. */
  9095. +#define check_me(label, expr) assert(label, (expr))
  9096. +
  9097. +#define ON_DEBUG(exp) exp
  9098. +
  9099. +extern int reiser4_schedulable(void);
  9100. +extern void call_on_each_assert(void);
  9101. +
  9102. +#else
  9103. +
  9104. +#define dinfo(format, args...) noop
  9105. +#define impossible(label, format, args...) noop
  9106. +#define assert(label, cond) noop
  9107. +#define check_me(label, expr) ((void) (expr))
  9108. +#define ON_DEBUG(exp)
  9109. +#define reiser4_schedulable() might_sleep()
  9110. +
  9111. +/* REISER4_DEBUG */
  9112. +#endif
  9113. +
  9114. +#if REISER4_DEBUG
  9115. +/* per-thread information about lock acquired by this thread. Used by lock
  9116. + * ordering checking in spin_macros.h */
  9117. +typedef struct reiser4_lock_cnt_info {
  9118. + int rw_locked_tree;
  9119. + int read_locked_tree;
  9120. + int write_locked_tree;
  9121. +
  9122. + int rw_locked_dk;
  9123. + int read_locked_dk;
  9124. + int write_locked_dk;
  9125. +
  9126. + int rw_locked_cbk_cache;
  9127. + int read_locked_cbk_cache;
  9128. + int write_locked_cbk_cache;
  9129. +
  9130. + int spin_locked_zlock;
  9131. + int spin_locked_jnode;
  9132. + int spin_locked_jload;
  9133. + int spin_locked_txnh;
  9134. + int spin_locked_atom;
  9135. + int spin_locked_stack;
  9136. + int spin_locked_txnmgr;
  9137. + int spin_locked_ktxnmgrd;
  9138. + int spin_locked_fq;
  9139. + int spin_locked_inode;
  9140. + int spin_locked_super_eflush;
  9141. + int spin_locked;
  9142. + int long_term_locked_znode;
  9143. +
  9144. + int inode_sem_r;
  9145. + int inode_sem_w;
  9146. +
  9147. + int d_refs;
  9148. + int x_refs;
  9149. + int t_refs;
  9150. +} reiser4_lock_cnt_info;
  9151. +
  9152. +extern struct reiser4_lock_cnt_info *reiser4_lock_counters(void);
  9153. +#define IN_CONTEXT(a, b) (is_in_reiser4_context() ? (a) : (b))
  9154. +
  9155. +/* increment lock-counter @counter, if present */
  9156. +#define LOCK_CNT_INC(counter) \
  9157. + IN_CONTEXT(++(reiser4_lock_counters()->counter), 0)
  9158. +
  9159. +/* decrement lock-counter @counter, if present */
  9160. +#define LOCK_CNT_DEC(counter) \
  9161. + IN_CONTEXT(--(reiser4_lock_counters()->counter), 0)
  9162. +
  9163. +/* check that lock-counter is zero. This is for use in assertions */
  9164. +#define LOCK_CNT_NIL(counter) \
  9165. + IN_CONTEXT(reiser4_lock_counters()->counter == 0, 1)
  9166. +
  9167. +/* check that lock-counter is greater than zero. This is for use in
  9168. + * assertions */
  9169. +#define LOCK_CNT_GTZ(counter) \
  9170. + IN_CONTEXT(reiser4_lock_counters()->counter > 0, 1)
  9171. +#define LOCK_CNT_LT(counter,n) \
  9172. + IN_CONTEXT(reiser4_lock_counters()->counter < n, 1)
  9173. +
  9174. +#else /* REISER4_DEBUG */
  9175. +
  9176. +/* no-op versions on the above */
  9177. +
  9178. +typedef struct reiser4_lock_cnt_info {
  9179. +} reiser4_lock_cnt_info;
  9180. +
  9181. +#define reiser4_lock_counters() ((reiser4_lock_cnt_info *)NULL)
  9182. +#define LOCK_CNT_INC(counter) noop
  9183. +#define LOCK_CNT_DEC(counter) noop
  9184. +#define LOCK_CNT_NIL(counter) (1)
  9185. +#define LOCK_CNT_GTZ(counter) (1)
  9186. +#define LOCK_CNT_LT(counter, n) (1)
  9187. +
  9188. +#endif /* REISER4_DEBUG */
  9189. +
  9190. +#define assert_spin_not_locked(lock) BUG_ON(0)
  9191. +#define assert_rw_write_locked(lock) BUG_ON(0)
  9192. +#define assert_rw_read_locked(lock) BUG_ON(0)
  9193. +#define assert_rw_locked(lock) BUG_ON(0)
  9194. +#define assert_rw_not_write_locked(lock) BUG_ON(0)
  9195. +#define assert_rw_not_read_locked(lock) BUG_ON(0)
  9196. +#define assert_rw_not_locked(lock) BUG_ON(0)
  9197. +
  9198. +/* flags controlling debugging behavior. Are set through debug_flags=N mount
  9199. + option. */
  9200. +typedef enum {
  9201. + /* print a lot of information during panic. When this is on all jnodes
  9202. + * are listed. This can be *very* large output. Usually you don't want
  9203. + * this. Especially over serial line. */
  9204. + REISER4_VERBOSE_PANIC = 0x00000001,
  9205. + /* print a lot of information during umount */
  9206. + REISER4_VERBOSE_UMOUNT = 0x00000002,
  9207. + /* print gathered statistics on umount */
  9208. + REISER4_STATS_ON_UMOUNT = 0x00000004,
  9209. + /* check node consistency */
  9210. + REISER4_CHECK_NODE = 0x00000008
  9211. +} reiser4_debug_flags;
  9212. +
  9213. +extern int is_in_reiser4_context(void);
  9214. +
  9215. +/*
  9216. + * evaluate expression @e only if with reiser4 context
  9217. + */
  9218. +#define ON_CONTEXT(e) do { \
  9219. + if (is_in_reiser4_context()) { \
  9220. + e; \
  9221. + } } while (0)
  9222. +
  9223. +/*
  9224. + * evaluate expression @e only when within reiser4_context and debugging is
  9225. + * on.
  9226. + */
  9227. +#define ON_DEBUG_CONTEXT(e) ON_DEBUG(ON_CONTEXT(e))
  9228. +
  9229. +/*
  9230. + * complain about unexpected function result and crash. Used in "default"
  9231. + * branches of switch statements and alike to assert that invalid results are
  9232. + * not silently ignored.
  9233. + */
  9234. +#define wrong_return_value(label, function) \
  9235. + impossible(label, "wrong return value from " function)
  9236. +
  9237. +/* Issue different types of reiser4 messages to the console */
  9238. +#define warning(label, format, ...) \
  9239. + DCALL(KERN_WARNING, \
  9240. + printk, 1, label, "WARNING: " format , ## __VA_ARGS__)
  9241. +#define notice(label, format, ...) \
  9242. + DCALL(KERN_NOTICE, \
  9243. + printk, 1, label, "NOTICE: " format , ## __VA_ARGS__)
  9244. +
  9245. +/* mark not yet implemented functionality */
  9246. +#define not_yet(label, format, ...) \
  9247. + reiser4_panic(label, "NOT YET IMPLEMENTED: " format , ## __VA_ARGS__)
  9248. +
  9249. +extern void reiser4_do_panic(const char *format, ...)
  9250. + __attribute__ ((noreturn, format(printf, 1, 2)));
  9251. +
  9252. +extern int reiser4_preempt_point(void);
  9253. +extern void reiser4_print_stats(void);
  9254. +
  9255. +#if REISER4_DEBUG
  9256. +extern int reiser4_no_counters_are_held(void);
  9257. +extern int reiser4_commit_check_locks(void);
  9258. +#else
  9259. +#define reiser4_no_counters_are_held() (1)
  9260. +#define reiser4_commit_check_locks() (1)
  9261. +#endif
  9262. +
  9263. +/* true if @i is power-of-two. Useful for rate-limited warnings, etc. */
  9264. +#define IS_POW(i) \
  9265. +({ \
  9266. + typeof(i) __i; \
  9267. + \
  9268. + __i = (i); \
  9269. + !(__i & (__i - 1)); \
  9270. +})
  9271. +
  9272. +#define KERNEL_DEBUGGER (1)
  9273. +
  9274. +#if KERNEL_DEBUGGER
  9275. +
  9276. +extern void reiser4_debugtrap(void);
  9277. +
  9278. +/*
  9279. + * Check condition @cond and drop into kernel debugger (kgdb) if it's true. If
  9280. + * kgdb is not compiled in, do nothing.
  9281. + */
  9282. +#define DEBUGON(cond) \
  9283. +({ \
  9284. + if (unlikely(cond)) \
  9285. + reiser4_debugtrap(); \
  9286. +})
  9287. +#else
  9288. +#define DEBUGON(cond) noop
  9289. +#endif
  9290. +
  9291. +/*
  9292. + * Error code tracing facility. (Idea is borrowed from XFS code.)
  9293. + *
  9294. + * Suppose some strange and/or unexpected code is returned from some function
  9295. + * (for example, write(2) returns -EEXIST). It is possible to place a
  9296. + * breakpoint in the reiser4_write(), but it is too late here. How to find out
  9297. + * in what particular place -EEXIST was generated first?
  9298. + *
  9299. + * In reiser4 all places where actual error codes are produced (that is,
  9300. + * statements of the form
  9301. + *
  9302. + * return -EFOO; // (1), or
  9303. + *
  9304. + * result = -EFOO; // (2)
  9305. + *
  9306. + * are replaced with
  9307. + *
  9308. + * return RETERR(-EFOO); // (1a), and
  9309. + *
  9310. + * result = RETERR(-EFOO); // (2a) respectively
  9311. + *
  9312. + * RETERR() macro fills a backtrace in reiser4_context. This back-trace is
  9313. + * printed in error and warning messages. Moreover, it's possible to put a
  9314. + * conditional breakpoint in reiser4_return_err (low-level function called
  9315. + * by RETERR() to do the actual work) to break into debugger immediately
  9316. + * when particular error happens.
  9317. + *
  9318. + */
  9319. +
  9320. +#if REISER4_DEBUG
  9321. +
  9322. +/*
  9323. + * data-type to store information about where error happened ("error site").
  9324. + */
  9325. +typedef struct err_site {
  9326. + int code; /* error code */
  9327. + const char *file; /* source file, filled by __FILE__ */
  9328. + int line; /* source file line, filled by __LINE__ */
  9329. +} err_site;
  9330. +
  9331. +extern void reiser4_return_err(int code, const char *file, int line);
  9332. +
  9333. +/*
  9334. + * fill &get_current_context()->err_site with error information.
  9335. + */
  9336. +#define RETERR(code) \
  9337. +({ \
  9338. + typeof(code) __code; \
  9339. + \
  9340. + __code = (code); \
  9341. + reiser4_return_err(__code, __FILE__, __LINE__); \
  9342. + __code; \
  9343. +})
  9344. +
  9345. +#else
  9346. +
  9347. +/*
  9348. + * no-op versions of the above
  9349. + */
  9350. +
  9351. +typedef struct err_site {
  9352. +} err_site;
  9353. +#define RETERR(code) code
  9354. +#endif
  9355. +
  9356. +#if REISER4_LARGE_KEY
  9357. +/*
  9358. + * conditionally compile arguments only if REISER4_LARGE_KEY is on.
  9359. + */
  9360. +#define ON_LARGE_KEY(...) __VA_ARGS__
  9361. +#else
  9362. +#define ON_LARGE_KEY(...)
  9363. +#endif
  9364. +
  9365. +/* __FS_REISER4_DEBUG_H__ */
  9366. +#endif
  9367. +
  9368. +/* Make Linus happy.
  9369. + Local variables:
  9370. + c-indentation-style: "K&R"
  9371. + mode-name: "LC"
  9372. + c-basic-offset: 8
  9373. + tab-width: 8
  9374. + fill-column: 120
  9375. + End:
  9376. +*/
  9377. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/dformat.h linux-5.16.14/fs/reiser4/dformat.h
  9378. --- linux-5.16.14.orig/fs/reiser4/dformat.h 1970-01-01 01:00:00.000000000 +0100
  9379. +++ linux-5.16.14/fs/reiser4/dformat.h 2022-03-12 13:26:19.645892712 +0100
  9380. @@ -0,0 +1,75 @@
  9381. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  9382. + reiser4/README */
  9383. +
  9384. +/* Formats of on-disk data and conversion functions. */
  9385. +
  9386. +/* put all item formats in the files describing the particular items,
  9387. + our model is, everything you need to do to add an item to reiser4,
  9388. + (excepting the changes to the plugin that uses the item which go
  9389. + into the file defining that plugin), you put into one file. */
  9390. +/* Data on disk are stored in little-endian format.
  9391. + To declare fields of on-disk structures, use d8, d16, d32 and d64.
  9392. + d??tocpu() and cputod??() to convert. */
  9393. +
  9394. +#if !defined(__FS_REISER4_DFORMAT_H__)
  9395. +#define __FS_REISER4_DFORMAT_H__
  9396. +
  9397. +#include "debug.h"
  9398. +
  9399. +#include <asm/byteorder.h>
  9400. +#include <asm/unaligned.h>
  9401. +#include <linux/types.h>
  9402. +
  9403. +typedef __u8 d8;
  9404. +typedef __le16 d16;
  9405. +typedef __le32 d32;
  9406. +typedef __le64 d64;
  9407. +
  9408. +#define PACKED __attribute__((packed))
  9409. +
  9410. +/* data-type for block number */
  9411. +typedef __u64 reiser4_block_nr;
  9412. +
  9413. +static_assert(sizeof(reiser4_block_nr) == 8);
  9414. +
  9415. +/* data-type for block number on disk, disk format */
  9416. +typedef __le64 reiser4_dblock_nr;
  9417. +
  9418. +/**
  9419. + * disk_addr_eq - compare disk addresses
  9420. + * @b1: pointer to block number to compare
  9421. + * @b2: pointer to block number to compare
  9422. + *
  9423. + * Returns true if disk addresses are the same
  9424. + */
  9425. +static inline int disk_addr_eq(const reiser4_block_nr * b1,
  9426. + const reiser4_block_nr * b2)
  9427. +{
  9428. + assert("nikita-1033", b1 != NULL);
  9429. + assert("nikita-1266", b2 != NULL);
  9430. +
  9431. + return !memcmp(b1, b2, sizeof *b1);
  9432. +}
  9433. +
  9434. +/* structure of master reiser4 super block */
  9435. +typedef struct reiser4_master_sb {
  9436. + char magic[16]; /* "ReIsEr4" */
  9437. + __le16 disk_plugin_id; /* id of disk layout plugin */
  9438. + __le16 blocksize;
  9439. + char uuid[16]; /* unique id */
  9440. + char label[16]; /* filesystem label */
  9441. + __le64 diskmap; /* location of the diskmap. 0 if not present */
  9442. +} reiser4_master_sb;
  9443. +
  9444. +/* __FS_REISER4_DFORMAT_H__ */
  9445. +#endif
  9446. +
  9447. +/*
  9448. + * Local variables:
  9449. + * c-indentation-style: "K&R"
  9450. + * mode-name: "LC"
  9451. + * c-basic-offset: 8
  9452. + * tab-width: 8
  9453. + * fill-column: 79
  9454. + * End:
  9455. + */
  9456. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/discard.c linux-5.16.14/fs/reiser4/discard.c
  9457. --- linux-5.16.14.orig/fs/reiser4/discard.c 1970-01-01 01:00:00.000000000 +0100
  9458. +++ linux-5.16.14/fs/reiser4/discard.c 2022-03-12 13:26:19.645892712 +0100
  9459. @@ -0,0 +1,179 @@
  9460. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  9461. + * reiser4/README */
  9462. +
  9463. +/* TRIM/discard interoperation subsystem for reiser4. */
  9464. +
  9465. +/*
  9466. + * This subsystem is responsible for populating an atom's ->discard_set and
  9467. + * (later) converting it into a series of discard calls to the kernel.
  9468. + *
  9469. + * The discard is an in-kernel interface for notifying the storage
  9470. + * hardware about blocks that are being logically freed by the filesystem.
  9471. + * This is done via calling the blkdev_issue_discard() function. There are
  9472. + * restrictions on block ranges: they should constitute at least one erase unit
  9473. + * in length and be correspondingly aligned. Otherwise a discard request will
  9474. + * be ignored.
  9475. + *
  9476. + * The erase unit size is kept in struct queue_limits as discard_granularity.
  9477. + * The offset from the partition start to the first erase unit is kept in
  9478. + * struct queue_limits as discard_alignment.
  9479. + *
  9480. + * At atom level, we record numbers of all blocks that happen to be deallocated
  9481. + * during the transaction. Then we read the generated set, filter out any blocks
  9482. + * that have since been allocated again and issue discards for everything still
  9483. + * valid. This is what discard.[ch] is here for.
  9484. + *
  9485. + * However, simply iterating through the recorded extents is not enough:
  9486. + * - if a single extent is smaller than the erase unit, then this particular
  9487. + * extent won't be discarded even if it is surrounded by enough free blocks
  9488. + * to constitute a whole erase unit;
  9489. + * - we won't be able to merge small adjacent extents forming an extent long
  9490. + * enough to be discarded.
  9491. + *
  9492. + * MECHANISM:
  9493. + *
  9494. + * During the transaction deallocated extents are recorded in atom's delete
  9495. + * set. In reiser4, there are two methods to deallocate a block:
  9496. + * 1. deferred deallocation, enabled by BA_DEFER flag to reiser4_dealloc_block().
  9497. + * In this mode, blocks are stored to delete set instead of being marked free
  9498. + * immediately. After committing the transaction, the delete set is "applied"
  9499. + * by the block allocator and all these blocks are marked free in memory
  9500. + * (see reiser4_post_write_back_hook()).
  9501. + * Space management plugins also read the delete set to update on-disk
  9502. + * allocation records (see reiser4_pre_commit_hook()).
  9503. + * 2. immediate deallocation (the opposite).
  9504. + * In this mode, blocks are marked free immediately. This is used by the
  9505. + * journal subsystem to manage space used by the journal records, so these
  9506. + * allocations are not visible to the space management plugins and never hit
  9507. + * the disk.
  9508. + *
  9509. + * When discard is enabled, all immediate deallocations become deferred. This
  9510. + * is OK because journal's allocations happen after reiser4_pre_commit_hook()
  9511. + * where the on-disk space allocation records are updated. So, in this mode
  9512. + * the atom's delete set becomes "the discard set" -- list of blocks that have
  9513. + * to be considered for discarding.
  9514. + *
  9515. + * Discarding is performed before completing deferred deallocations, hence all
  9516. + * extents in the discard set are still marked as allocated and cannot contain
  9517. + * any data. Thus we can avoid any checks for blocks directly present in the
  9518. + * discard set.
  9519. + *
  9520. + * For now, we don't perform "padding" of extents to erase unit boundaries.
  9521. + * This means if extents are not aligned with the device's erase unit lattice,
  9522. + * the partial erase units at head and tail of extents are truncated by kernel
  9523. + * (in blkdev_issue_discard()).
  9524. + *
  9525. + * So, at commit time the following actions take place:
  9526. + * - delete sets are merged to form the discard set;
  9527. + * - elements of the discard set are sorted;
  9528. + * - the discard set is iterated, joining any adjacent extents;
  9529. + * - for each extent, a single call to blkdev_issue_discard() is done.
  9530. + */
  9531. +
  9532. +#include "discard.h"
  9533. +#include "context.h"
  9534. +#include "debug.h"
  9535. +#include "txnmgr.h"
  9536. +#include "super.h"
  9537. +
  9538. +#include <linux/slab.h>
  9539. +#include <linux/fs.h>
  9540. +#include <linux/blkdev.h>
  9541. +
  9542. +static int __discard_extent(struct block_device *bdev, sector_t start,
  9543. + sector_t len)
  9544. +{
  9545. + assert("intelfx-21", bdev != NULL);
  9546. +
  9547. + return blkdev_issue_discard(bdev, start, len, reiser4_ctx_gfp_mask_get(),
  9548. + 0);
  9549. +}
  9550. +
  9551. +static int discard_extent(txn_atom *atom UNUSED_ARG,
  9552. + const reiser4_block_nr* start,
  9553. + const reiser4_block_nr* len,
  9554. + void *data UNUSED_ARG)
  9555. +{
  9556. + struct super_block *sb = reiser4_get_current_sb();
  9557. + struct block_device *bdev = sb->s_bdev;
  9558. +
  9559. + sector_t extent_start_sec, extent_len_sec;
  9560. +
  9561. + const int sec_per_blk = sb->s_blocksize >> 9;
  9562. +
  9563. + /* we assume block = N * sector */
  9564. + assert("intelfx-7", sec_per_blk > 0);
  9565. +
  9566. + /* convert extent to sectors */
  9567. + extent_start_sec = *start * sec_per_blk;
  9568. + extent_len_sec = *len * sec_per_blk;
  9569. +
  9570. + /* discard the extent, don't pad it to erase unit boundaries for now */
  9571. + return __discard_extent(bdev, extent_start_sec, extent_len_sec);
  9572. +}
  9573. +
  9574. +int discard_atom(txn_atom *atom, struct list_head *processed_set)
  9575. +{
  9576. + int ret;
  9577. + struct list_head discard_set;
  9578. +
  9579. + if (!reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
  9580. + spin_unlock_atom(atom);
  9581. + return 0;
  9582. + }
  9583. +
  9584. + assert("intelfx-28", atom != NULL);
  9585. + assert("intelfx-59", processed_set != NULL);
  9586. +
  9587. + if (list_empty(&atom->discard.delete_set)) {
  9588. + /* Nothing left to discard. */
  9589. + spin_unlock_atom(atom);
  9590. + return 0;
  9591. + }
  9592. +
  9593. + /* Take the delete sets from the atom in order to release atom spinlock. */
  9594. + blocknr_list_init(&discard_set);
  9595. + blocknr_list_merge(&atom->discard.delete_set, &discard_set);
  9596. + spin_unlock_atom(atom);
  9597. +
  9598. + /* Sort the discard list, joining adjacent and overlapping extents. */
  9599. + blocknr_list_sort_and_join(&discard_set);
  9600. +
  9601. + /* Perform actual dirty work. */
  9602. + ret = blocknr_list_iterator(NULL, &discard_set, &discard_extent, NULL, 0);
  9603. +
  9604. + /* Add processed extents to the temporary list. */
  9605. + blocknr_list_merge(&discard_set, processed_set);
  9606. +
  9607. + if (ret != 0) {
  9608. + return ret;
  9609. + }
  9610. +
  9611. + /* Let's do this again for any new extents in the atom's discard set. */
  9612. + return -E_REPEAT;
  9613. +}
  9614. +
  9615. +void discard_atom_post(txn_atom *atom, struct list_head *processed_set)
  9616. +{
  9617. + assert("intelfx-60", atom != NULL);
  9618. + assert("intelfx-61", processed_set != NULL);
  9619. +
  9620. + if (!reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
  9621. + spin_unlock_atom(atom);
  9622. + return;
  9623. + }
  9624. +
  9625. + blocknr_list_merge(processed_set, &atom->discard.delete_set);
  9626. + spin_unlock_atom(atom);
  9627. +}
  9628. +
  9629. +/* Make Linus happy.
  9630. + Local variables:
  9631. + c-indentation-style: "K&R"
  9632. + mode-name: "LC"
  9633. + c-basic-offset: 8
  9634. + tab-width: 8
  9635. + fill-column: 120
  9636. + scroll-step: 1
  9637. + End:
  9638. +*/
  9639. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/discard.h linux-5.16.14/fs/reiser4/discard.h
  9640. --- linux-5.16.14.orig/fs/reiser4/discard.h 1970-01-01 01:00:00.000000000 +0100
  9641. +++ linux-5.16.14/fs/reiser4/discard.h 2022-03-12 13:26:19.646892714 +0100
  9642. @@ -0,0 +1,42 @@
  9643. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  9644. + * reiser4/README */
  9645. +
  9646. +/* TRIM/discard interoperation subsystem for reiser4. */
  9647. +
  9648. +#if !defined(__FS_REISER4_DISCARD_H__)
  9649. +#define __FS_REISER4_DISCARD_H__
  9650. +
  9651. +#include "forward.h"
  9652. +#include "dformat.h"
  9653. +
  9654. +/**
  9655. + * Issue discard requests for all block extents recorded in @atom's delete sets,
  9656. + * if discard is enabled. The extents processed are removed from the @atom's
  9657. + * delete sets and stored in @processed_set.
  9658. + *
  9659. + * @atom must be locked on entry and is unlocked on exit.
  9660. + * @processed_set must be initialized with blocknr_list_init().
  9661. + */
  9662. +extern int discard_atom(txn_atom *atom, struct list_head *processed_set);
  9663. +
  9664. +/**
  9665. + * Splices @processed_set back to @atom's delete set.
  9666. + * Must be called after discard_atom() loop, using the same @processed_set.
  9667. + *
  9668. + * @atom must be locked on entry and is unlocked on exit.
  9669. + * @processed_set must be the same as passed to discard_atom().
  9670. + */
  9671. +extern void discard_atom_post(txn_atom *atom, struct list_head *processed_set);
  9672. +
  9673. +/* __FS_REISER4_DISCARD_H__ */
  9674. +#endif
  9675. +
  9676. +/* Make Linus happy.
  9677. + Local variables:
  9678. + c-indentation-style: "K&R"
  9679. + mode-name: "LC"
  9680. + c-basic-offset: 8
  9681. + tab-width: 8
  9682. + fill-column: 120
  9683. + End:
  9684. +*/
  9685. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/dscale.c linux-5.16.14/fs/reiser4/dscale.c
  9686. --- linux-5.16.14.orig/fs/reiser4/dscale.c 1970-01-01 01:00:00.000000000 +0100
  9687. +++ linux-5.16.14/fs/reiser4/dscale.c 2022-03-12 13:26:19.646892714 +0100
  9688. @@ -0,0 +1,192 @@
  9689. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  9690. + * reiser4/README */
  9691. +
  9692. +/* Scalable on-disk integers */
  9693. +
  9694. +/*
  9695. + * Various on-disk structures contain integer-like structures. Stat-data
  9696. + * contain [yes, "data" is plural, check the dictionary] file size, link
  9697. + * count; extent unit contains extent width etc. To accommodate for general
  9698. + * case enough space is reserved to keep largest possible value. 64 bits in
  9699. + * all cases above. But in overwhelming majority of cases numbers actually
  9700. + * stored in these fields will be comparatively small and reserving 8 bytes is
  9701. + * a waste of precious disk bandwidth.
  9702. + *
  9703. + * Scalable integers are one way to solve this problem. dscale_write()
  9704. + * function stores __u64 value in the given area consuming from 1 to 9 bytes,
  9705. + * depending on the magnitude of the value supplied. dscale_read() reads value
  9706. + * previously stored by dscale_write().
  9707. + *
  9708. + * dscale_write() produces format not completely unlike of UTF: two highest
  9709. + * bits of the first byte are used to store "tag". One of 4 possible tag
  9710. + * values is chosen depending on the number being encoded:
  9711. + *
  9712. + * 0 ... 0x3f => 0 [table 1]
  9713. + * 0x40 ... 0x3fff => 1
  9714. + * 0x4000 ... 0x3fffffff => 2
  9715. + * 0x40000000 ... 0xffffffffffffffff => 3
  9716. + *
  9717. + * (see dscale_range() function)
  9718. + *
  9719. + * Values in the range 0x40000000 ... 0xffffffffffffffff require 8 full bytes
  9720. + * to be stored, so in this case there is no place in the first byte to store
  9721. + * tag. For such values tag is stored in an extra 9th byte.
  9722. + *
  9723. + * As _highest_ bits are used for the test (which is natural) scaled integers
  9724. + * are stored in BIG-ENDIAN format in contrast with the rest of reiser4 which
  9725. + * uses LITTLE-ENDIAN.
  9726. + *
  9727. + */
  9728. +
  9729. +#include "debug.h"
  9730. +#include "dscale.h"
  9731. +
  9732. +/* return tag of scaled integer stored at @address */
  9733. +static int gettag(const unsigned char *address)
  9734. +{
  9735. + /* tag is stored in two highest bits */
  9736. + return (*address) >> 6;
  9737. +}
  9738. +
  9739. +/* clear tag from value. Clear tag embedded into @value. */
  9740. +static void cleartag(__u64 *value, int tag)
  9741. +{
  9742. + /*
  9743. + * W-w-what ?!
  9744. + *
  9745. + * Actually, this is rather simple: @value passed here was read by
  9746. + * dscale_read(), converted from BIG-ENDIAN, and padded to __u64 by
  9747. + * zeroes. Tag is still stored in the highest (arithmetically)
  9748. + * non-zero bits of @value, but relative position of tag within __u64
  9749. + * depends on @tag.
  9750. + *
  9751. + * For example if @tag is 0, it's stored in 2 highest bits of lowest
  9752. + * byte, and its offset (counting from lowest bit) is 8 - 2 == 6 bits.
  9753. + *
  9754. + * If tag is 1, it's stored in two highest bits of 2nd lowest byte,
  9755. + * and its offset is (2 * 8) - 2 == 14 bits.
  9756. + *
  9757. + * See table 1 above for details.
  9758. + *
  9759. + * All these cases are captured by the formula:
  9760. + */
  9761. + *value &= ~(3 << (((1 << tag) << 3) - 2));
  9762. + /*
  9763. + * That is, clear two (3 == 0t11) bits at the offset
  9764. + *
  9765. + * 8 * (2 ^ tag) - 2,
  9766. + *
  9767. + * that is, two highest bits of (2 ^ tag)-th byte of @value.
  9768. + */
  9769. +}
  9770. +
  9771. +/* return tag for @value. See table 1 above for details. */
  9772. +static int dscale_range(__u64 value)
  9773. +{
  9774. + if (value > 0x3fffffff)
  9775. + return 3;
  9776. + if (value > 0x3fff)
  9777. + return 2;
  9778. + if (value > 0x3f)
  9779. + return 1;
  9780. + return 0;
  9781. +}
  9782. +
  9783. +/* restore value stored at @address by dscale_write() and return number of
  9784. + * bytes consumed */
  9785. +int dscale_read(unsigned char *address, __u64 *value)
  9786. +{
  9787. + int tag;
  9788. +
  9789. + /* read tag */
  9790. + tag = gettag(address);
  9791. + switch (tag) {
  9792. + case 3:
  9793. + /* In this case tag is stored in an extra byte, skip this byte
  9794. + * and decode value stored in the next 8 bytes.*/
  9795. + *value = __be64_to_cpu(get_unaligned((__be64 *)(address + 1)));
  9796. + /* worst case: 8 bytes for value itself plus one byte for
  9797. + * tag. */
  9798. + return 9;
  9799. + case 0:
  9800. + *value = get_unaligned(address);
  9801. + break;
  9802. + case 1:
  9803. + *value = __be16_to_cpu(get_unaligned((__be16 *)address));
  9804. + break;
  9805. + case 2:
  9806. + *value = __be32_to_cpu(get_unaligned((__be32 *)address));
  9807. + break;
  9808. + default:
  9809. + return RETERR(-EIO);
  9810. + }
  9811. + /* clear tag embedded into @value */
  9812. + cleartag(value, tag);
  9813. + /* number of bytes consumed is (2 ^ tag)---see table 1. */
  9814. + return 1 << tag;
  9815. +}
  9816. +
  9817. +/* number of bytes consumed */
  9818. +int dscale_bytes_to_read(unsigned char *address)
  9819. +{
  9820. + int tag;
  9821. +
  9822. + tag = gettag(address);
  9823. + switch (tag) {
  9824. + case 0:
  9825. + case 1:
  9826. + case 2:
  9827. + return 1 << tag;
  9828. + case 3:
  9829. + return 9;
  9830. + default:
  9831. + return RETERR(-EIO);
  9832. + }
  9833. +}
  9834. +
  9835. +/* store @value at @address and return number of bytes consumed */
  9836. +int dscale_write(unsigned char *address, __u64 value)
  9837. +{
  9838. + int tag;
  9839. + int shift;
  9840. + __be64 v;
  9841. + unsigned char *valarr;
  9842. +
  9843. + tag = dscale_range(value);
  9844. + v = __cpu_to_be64(value);
  9845. + valarr = (unsigned char *)&v;
  9846. + shift = (tag == 3) ? 1 : 0;
  9847. + memcpy(address + shift, valarr + sizeof v - (1 << tag), 1 << tag);
  9848. + *address |= (tag << 6);
  9849. + return shift + (1 << tag);
  9850. +}
  9851. +
  9852. +/* number of bytes required to store @value */
  9853. +int dscale_bytes_to_write(__u64 value)
  9854. +{
  9855. + int bytes;
  9856. +
  9857. + bytes = 1 << dscale_range(value);
  9858. + if (bytes == 8)
  9859. + ++bytes;
  9860. + return bytes;
  9861. +}
  9862. +
  9863. +/* returns true if @value and @other require the same number of bytes to be
  9864. + * stored. Used to detect when data structure (like stat-data) has to be
  9865. + * expanded or contracted. */
  9866. +int dscale_fit(__u64 value, __u64 other)
  9867. +{
  9868. + return dscale_range(value) == dscale_range(other);
  9869. +}
  9870. +
  9871. +/* Make Linus happy.
  9872. + Local variables:
  9873. + c-indentation-style: "K&R"
  9874. + mode-name: "LC"
  9875. + c-basic-offset: 8
  9876. + tab-width: 8
  9877. + fill-column: 120
  9878. + scroll-step: 1
  9879. + End:
  9880. +*/
  9881. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/dscale.h linux-5.16.14/fs/reiser4/dscale.h
  9882. --- linux-5.16.14.orig/fs/reiser4/dscale.h 1970-01-01 01:00:00.000000000 +0100
  9883. +++ linux-5.16.14/fs/reiser4/dscale.h 2022-03-12 13:26:19.646892714 +0100
  9884. @@ -0,0 +1,28 @@
  9885. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  9886. + * reiser4/README */
  9887. +
  9888. +/* Scalable on-disk integers. See dscale.c for details. */
  9889. +
  9890. +#if !defined(__FS_REISER4_DSCALE_H__)
  9891. +#define __FS_REISER4_DSCALE_H__
  9892. +
  9893. +#include "dformat.h"
  9894. +
  9895. +extern int dscale_read(unsigned char *address, __u64 *value);
  9896. +extern int dscale_write(unsigned char *address, __u64 value);
  9897. +extern int dscale_bytes_to_read(unsigned char *address);
  9898. +extern int dscale_bytes_to_write(__u64 value);
  9899. +extern int dscale_fit(__u64 value, __u64 other);
  9900. +
  9901. +/* __FS_REISER4_DSCALE_H__ */
  9902. +#endif
  9903. +
  9904. +/* Make Linus happy.
  9905. + Local variables:
  9906. + c-indentation-style: "K&R"
  9907. + mode-name: "LC"
  9908. + c-basic-offset: 8
  9909. + tab-width: 8
  9910. + fill-column: 120
  9911. + End:
  9912. +*/
  9913. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/entd.c linux-5.16.14/fs/reiser4/entd.c
  9914. --- linux-5.16.14.orig/fs/reiser4/entd.c 1970-01-01 01:00:00.000000000 +0100
  9915. +++ linux-5.16.14/fs/reiser4/entd.c 2022-03-12 13:26:19.646892714 +0100
  9916. @@ -0,0 +1,361 @@
  9917. +/* Copyright 2003, 2004 by Hans Reiser, licensing governed by
  9918. + * reiser4/README */
  9919. +
  9920. +/* Ent daemon. */
  9921. +
  9922. +#include "debug.h"
  9923. +#include "txnmgr.h"
  9924. +#include "tree.h"
  9925. +#include "entd.h"
  9926. +#include "super.h"
  9927. +#include "context.h"
  9928. +#include "reiser4.h"
  9929. +#include "vfs_ops.h"
  9930. +#include "page_cache.h"
  9931. +#include "inode.h"
  9932. +
  9933. +#include <linux/sched.h> /* struct task_struct */
  9934. +#include <linux/suspend.h>
  9935. +#include <linux/kernel.h>
  9936. +#include <linux/writeback.h>
  9937. +#include <linux/time.h> /* INITIAL_JIFFIES */
  9938. +#include <linux/backing-dev.h> /* bdi_write_congested */
  9939. +#include <linux/wait.h>
  9940. +#include <linux/kthread.h>
  9941. +#include <linux/freezer.h>
  9942. +
  9943. +#define DEF_PRIORITY 12
  9944. +#define MAX_ENTD_ITERS 10
  9945. +
  9946. +static void entd_flush(struct super_block *, struct wbq *);
  9947. +static int entd(void *arg);
  9948. +
  9949. +/*
  9950. + * set ->comm field of ent thread to make its state visible to the user level
  9951. + */
  9952. +#define entd_set_comm(state) \
  9953. + snprintf(current->comm, sizeof(current->comm), \
  9954. + "ent:%s%s", super->s_id, (state))
  9955. +
  9956. +/**
  9957. + * reiser4_init_entd - initialize entd context and start kernel daemon
  9958. + * @super: super block to start ent thread for
  9959. + *
  9960. + * Creates entd contexts, starts kernel thread and waits until it
  9961. + * initializes.
  9962. + */
  9963. +int reiser4_init_entd(struct super_block *super)
  9964. +{
  9965. + entd_context *ctx;
  9966. +
  9967. + assert("nikita-3104", super != NULL);
  9968. +
  9969. + ctx = get_entd_context(super);
  9970. +
  9971. + memset(ctx, 0, sizeof *ctx);
  9972. + spin_lock_init(&ctx->guard);
  9973. + init_waitqueue_head(&ctx->wait);
  9974. +#if REISER4_DEBUG
  9975. + INIT_LIST_HEAD(&ctx->flushers_list);
  9976. +#endif
  9977. + /* lists of writepage requests */
  9978. + INIT_LIST_HEAD(&ctx->todo_list);
  9979. + INIT_LIST_HEAD(&ctx->done_list);
  9980. + /* start entd */
  9981. + ctx->tsk = kthread_run(entd, super, "ent:%s", super->s_id);
  9982. + if (IS_ERR(ctx->tsk))
  9983. + return PTR_ERR(ctx->tsk);
  9984. + return 0;
  9985. +}
  9986. +
  9987. +static void put_wbq(struct wbq *rq)
  9988. +{
  9989. + iput(rq->mapping->host);
  9990. + complete(&rq->completion);
  9991. +}
  9992. +
  9993. +/* ent should be locked */
  9994. +static struct wbq *__get_wbq(entd_context * ent)
  9995. +{
  9996. + struct wbq *wbq;
  9997. +
  9998. + if (list_empty(&ent->todo_list))
  9999. + return NULL;
  10000. +
  10001. + ent->nr_todo_reqs--;
  10002. + wbq = list_entry(ent->todo_list.next, struct wbq, link);
  10003. + list_del_init(&wbq->link);
  10004. + return wbq;
  10005. +}
  10006. +
  10007. +/* ent thread function */
  10008. +static int entd(void *arg)
  10009. +{
  10010. + struct super_block *super;
  10011. + entd_context *ent;
  10012. + int done = 0;
  10013. +
  10014. + super = arg;
  10015. + /* do_fork() just copies task_struct into the new
  10016. + thread. ->fs_context shouldn't be copied of course. This shouldn't
  10017. + be a problem for the rest of the code though.
  10018. + */
  10019. + current->journal_info = NULL;
  10020. +
  10021. + ent = get_entd_context(super);
  10022. +
  10023. + while (!done) {
  10024. + try_to_freeze();
  10025. +
  10026. + spin_lock(&ent->guard);
  10027. + while (ent->nr_todo_reqs != 0) {
  10028. + struct wbq *rq;
  10029. +
  10030. + assert("", list_empty(&ent->done_list));
  10031. +
  10032. + /* take request from the queue head */
  10033. + rq = __get_wbq(ent);
  10034. + assert("", rq != NULL);
  10035. + ent->cur_request = rq;
  10036. + spin_unlock(&ent->guard);
  10037. +
  10038. + entd_set_comm("!");
  10039. + entd_flush(super, rq);
  10040. +
  10041. + put_wbq(rq);
  10042. +
  10043. + /*
  10044. + * wakeup all requestors and iput their inodes
  10045. + */
  10046. + spin_lock(&ent->guard);
  10047. + while (!list_empty(&ent->done_list)) {
  10048. + rq = list_entry(ent->done_list.next, struct wbq, link);
  10049. + list_del_init(&rq->link);
  10050. + ent->nr_done_reqs--;
  10051. + spin_unlock(&ent->guard);
  10052. + assert("", rq->written == 1);
  10053. + put_wbq(rq);
  10054. + spin_lock(&ent->guard);
  10055. + }
  10056. + }
  10057. + spin_unlock(&ent->guard);
  10058. +
  10059. + entd_set_comm(".");
  10060. +
  10061. + {
  10062. + DEFINE_WAIT(__wait);
  10063. +
  10064. + do {
  10065. + prepare_to_wait(&ent->wait, &__wait, TASK_INTERRUPTIBLE);
  10066. + if (kthread_should_stop()) {
  10067. + done = 1;
  10068. + break;
  10069. + }
  10070. + if (ent->nr_todo_reqs != 0)
  10071. + break;
  10072. + schedule();
  10073. + } while (0);
  10074. + finish_wait(&ent->wait, &__wait);
  10075. + }
  10076. + }
  10077. + BUG_ON(ent->nr_todo_reqs != 0);
  10078. + return 0;
  10079. +}
  10080. +
  10081. +/**
  10082. + * reiser4_done_entd - stop entd kernel thread
  10083. + * @super: super block to stop ent thread for
  10084. + *
  10085. + * It is called on umount. Sends stop signal to entd and waits until it handles
  10086. + * it.
  10087. + */
  10088. +void reiser4_done_entd(struct super_block *super)
  10089. +{
  10090. + entd_context *ent;
  10091. +
  10092. + assert("nikita-3103", super != NULL);
  10093. +
  10094. + ent = get_entd_context(super);
  10095. + assert("zam-1055", ent->tsk != NULL);
  10096. + kthread_stop(ent->tsk);
  10097. +}
  10098. +
  10099. +/* called at the beginning of jnode_flush to register flusher thread with ent
  10100. + * daemon */
  10101. +void reiser4_enter_flush(struct super_block *super)
  10102. +{
  10103. + entd_context *ent;
  10104. +
  10105. + assert("zam-1029", super != NULL);
  10106. + ent = get_entd_context(super);
  10107. +
  10108. + assert("zam-1030", ent != NULL);
  10109. +
  10110. + spin_lock(&ent->guard);
  10111. + ent->flushers++;
  10112. +#if REISER4_DEBUG
  10113. + list_add(&get_current_context()->flushers_link, &ent->flushers_list);
  10114. +#endif
  10115. + spin_unlock(&ent->guard);
  10116. +}
  10117. +
  10118. +/* called at the end of jnode_flush */
  10119. +void reiser4_leave_flush(struct super_block *super)
  10120. +{
  10121. + entd_context *ent;
  10122. + int wake_up_ent;
  10123. +
  10124. + assert("zam-1027", super != NULL);
  10125. + ent = get_entd_context(super);
  10126. +
  10127. + assert("zam-1028", ent != NULL);
  10128. +
  10129. + spin_lock(&ent->guard);
  10130. + ent->flushers--;
  10131. + wake_up_ent = (ent->flushers == 0 && ent->nr_todo_reqs != 0);
  10132. +#if REISER4_DEBUG
  10133. + list_del_init(&get_current_context()->flushers_link);
  10134. +#endif
  10135. + spin_unlock(&ent->guard);
  10136. + if (wake_up_ent)
  10137. + wake_up_process(ent->tsk);
  10138. +}
  10139. +
  10140. +#define ENTD_CAPTURE_APAGE_BURST SWAP_CLUSTER_MAX
  10141. +
  10142. +static void entd_flush(struct super_block *super, struct wbq *rq)
  10143. +{
  10144. + reiser4_context ctx;
  10145. +
  10146. + init_stack_context(&ctx, super);
  10147. + ctx.entd = 1;
  10148. + ctx.gfp_mask = GFP_NOFS;
  10149. +
  10150. + rq->wbc->range_start = page_offset(rq->page);
  10151. + rq->wbc->range_end = rq->wbc->range_start +
  10152. + (ENTD_CAPTURE_APAGE_BURST << PAGE_SHIFT);
  10153. +
  10154. +
  10155. + rq->mapping->a_ops->writepages(rq->mapping, rq->wbc);
  10156. +
  10157. + if (rq->wbc->nr_to_write > 0) {
  10158. + long result;
  10159. + struct bdi_writeback *wb;
  10160. + struct wb_writeback_work work = {
  10161. + .sb = super,
  10162. + .sync_mode = WB_SYNC_NONE,
  10163. + .nr_pages = LONG_MAX,
  10164. + .range_cyclic = 0,
  10165. + .reason = WB_REASON_VMSCAN,
  10166. + };
  10167. + rq->wbc->sync_mode = work.sync_mode,
  10168. + rq->wbc->range_cyclic = work.range_cyclic,
  10169. + rq->wbc->range_start = 0;
  10170. + rq->wbc->range_end = LLONG_MAX;
  10171. + /*
  10172. + * we don't need to pin superblock for writeback:
  10173. + * this is implicitly pinned by write_page_by_ent
  10174. + * (via igrab), so that shutdown_super() will wait
  10175. + * (on reiser4_put_super) for entd completion.
  10176. + */
  10177. + wb = &inode_to_bdi(rq->mapping->host)->wb;
  10178. +
  10179. + spin_lock(&wb->list_lock);
  10180. + result = generic_writeback_sb_inodes(super,
  10181. + wb,
  10182. + rq->wbc,
  10183. + &work,
  10184. + true);
  10185. + spin_unlock(&wb->list_lock);
  10186. + }
  10187. + rq->wbc->nr_to_write = ENTD_CAPTURE_APAGE_BURST;
  10188. +
  10189. + reiser4_writeout(super, rq->wbc);
  10190. + context_set_commit_async(&ctx);
  10191. + reiser4_exit_context(&ctx);
  10192. +}
  10193. +
  10194. +/**
  10195. + * write_page_by_ent - ask entd thread to flush this page as part of slum
  10196. + * @page: page to be written
  10197. + * @wbc: writeback control passed to reiser4_writepage
  10198. + *
  10199. + * Creates a request, puts it on entd list of requests, wakeups entd if
  10200. + * necessary, waits until entd completes with the request.
  10201. + */
  10202. +int write_page_by_ent(struct page *page, struct writeback_control *wbc)
  10203. +{
  10204. + struct super_block *sb;
  10205. + struct inode *inode;
  10206. + entd_context *ent;
  10207. + struct wbq rq;
  10208. +
  10209. + assert("", PageLocked(page));
  10210. + assert("", page->mapping != NULL);
  10211. +
  10212. + sb = page->mapping->host->i_sb;
  10213. + ent = get_entd_context(sb);
  10214. + assert("", ent && ent->done == 0);
  10215. +
  10216. + /*
  10217. + * we are going to unlock page and ask ent thread to write the
  10218. + * page. Re-dirty page before unlocking so that if ent thread fails to
  10219. + * write it - it will remain dirty
  10220. + */
  10221. + set_page_dirty_notag(page);
  10222. + account_page_redirty(page);
  10223. +
  10224. + /*
  10225. + * pin inode in memory, unlock page, entd_flush will iput. We can not
  10226. + * iput here because we can not allow delete_inode to be called here
  10227. + */
  10228. + inode = igrab(page->mapping->host);
  10229. + unlock_page(page);
  10230. + if (inode == NULL)
  10231. + /* inode is getting freed */
  10232. + return 0;
  10233. +
  10234. + /* init wbq */
  10235. + INIT_LIST_HEAD(&rq.link);
  10236. + rq.magic = WBQ_MAGIC;
  10237. + rq.wbc = wbc;
  10238. + rq.page = page;
  10239. + rq.mapping = inode->i_mapping;
  10240. + rq.node = NULL;
  10241. + rq.written = 0;
  10242. + init_completion(&rq.completion);
  10243. +
  10244. + /* add request to entd's list of writepage requests */
  10245. + spin_lock(&ent->guard);
  10246. + ent->nr_todo_reqs++;
  10247. + list_add_tail(&rq.link, &ent->todo_list);
  10248. + if (ent->nr_todo_reqs == 1)
  10249. + wake_up_process(ent->tsk);
  10250. +
  10251. + spin_unlock(&ent->guard);
  10252. +
  10253. + /* wait until entd finishes */
  10254. + wait_for_completion(&rq.completion);
  10255. +
  10256. + if (rq.written)
  10257. + /* Eventually ENTD has written the page to disk. */
  10258. + return 0;
  10259. + return 0;
  10260. +}
  10261. +
  10262. +int wbq_available(void)
  10263. +{
  10264. + struct super_block *sb = reiser4_get_current_sb();
  10265. + entd_context *ent = get_entd_context(sb);
  10266. + return ent->nr_todo_reqs;
  10267. +}
  10268. +
  10269. +/*
  10270. + * Local variables:
  10271. + * c-indentation-style: "K&R"
  10272. + * mode-name: "LC"
  10273. + * c-basic-offset: 8
  10274. + * tab-width: 8
  10275. + * fill-column: 79
  10276. + * End:
  10277. + */
  10278. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/entd.h linux-5.16.14/fs/reiser4/entd.h
  10279. --- linux-5.16.14.orig/fs/reiser4/entd.h 1970-01-01 01:00:00.000000000 +0100
  10280. +++ linux-5.16.14/fs/reiser4/entd.h 2022-03-12 13:26:19.646892714 +0100
  10281. @@ -0,0 +1,90 @@
  10282. +/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */
  10283. +
  10284. +/* Ent daemon. */
  10285. +
  10286. +#ifndef __ENTD_H__
  10287. +#define __ENTD_H__
  10288. +
  10289. +#include "context.h"
  10290. +
  10291. +#include <linux/fs.h>
  10292. +#include <linux/completion.h>
  10293. +#include <linux/wait.h>
  10294. +#include <linux/spinlock.h>
  10295. +#include <linux/sched.h> /* for struct task_struct */
  10296. +
  10297. +#define WBQ_MAGIC 0x7876dc76
  10298. +
  10299. +/* write-back request. */
  10300. +struct wbq {
  10301. + int magic;
  10302. + struct list_head link; /* list head of this list is in entd context */
  10303. + struct writeback_control *wbc;
  10304. + struct page *page;
  10305. + struct address_space *mapping;
  10306. + struct completion completion;
  10307. + jnode *node; /* set if ent thread captured requested page */
  10308. + int written; /* set if ent thread wrote requested page */
  10309. +};
  10310. +
  10311. +/* ent-thread context. This is used to synchronize starting/stopping ent
  10312. + * threads. */
  10313. +typedef struct entd_context {
  10314. + /* wait queue that ent thread waits on for more work. It's
  10315. + * signaled by write_page_by_ent(). */
  10316. + wait_queue_head_t wait;
  10317. + /* spinlock protecting other fields */
  10318. + spinlock_t guard;
  10319. + /* ent thread */
  10320. + struct task_struct *tsk;
  10321. + /* set to indicate that ent thread should leave. */
  10322. + int done;
  10323. + /* counter of active flushers */
  10324. + int flushers;
  10325. + /*
  10326. + * when reiser4_writepage asks entd to write a page - it adds struct
  10327. + * wbq to this list
  10328. + */
  10329. + struct list_head todo_list;
  10330. + /* number of elements on the above list */
  10331. + int nr_todo_reqs;
  10332. +
  10333. + struct wbq *cur_request;
  10334. + /*
  10335. + * when entd writes a page it moves write-back request from todo_list
  10336. + * to done_list. This list is used at the end of entd iteration to
  10337. + * wakeup requestors and iput inodes.
  10338. + */
  10339. + struct list_head done_list;
  10340. + /* number of elements on the above list */
  10341. + int nr_done_reqs;
  10342. +
  10343. +#if REISER4_DEBUG
  10344. + /* list of all active flushers */
  10345. + struct list_head flushers_list;
  10346. +#endif
  10347. +} entd_context;
  10348. +
  10349. +extern int reiser4_init_entd(struct super_block *);
  10350. +extern void reiser4_done_entd(struct super_block *);
  10351. +
  10352. +extern void reiser4_enter_flush(struct super_block *);
  10353. +extern void reiser4_leave_flush(struct super_block *);
  10354. +
  10355. +extern int write_page_by_ent(struct page *, struct writeback_control *);
  10356. +extern int wbq_available(void);
  10357. +extern void ent_writes_page(struct super_block *, struct page *);
  10358. +
  10359. +extern jnode *get_jnode_by_wbq(struct super_block *, struct wbq *);
  10360. +/* __ENTD_H__ */
  10361. +#endif
  10362. +
  10363. +/* Make Linus happy.
  10364. + Local variables:
  10365. + c-indentation-style: "K&R"
  10366. + mode-name: "LC"
  10367. + c-basic-offset: 8
  10368. + tab-width: 8
  10369. + fill-column: 120
  10370. + End:
  10371. +*/
  10372. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/eottl.c linux-5.16.14/fs/reiser4/eottl.c
  10373. --- linux-5.16.14.orig/fs/reiser4/eottl.c 1970-01-01 01:00:00.000000000 +0100
  10374. +++ linux-5.16.14/fs/reiser4/eottl.c 2022-03-12 13:26:19.647892716 +0100
  10375. @@ -0,0 +1,510 @@
  10376. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  10377. + reiser4/README */
  10378. +
  10379. +#include "forward.h"
  10380. +#include "debug.h"
  10381. +#include "key.h"
  10382. +#include "coord.h"
  10383. +#include "plugin/item/item.h"
  10384. +#include "plugin/node/node.h"
  10385. +#include "znode.h"
  10386. +#include "block_alloc.h"
  10387. +#include "tree_walk.h"
  10388. +#include "tree_mod.h"
  10389. +#include "carry.h"
  10390. +#include "tree.h"
  10391. +#include "super.h"
  10392. +
  10393. +#include <linux/types.h> /* for __u?? */
  10394. +
  10395. +/*
  10396. + * Extents on the twig level (EOTTL) handling.
  10397. + *
  10398. + * EOTTL poses some problems to the tree traversal, that are better explained
  10399. + * by example.
  10400. + *
  10401. + * Suppose we have block B1 on the twig level with the following items:
  10402. + *
  10403. + * 0. internal item I0 with key (0:0:0:0) (locality, key-type, object-id,
  10404. + * offset)
  10405. + * 1. extent item E1 with key (1:4:100:0), having 10 blocks of 4k each
  10406. + * 2. internal item I2 with key (10:0:0:0)
  10407. + *
  10408. + * We are trying to insert item with key (5:0:0:0). Lookup finds node B1, and
  10409. + * then intra-node lookup is done. This lookup finished on the E1, because the
  10410. + * key we are looking for is larger than the key of E1 and is smaller than key
  10411. + * the of I2.
  10412. + *
  10413. + * Here search is stuck.
  10414. + *
  10415. + * After some thought it is clear what is wrong here: extents on the twig level
  10416. + * break some basic property of the *search* tree (on the pretext, that they
  10417. + * restore property of balanced tree).
  10418. + *
  10419. + * Said property is the following: if in the internal node of the search tree
  10420. + * we have [ ... Key1 Pointer Key2 ... ] then, all data that are or will be
  10421. + * keyed in the tree with the Key such that Key1 <= Key < Key2 are accessible
  10422. + * through the Pointer.
  10423. + *
  10424. + * This is not true, when Pointer is Extent-Pointer, simply because extent
  10425. + * cannot expand indefinitely to the right to include any item with
  10426. + *
  10427. + * Key1 <= Key <= Key2.
  10428. + *
  10429. + * For example, our E1 extent is only responsible for the data with keys
  10430. + *
  10431. + * (1:4:100:0) <= key <= (1:4:100:0xffffffffffffffff), and
  10432. + *
  10433. + * so, key range
  10434. + *
  10435. + * ( (1:4:100:0xffffffffffffffff), (10:0:0:0) )
  10436. + *
  10437. + * is orphaned: there is no way to get there from the tree root.
  10438. + *
  10439. + * In other words, extent pointers are different than normal child pointers as
  10440. + * far as search tree is concerned, and this creates such problems.
  10441. + *
  10442. + * Possible solution for this problem is to insert our item into node pointed
  10443. + * to by I2. There are some problems through:
  10444. + *
  10445. + * (1) I2 can be in a different node.
  10446. + * (2) E1 can be immediately followed by another extent E2.
  10447. + *
  10448. + * (1) is solved by calling reiser4_get_right_neighbor() and accounting
  10449. + * for locks/coords as necessary.
  10450. + *
  10451. + * (2) is more complex. Solution here is to insert new empty leaf node and
  10452. + * insert internal item between E1 and E2 pointing to said leaf node. This is
  10453. + * further complicated by possibility that E2 is in a different node, etc.
  10454. + *
  10455. + * Problems:
  10456. + *
  10457. + * (1) if there was internal item I2 immediately on the right of an extent E1
  10458. + * and we decided to insert new item S1 into node N2 pointed to by I2, then
  10459. + * key of S1 will be less than smallest key in the N2. Normally, search key
  10460. + * checks that key we are looking for is in the range of keys covered by the
  10461. + * node the key is being looked up in. To work around this situation, while
  10462. + * preserving useful consistency check new flag CBK_TRUST_DK was added to the
  10463. + * cbk flags bitmask. This flag is automatically set on entrance to the
  10464. + * coord_by_key() and is only cleared when we are about to enter situation
  10465. + * described above.
  10466. + *
  10467. + * (2) If extent E1 is immediately followed by another extent E2 and we are
  10468. + * searching for the key that is between E1 and E2 we only have to insert new
  10469. + * empty leaf node when coord_by_key was called for insertion, rather than just
  10470. + * for lookup. To distinguish these cases, new flag CBK_FOR_INSERT was added to
  10471. + * the cbk flags bitmask. This flag is automatically set by coord_by_key calls
  10472. + * performed by insert_by_key() and friends.
  10473. + *
  10474. + * (3) Insertion of new empty leaf node (possibly) requires balancing. In any
  10475. + * case it requires modification of node content which is only possible under
  10476. + * write lock. It may well happen that we only have read lock on the node where
  10477. + * new internal pointer is to be inserted (common case: lookup of non-existent
  10478. + * stat-data that falls between two extents). If only read lock is held, tree
  10479. + * traversal is restarted with lock_level modified so that next time we hit
  10480. + * this problem, write lock will be held. Once we have write lock, balancing
  10481. + * will be performed.
  10482. + */
  10483. +
  10484. +/**
  10485. + * is_next_item_internal - check whether next item is internal
  10486. + * @coord: coordinate of extent item in twig node
  10487. + * @key: search key
  10488. + * @lh: twig node lock handle
  10489. + *
  10490. + * Looks at the unit next to @coord. If it is an internal one - 1 is returned,
  10491. + * @coord is set to that unit. If that unit is in right neighbor, @lh is moved
  10492. + * to that node, @coord is set to its first unit. If next item is not internal
  10493. + * or does not exist then 0 is returned, @coord and @lh are left unchanged. 2
  10494. + * is returned if search restart has to be done.
  10495. + */
  10496. +static int
  10497. +is_next_item_internal(coord_t *coord, const reiser4_key * key,
  10498. + lock_handle * lh)
  10499. +{
  10500. + coord_t next;
  10501. + lock_handle rn;
  10502. + int result;
  10503. +
  10504. + coord_dup(&next, coord);
  10505. + if (coord_next_unit(&next) == 0) {
  10506. + /* next unit is in this node */
  10507. + if (item_is_internal(&next)) {
  10508. + coord_dup(coord, &next);
  10509. + return 1;
  10510. + }
  10511. + assert("vs-3", item_is_extent(&next));
  10512. + return 0;
  10513. + }
  10514. +
  10515. + /*
  10516. + * next unit either does not exist or is in right neighbor. If it is in
  10517. + * right neighbor we have to check right delimiting key because
  10518. + * concurrent thread could get there first and insert item with a key
  10519. + * smaller than @key
  10520. + */
  10521. + read_lock_dk(current_tree);
  10522. + result = keycmp(key, znode_get_rd_key(coord->node));
  10523. + read_unlock_dk(current_tree);
  10524. + assert("vs-6", result != EQUAL_TO);
  10525. + if (result == GREATER_THAN)
  10526. + return 2;
  10527. +
  10528. + /* lock right neighbor */
  10529. + init_lh(&rn);
  10530. + result = reiser4_get_right_neighbor(&rn, coord->node,
  10531. + znode_is_wlocked(coord->node) ?
  10532. + ZNODE_WRITE_LOCK : ZNODE_READ_LOCK,
  10533. + GN_CAN_USE_UPPER_LEVELS);
  10534. + if (result == -E_NO_NEIGHBOR) {
  10535. + /* we are on the rightmost edge of the tree */
  10536. + done_lh(&rn);
  10537. + return 0;
  10538. + }
  10539. +
  10540. + if (result) {
  10541. + assert("vs-4", result < 0);
  10542. + done_lh(&rn);
  10543. + return result;
  10544. + }
  10545. +
  10546. + /*
  10547. + * check whether concurrent thread managed to insert item with a key
  10548. + * smaller than @key
  10549. + */
  10550. + read_lock_dk(current_tree);
  10551. + result = keycmp(key, znode_get_ld_key(rn.node));
  10552. + read_unlock_dk(current_tree);
  10553. + assert("vs-6", result != EQUAL_TO);
  10554. + if (result == GREATER_THAN) {
  10555. + done_lh(&rn);
  10556. + return 2;
  10557. + }
  10558. +
  10559. + result = zload(rn.node);
  10560. + if (result) {
  10561. + assert("vs-5", result < 0);
  10562. + done_lh(&rn);
  10563. + return result;
  10564. + }
  10565. +
  10566. + coord_init_first_unit(&next, rn.node);
  10567. + if (item_is_internal(&next)) {
  10568. + /*
  10569. + * next unit is in right neighbor and it is an unit of internal
  10570. + * item. Unlock coord->node. Move @lh to right neighbor. @coord
  10571. + * is set to the first unit of right neighbor.
  10572. + */
  10573. + coord_dup(coord, &next);
  10574. + zrelse(rn.node);
  10575. + done_lh(lh);
  10576. + move_lh(lh, &rn);
  10577. + return 1;
  10578. + }
  10579. +
  10580. + /*
  10581. + * next unit is unit of extent item. Return without changing @lh and
  10582. + * @coord.
  10583. + */
  10584. + assert("vs-6", item_is_extent(&next));
  10585. + zrelse(rn.node);
  10586. + done_lh(&rn);
  10587. + return 0;
  10588. +}
  10589. +
  10590. +/**
  10591. + * rd_key - calculate key of an item next to the given one
  10592. + * @coord: position in a node
  10593. + * @key: storage for result key
  10594. + *
  10595. + * @coord is set between items or after the last item in a node. Calculate key
  10596. + * of item to the right of @coord.
  10597. + */
  10598. +static reiser4_key *rd_key(const coord_t *coord, reiser4_key *key)
  10599. +{
  10600. + coord_t dup;
  10601. +
  10602. + assert("nikita-2281", coord_is_between_items(coord));
  10603. + coord_dup(&dup, coord);
  10604. +
  10605. + if (coord_set_to_right(&dup) == 0)
  10606. + /* next item is in this node. Return its key. */
  10607. + unit_key_by_coord(&dup, key);
  10608. + else {
  10609. + /*
  10610. + * next item either does not exist or is in right
  10611. + * neighbor. Return znode's right delimiting key.
  10612. + */
  10613. + read_lock_dk(current_tree);
  10614. + *key = *znode_get_rd_key(coord->node);
  10615. + read_unlock_dk(current_tree);
  10616. + }
  10617. + return key;
  10618. +}
  10619. +
  10620. +/**
  10621. + * add_empty_leaf - insert empty leaf between two extents
  10622. + * @insert_coord: position in twig node between two extents
  10623. + * @lh: twig node lock handle
  10624. + * @key: left delimiting key of new node
  10625. + * @rdkey: right delimiting key of new node
  10626. + *
  10627. + * Inserts empty leaf node between two extent items. It is necessary when we
  10628. + * have to insert an item on leaf level between two extents (items on the twig
  10629. + * level).
  10630. + */
  10631. +static int
  10632. +add_empty_leaf(coord_t *insert_coord, lock_handle *lh,
  10633. + const reiser4_key *key, const reiser4_key *rdkey)
  10634. +{
  10635. + int result;
  10636. + carry_pool *pool;
  10637. + carry_level *todo;
  10638. + reiser4_item_data *item;
  10639. + carry_insert_data *cdata;
  10640. + carry_op *op;
  10641. + znode *node;
  10642. + reiser4_tree *tree;
  10643. +
  10644. + assert("vs-49827", znode_contains_key_lock(insert_coord->node, key));
  10645. + tree = znode_get_tree(insert_coord->node);
  10646. + node = reiser4_new_node(insert_coord->node, LEAF_LEVEL);
  10647. + if (IS_ERR(node))
  10648. + return PTR_ERR(node);
  10649. +
  10650. + /* setup delimiting keys for node being inserted */
  10651. + write_lock_dk(tree);
  10652. + znode_set_ld_key(node, key);
  10653. + znode_set_rd_key(node, rdkey);
  10654. + ON_DEBUG(node->creator = current);
  10655. + ON_DEBUG(node->first_key = *key);
  10656. + write_unlock_dk(tree);
  10657. +
  10658. + ZF_SET(node, JNODE_ORPHAN);
  10659. +
  10660. + /*
  10661. + * allocate carry_pool, 3 carry_level-s, reiser4_item_data and
  10662. + * carry_insert_data
  10663. + */
  10664. + pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo) +
  10665. + sizeof(*item) + sizeof(*cdata));
  10666. + if (IS_ERR(pool))
  10667. + return PTR_ERR(pool);
  10668. + todo = (carry_level *) (pool + 1);
  10669. + init_carry_level(todo, pool);
  10670. +
  10671. + item = (reiser4_item_data *) (todo + 3);
  10672. + cdata = (carry_insert_data *) (item + 1);
  10673. +
  10674. + op = reiser4_post_carry(todo, COP_INSERT, insert_coord->node, 0);
  10675. + if (!IS_ERR(op)) {
  10676. + cdata->coord = insert_coord;
  10677. + cdata->key = key;
  10678. + cdata->data = item;
  10679. + op->u.insert.d = cdata;
  10680. + op->u.insert.type = COPT_ITEM_DATA;
  10681. + build_child_ptr_data(node, item);
  10682. + item->arg = NULL;
  10683. + /* have @insert_coord to be set at inserted item after
  10684. + insertion is done */
  10685. + todo->track_type = CARRY_TRACK_CHANGE;
  10686. + todo->tracked = lh;
  10687. +
  10688. + result = reiser4_carry(todo, NULL);
  10689. + if (result == 0) {
  10690. + /*
  10691. + * pin node in memory. This is necessary for
  10692. + * znode_make_dirty() below.
  10693. + */
  10694. + result = zload(node);
  10695. + if (result == 0) {
  10696. + lock_handle local_lh;
  10697. +
  10698. + /*
  10699. + * if we inserted new child into tree we have
  10700. + * to mark it dirty so that flush will be able
  10701. + * to process it.
  10702. + */
  10703. + init_lh(&local_lh);
  10704. + result = longterm_lock_znode(&local_lh, node,
  10705. + ZNODE_WRITE_LOCK,
  10706. + ZNODE_LOCK_LOPRI);
  10707. + if (result == 0) {
  10708. + znode_make_dirty(node);
  10709. +
  10710. + /*
  10711. + * when internal item pointing to @node
  10712. + * was inserted into twig node
  10713. + * create_hook_internal did not connect
  10714. + * it properly because its right
  10715. + * neighbor was not known. Do it
  10716. + * here
  10717. + */
  10718. + write_lock_tree(tree);
  10719. + assert("nikita-3312",
  10720. + znode_is_right_connected(node));
  10721. + assert("nikita-2984",
  10722. + node->right == NULL);
  10723. + ZF_CLR(node, JNODE_RIGHT_CONNECTED);
  10724. + write_unlock_tree(tree);
  10725. + result =
  10726. + connect_znode(insert_coord, node);
  10727. + ON_DEBUG(if (result == 0) check_dkeys(node););
  10728. +
  10729. + done_lh(lh);
  10730. + move_lh(lh, &local_lh);
  10731. + assert("vs-1676", node_is_empty(node));
  10732. + coord_init_first_unit(insert_coord,
  10733. + node);
  10734. + } else {
  10735. + warning("nikita-3136",
  10736. + "Cannot lock child");
  10737. + }
  10738. + done_lh(&local_lh);
  10739. + zrelse(node);
  10740. + }
  10741. + }
  10742. + } else
  10743. + result = PTR_ERR(op);
  10744. + zput(node);
  10745. + done_carry_pool(pool);
  10746. + return result;
  10747. +}
  10748. +
  10749. +/**
  10750. + * handle_eottl - handle extent-on-the-twig-level cases in tree traversal
  10751. + * @h: search handle
  10752. + * @outcome: flag saying whether search has to restart or is done
  10753. + *
  10754. + * Handles search on twig level. If this function completes search itself then
  10755. + * it returns 1. If search has to go one level down then 0 is returned. If
  10756. + * error happens then LOOKUP_DONE is returned via @outcome and error code is
  10757. + * saved in @h->result.
  10758. + */
  10759. +int handle_eottl(cbk_handle *h, int *outcome)
  10760. +{
  10761. + int result;
  10762. + reiser4_key key;
  10763. + coord_t *coord;
  10764. +
  10765. + coord = h->coord;
  10766. +
  10767. + if (h->level != TWIG_LEVEL ||
  10768. + (coord_is_existing_item(coord) && item_is_internal(coord))) {
  10769. + /* Continue to traverse tree downward. */
  10770. + return 0;
  10771. + }
  10772. +
  10773. + /*
  10774. + * make sure that @h->coord is set to twig node and that it is either
  10775. + * set to extent item or after extent item
  10776. + */
  10777. + assert("vs-356", h->level == TWIG_LEVEL);
  10778. + assert("vs-357", ({
  10779. + coord_t lcoord;
  10780. + coord_dup(&lcoord, coord);
  10781. + check_me("vs-733", coord_set_to_left(&lcoord) == 0);
  10782. + item_is_extent(&lcoord);
  10783. + }
  10784. + ));
  10785. +
  10786. + if (*outcome == NS_FOUND) {
  10787. + /* we have found desired key on twig level in extent item */
  10788. + h->result = CBK_COORD_FOUND;
  10789. + *outcome = LOOKUP_DONE;
  10790. + return 1;
  10791. + }
  10792. +
  10793. + if (!(h->flags & CBK_FOR_INSERT)) {
  10794. + /* tree traversal is not for insertion. Just return
  10795. + CBK_COORD_NOTFOUND. */
  10796. + h->result = CBK_COORD_NOTFOUND;
  10797. + *outcome = LOOKUP_DONE;
  10798. + return 1;
  10799. + }
  10800. +
  10801. + /* take a look at the item to the right of h -> coord */
  10802. + result = is_next_item_internal(coord, h->key, h->active_lh);
  10803. + if (unlikely(result < 0)) {
  10804. + h->error = "get_right_neighbor failed";
  10805. + h->result = result;
  10806. + *outcome = LOOKUP_DONE;
  10807. + return 1;
  10808. + }
  10809. + if (result == 0) {
  10810. + /*
  10811. + * item to the right is also an extent one. Allocate a new node
  10812. + * and insert pointer to it after item h -> coord.
  10813. + *
  10814. + * This is a result of extents being located at the twig
  10815. + * level. For explanation, see comment just above
  10816. + * is_next_item_internal().
  10817. + */
  10818. + znode *loaded;
  10819. +
  10820. + if (cbk_lock_mode(h->level, h) != ZNODE_WRITE_LOCK) {
  10821. + /*
  10822. + * we got node read locked, restart coord_by_key to
  10823. + * have write lock on twig level
  10824. + */
  10825. + h->lock_level = TWIG_LEVEL;
  10826. + h->lock_mode = ZNODE_WRITE_LOCK;
  10827. + *outcome = LOOKUP_REST;
  10828. + return 1;
  10829. + }
  10830. +
  10831. + loaded = coord->node;
  10832. + result =
  10833. + add_empty_leaf(coord, h->active_lh, h->key,
  10834. + rd_key(coord, &key));
  10835. + if (result) {
  10836. + h->error = "could not add empty leaf";
  10837. + h->result = result;
  10838. + *outcome = LOOKUP_DONE;
  10839. + return 1;
  10840. + }
  10841. + /* added empty leaf is locked (h->active_lh), its parent node
  10842. + is unlocked, h->coord is set as EMPTY */
  10843. + assert("vs-13", coord->between == EMPTY_NODE);
  10844. + assert("vs-14", znode_is_write_locked(coord->node));
  10845. + assert("vs-15",
  10846. + WITH_DATA(coord->node, node_is_empty(coord->node)));
  10847. + assert("vs-16", jnode_is_leaf(ZJNODE(coord->node)));
  10848. + assert("vs-17", coord->node == h->active_lh->node);
  10849. + *outcome = LOOKUP_DONE;
  10850. + h->result = CBK_COORD_NOTFOUND;
  10851. + return 1;
  10852. + } else if (result == 1) {
  10853. + /*
  10854. + * this is special case mentioned in the comment on
  10855. + * tree.h:cbk_flags. We have found internal item immediately on
  10856. + * the right of extent, and we are going to insert new item
  10857. + * there. Key of item we are going to insert is smaller than
  10858. + * leftmost key in the node pointed to by said internal item
  10859. + * (otherwise search wouldn't come to the extent in the first
  10860. + * place).
  10861. + *
  10862. + * This is a result of extents being located at the twig
  10863. + * level. For explanation, see comment just above
  10864. + * is_next_item_internal().
  10865. + */
  10866. + h->flags &= ~CBK_TRUST_DK;
  10867. + } else {
  10868. + assert("vs-8", result == 2);
  10869. + *outcome = LOOKUP_REST;
  10870. + return 1;
  10871. + }
  10872. + assert("vs-362", WITH_DATA(coord->node, item_is_internal(coord)));
  10873. + return 0;
  10874. +}
  10875. +
  10876. +/*
  10877. + * Local variables:
  10878. + * c-indentation-style: "K&R"
  10879. + * mode-name: "LC"
  10880. + * c-basic-offset: 8
  10881. + * tab-width: 8
  10882. + * fill-column: 120
  10883. + * scroll-step: 1
  10884. + * End:
  10885. + */
  10886. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/estimate.c linux-5.16.14/fs/reiser4/estimate.c
  10887. --- linux-5.16.14.orig/fs/reiser4/estimate.c 1970-01-01 01:00:00.000000000 +0100
  10888. +++ linux-5.16.14/fs/reiser4/estimate.c 2022-03-12 13:26:19.647892716 +0100
  10889. @@ -0,0 +1,129 @@
  10890. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  10891. + reiser4/README */
  10892. +
  10893. +#include "debug.h"
  10894. +#include "dformat.h"
  10895. +#include "tree.h"
  10896. +#include "carry.h"
  10897. +#include "inode.h"
  10898. +#include "plugin/cluster.h"
  10899. +#include "plugin/item/ctail.h"
  10900. +
  10901. +/* This returns how many nodes might get dirty and added nodes if @children
  10902. + nodes are dirtied
  10903. +
  10904. + Amount of internals which will get dirty or get allocated we estimate as 5%
  10905. + of the children + 1 balancing. 1 balancing is 2 neighbours, 2 new blocks and
  10906. + the current block on the leaf level, 2 neighbour nodes + the current (or 1
  10907. + neighbour and 1 new and the current) on twig level, 2 neighbour nodes on
  10908. + upper levels and 1 for a new root. So 5 for leaf level, 3 for twig level,
  10909. + 2 on upper + 1 for root.
  10910. +
  10911. + Do not calculate the current node of the lowest level here - this is overhead
  10912. + only.
  10913. +
  10914. + children is almost always 1 here. Exception is flow insertion
  10915. +*/
  10916. +static reiser4_block_nr
  10917. +max_balance_overhead(reiser4_block_nr childen, tree_level tree_height)
  10918. +{
  10919. + reiser4_block_nr ten_percent;
  10920. +
  10921. + ten_percent = ((103 * childen) >> 10);
  10922. +
  10923. + /* If we have too many balancings at the time, tree height can rise by
  10924. + more than 1. Assume that if tree_height is 5, it can rise by 1 only.
  10925. + */
  10926. + return ((tree_height < 5 ? 5 : tree_height) * 2 + (4 + ten_percent));
  10927. +}
  10928. +
  10929. +/* this returns maximal possible number of nodes which can be modified plus
  10930. + number of new nodes which can be required to perform insertion of one item
  10931. + into the tree */
  10932. +/* it is only called when tree height changes, or gets initialized */
  10933. +reiser4_block_nr calc_estimate_one_insert(tree_level height)
  10934. +{
  10935. + return 1 + max_balance_overhead(1, height);
  10936. +}
  10937. +
  10938. +reiser4_block_nr estimate_one_insert_item(reiser4_tree * tree)
  10939. +{
  10940. + return tree->estimate_one_insert;
  10941. +}
  10942. +
  10943. +/* this returns maximal possible number of nodes which can be modified plus
  10944. + number of new nodes which can be required to perform insertion of one unit
  10945. + into an item in the tree */
  10946. +reiser4_block_nr estimate_one_insert_into_item(reiser4_tree * tree)
  10947. +{
  10948. + /* estimate insert into item just like item insertion */
  10949. + return tree->estimate_one_insert;
  10950. +}
  10951. +
  10952. +reiser4_block_nr estimate_one_item_removal(reiser4_tree * tree)
  10953. +{
  10954. + /* on item removal reiser4 does not try to pack nodes more compact, so,
  10955. + only one node may be dirtied on leaf level */
  10956. + return tree->estimate_one_insert;
  10957. +}
  10958. +
  10959. +/* on leaf level insert_flow may add CARRY_FLOW_NEW_NODES_LIMIT new nodes and
  10960. + dirty 3 existing nodes (insert point and both its neighbors).
  10961. + Max_balance_overhead should estimate number of blocks which may change/get
  10962. + added on internal levels */
  10963. +reiser4_block_nr estimate_insert_flow(tree_level height)
  10964. +{
  10965. + return 3 + CARRY_FLOW_NEW_NODES_LIMIT + max_balance_overhead(3 +
  10966. + CARRY_FLOW_NEW_NODES_LIMIT,
  10967. + height);
  10968. +}
  10969. +
  10970. +/* returns max number of nodes that can be occupied by a disk cluster */
  10971. +static reiser4_block_nr estimate_cluster(struct inode *inode, int unprepped)
  10972. +{
  10973. + int per_cluster;
  10974. + per_cluster = (unprepped ? 1 : cluster_nrpages(inode));
  10975. + return 3 + per_cluster +
  10976. + max_balance_overhead(3 + per_cluster,
  10977. + REISER4_MAX_ZTREE_HEIGHT);
  10978. +}
  10979. +
  10980. +/* how many nodes might get dirty and added
  10981. + during insertion of a disk cluster */
  10982. +reiser4_block_nr estimate_insert_cluster(struct inode *inode)
  10983. +{
  10984. + return estimate_cluster(inode, 1); /* 24 */
  10985. +}
  10986. +
  10987. +/* how many nodes might get dirty and added
  10988. + during update of a (prepped or unprepped) disk cluster */
  10989. +reiser4_block_nr estimate_update_cluster(struct inode *inode)
  10990. +{
  10991. + return estimate_cluster(inode, 0); /* 44, for 64K-cluster */
  10992. +}
  10993. +
  10994. +/* How many nodes occupied by a disk cluster might get dirty.
  10995. + Note that this estimation is not precise (i.e. disk cluster
  10996. + can occupy more nodes).
  10997. + Q: Why we don't use precise estimation?
  10998. + A: 1.Because precise estimation is fairly bad: 65536 nodes
  10999. + for 64K logical cluster, it means 256M of dead space on
  11000. + a partition
  11001. + 2.It is a very rare case when disk cluster occupies more
  11002. + nodes then this estimation returns.
  11003. +*/
  11004. +reiser4_block_nr estimate_dirty_cluster(struct inode *inode)
  11005. +{
  11006. + return cluster_nrpages(inode) + 4;
  11007. +}
  11008. +
  11009. +/* Make Linus happy.
  11010. + Local variables:
  11011. + c-indentation-style: "K&R"
  11012. + mode-name: "LC"
  11013. + c-basic-offset: 8
  11014. + tab-width: 8
  11015. + fill-column: 120
  11016. + scroll-step: 1
  11017. + End:
  11018. +*/
  11019. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/export_ops.c linux-5.16.14/fs/reiser4/export_ops.c
  11020. --- linux-5.16.14.orig/fs/reiser4/export_ops.c 1970-01-01 01:00:00.000000000 +0100
  11021. +++ linux-5.16.14/fs/reiser4/export_ops.c 2022-03-12 13:26:19.647892716 +0100
  11022. @@ -0,0 +1,325 @@
  11023. +/* Copyright 2005 by Hans Reiser, licensing governed by
  11024. + * reiser4/README */
  11025. +
  11026. +#include "inode.h"
  11027. +#include "plugin/plugin.h"
  11028. +
  11029. +/*
  11030. + * Supported file-handle types
  11031. + */
  11032. +typedef enum {
  11033. + FH_WITH_PARENT = 0x10, /* file handle with parent */
  11034. + FH_WITHOUT_PARENT = 0x11 /* file handle without parent */
  11035. +} reiser4_fhtype;
  11036. +
  11037. +#define NFSERROR (255)
  11038. +
  11039. +/* initialize place-holder for object */
  11040. +static void object_on_wire_init(reiser4_object_on_wire *o)
  11041. +{
  11042. + o->plugin = NULL;
  11043. +}
  11044. +
  11045. +/* finish with @o */
  11046. +static void object_on_wire_done(reiser4_object_on_wire *o)
  11047. +{
  11048. + if (o->plugin != NULL)
  11049. + o->plugin->wire.done(o);
  11050. +}
  11051. +
  11052. +/*
  11053. + * read serialized object identity from @addr and store information about
  11054. + * object in @obj. This is dual to encode_inode().
  11055. + */
  11056. +static char *decode_inode(struct super_block *s, char *addr,
  11057. + reiser4_object_on_wire * obj)
  11058. +{
  11059. + file_plugin *fplug;
  11060. +
  11061. + /* identifier of object plugin is stored in the first two bytes,
  11062. + * followed by... */
  11063. + fplug = file_plugin_by_disk_id(reiser4_get_tree(s), (d16 *) addr);
  11064. + if (fplug != NULL) {
  11065. + addr += sizeof(d16);
  11066. + obj->plugin = fplug;
  11067. + assert("nikita-3520", fplug->wire.read != NULL);
  11068. + /* plugin specific encoding of object identity. */
  11069. + addr = fplug->wire.read(addr, obj);
  11070. + } else
  11071. + addr = ERR_PTR(RETERR(-EINVAL));
  11072. + return addr;
  11073. +}
  11074. +
  11075. +static struct dentry *reiser4_get_dentry(struct super_block *super,
  11076. + void *data);
  11077. +/**
  11078. + * reiser4_decode_fh: decode on-wire object - helper function
  11079. + * for fh_to_dentry, fh_to_parent export operations;
  11080. + * @super: super block;
  11081. + * @addr: onwire object to be decoded;
  11082. + *
  11083. + * Returns dentry referring to the object being decoded.
  11084. + */
  11085. +static struct dentry *reiser4_decode_fh(struct super_block * super,
  11086. + char * addr)
  11087. +{
  11088. + reiser4_object_on_wire object;
  11089. +
  11090. + object_on_wire_init(&object);
  11091. +
  11092. + addr = decode_inode(super, addr, &object);
  11093. + if (!IS_ERR(addr)) {
  11094. + struct dentry *d;
  11095. + d = reiser4_get_dentry(super, &object);
  11096. + if (d != NULL && !IS_ERR(d))
  11097. + /* FIXME check for -ENOMEM */
  11098. + reiser4_get_dentry_fsdata(d)->stateless = 1;
  11099. + addr = (char *)d;
  11100. + }
  11101. + object_on_wire_done(&object);
  11102. + return (void *)addr;
  11103. +}
  11104. +
  11105. +static struct dentry *reiser4_fh_to_dentry(struct super_block *sb,
  11106. + struct fid *fid,
  11107. + int fh_len, int fh_type)
  11108. +{
  11109. + reiser4_context *ctx;
  11110. + struct dentry *d;
  11111. +
  11112. + assert("edward-1536",
  11113. + fh_type == FH_WITH_PARENT || fh_type == FH_WITHOUT_PARENT);
  11114. +
  11115. + ctx = reiser4_init_context(sb);
  11116. + if (IS_ERR(ctx))
  11117. + return (struct dentry *)ctx;
  11118. +
  11119. + d = reiser4_decode_fh(sb, (char *)fid->raw);
  11120. +
  11121. + reiser4_exit_context(ctx);
  11122. + return d;
  11123. +}
  11124. +
  11125. +static struct dentry *reiser4_fh_to_parent(struct super_block *sb,
  11126. + struct fid *fid,
  11127. + int fh_len, int fh_type)
  11128. +{
  11129. + char * addr;
  11130. + struct dentry * d;
  11131. + reiser4_context *ctx;
  11132. + file_plugin *fplug;
  11133. +
  11134. + if (fh_type == FH_WITHOUT_PARENT)
  11135. + return NULL;
  11136. + assert("edward-1537", fh_type == FH_WITH_PARENT);
  11137. +
  11138. + ctx = reiser4_init_context(sb);
  11139. + if (IS_ERR(ctx))
  11140. + return (struct dentry *)ctx;
  11141. + addr = (char *)fid->raw;
  11142. + /* extract 2-bytes file plugin id */
  11143. + fplug = file_plugin_by_disk_id(reiser4_get_tree(sb), (d16 *)addr);
  11144. + if (fplug == NULL) {
  11145. + d = ERR_PTR(RETERR(-EINVAL));
  11146. + goto exit;
  11147. + }
  11148. + addr += sizeof(d16);
  11149. + /* skip previously encoded object */
  11150. + addr = fplug->wire.read(addr, NULL /* skip */);
  11151. + if (IS_ERR(addr)) {
  11152. + d = (struct dentry *)addr;
  11153. + goto exit;
  11154. + }
  11155. + /* @extract and decode parent object */
  11156. + d = reiser4_decode_fh(sb, addr);
  11157. + exit:
  11158. + reiser4_exit_context(ctx);
  11159. + return d;
  11160. +}
  11161. +
  11162. +/*
  11163. + * Object serialization support.
  11164. + *
  11165. + * To support knfsd file system provides export_operations that are used to
  11166. + * construct and interpret NFS file handles. As a generalization of this,
  11167. + * reiser4 object plugins have serialization support: it provides methods to
  11168. + * create on-wire representation of identity of reiser4 object, and
  11169. + * re-create/locate object given its on-wire identity.
  11170. + *
  11171. + */
  11172. +
  11173. +/*
  11174. + * return number of bytes that on-wire representation of @inode's identity
  11175. + * consumes.
  11176. + */
  11177. +static int encode_inode_size(struct inode *inode)
  11178. +{
  11179. + assert("nikita-3514", inode != NULL);
  11180. + assert("nikita-3515", inode_file_plugin(inode) != NULL);
  11181. + assert("nikita-3516", inode_file_plugin(inode)->wire.size != NULL);
  11182. +
  11183. + return inode_file_plugin(inode)->wire.size(inode) + sizeof(d16);
  11184. +}
  11185. +
  11186. +/*
  11187. + * store on-wire representation of @inode's identity at the area beginning at
  11188. + * @start.
  11189. + */
  11190. +static char *encode_inode(struct inode *inode, char *start)
  11191. +{
  11192. + assert("nikita-3517", inode != NULL);
  11193. + assert("nikita-3518", inode_file_plugin(inode) != NULL);
  11194. + assert("nikita-3519", inode_file_plugin(inode)->wire.write != NULL);
  11195. +
  11196. + /*
  11197. + * first, store two-byte identifier of object plugin, then
  11198. + */
  11199. + save_plugin_id(file_plugin_to_plugin(inode_file_plugin(inode)),
  11200. + (d16 *) start);
  11201. + start += sizeof(d16);
  11202. + /*
  11203. + * call plugin to serialize object's identity
  11204. + */
  11205. + return inode_file_plugin(inode)->wire.write(inode, start);
  11206. +}
  11207. +
  11208. +/* this returns number of 32 bit long numbers encoded in @lenp. 255 is
  11209. + * returned if file handle can not be stored */
  11210. +/**
  11211. + * reiser4_encode_fh - encode_fh of export operations
  11212. + * @dentry:
  11213. + * @fh:
  11214. + * @lenp:
  11215. + * @need_parent:
  11216. + *
  11217. + */
  11218. +static int
  11219. +reiser4_encode_fh(struct inode *inode, __u32 *fh, int *lenp,
  11220. + struct inode *parent)
  11221. +{
  11222. + char *addr;
  11223. + int need;
  11224. + int delta;
  11225. + int result;
  11226. + bool need_parent;
  11227. + reiser4_context *ctx;
  11228. +
  11229. + /*
  11230. + * knfsd asks as to serialize @inode, and, optionally its
  11231. + * parent @parent (if it is non-NULL).
  11232. + *
  11233. + * encode_inode() and encode_inode_size() is used to build
  11234. + * representation of object and its parent. All hard work is done by
  11235. + * object plugins.
  11236. + */
  11237. + need_parent = (parent != NULL);
  11238. + addr = (char *)fh;
  11239. +
  11240. + need = encode_inode_size(inode);
  11241. + if (need < 0)
  11242. + return NFSERROR;
  11243. + if (need_parent) {
  11244. + delta = encode_inode_size(parent);
  11245. + if (delta < 0)
  11246. + return NFSERROR;
  11247. + need += delta;
  11248. + }
  11249. +
  11250. + ctx = reiser4_init_context(inode->i_sb);
  11251. + if (IS_ERR(ctx))
  11252. + return PTR_ERR(ctx);
  11253. +
  11254. + if (need <= sizeof(__u32) * (*lenp)) {
  11255. + addr = encode_inode(inode, addr);
  11256. + if (need_parent)
  11257. + addr = encode_inode(parent, addr);
  11258. +
  11259. + /* store in lenp number of 32bit words required for file
  11260. + * handle. */
  11261. + *lenp = (need + sizeof(__u32) - 1) >> 2;
  11262. + result = need_parent ? FH_WITH_PARENT : FH_WITHOUT_PARENT;
  11263. + } else
  11264. + /* not enough space in the file handle */
  11265. + result = NFSERROR;
  11266. + reiser4_exit_context(ctx);
  11267. + return result;
  11268. +}
  11269. +
  11270. +/**
  11271. + * reiser4_get_dentry_parent - get_parent of export operations
  11272. + * @child:
  11273. + *
  11274. + */
  11275. +static struct dentry *reiser4_get_dentry_parent(struct dentry *child)
  11276. +{
  11277. + struct inode *dir;
  11278. + dir_plugin *dplug;
  11279. + struct dentry *result;
  11280. + reiser4_context *ctx;
  11281. +
  11282. + assert("nikita-3527", child != NULL);
  11283. +
  11284. + dir = child->d_inode;
  11285. + assert("nikita-3529", dir != NULL);
  11286. +
  11287. + ctx = reiser4_init_context(dir->i_sb);
  11288. + if (IS_ERR(ctx))
  11289. + return (void *)ctx;
  11290. +
  11291. + dplug = inode_dir_plugin(dir);
  11292. + assert("nikita-3531", ergo(dplug != NULL, dplug->get_parent != NULL));
  11293. +
  11294. + if (unlikely(dplug == NULL)) {
  11295. + reiser4_exit_context(ctx);
  11296. + return ERR_PTR(RETERR(-ENOTDIR));
  11297. + }
  11298. + result = dplug->get_parent(dir);
  11299. + reiser4_exit_context(ctx);
  11300. + return result;
  11301. +}
  11302. +
  11303. +/**
  11304. + * reiser4_get_dentry - get_dentry of export operations
  11305. + * @super:
  11306. + * @data:
  11307. + *
  11308. + *
  11309. + */
  11310. +static struct dentry *reiser4_get_dentry(struct super_block *super, void *data)
  11311. +{
  11312. + reiser4_object_on_wire *o;
  11313. +
  11314. + assert("nikita-3522", super != NULL);
  11315. + assert("nikita-3523", data != NULL);
  11316. + /*
  11317. + * this is only supposed to be called by
  11318. + *
  11319. + * reiser4_decode_fh->find_exported_dentry
  11320. + *
  11321. + * so, reiser4_context should be here already.
  11322. + */
  11323. + assert("nikita-3526", is_in_reiser4_context());
  11324. +
  11325. + o = (reiser4_object_on_wire *)data;
  11326. + assert("nikita-3524", o->plugin != NULL);
  11327. + assert("nikita-3525", o->plugin->wire.get != NULL);
  11328. +
  11329. + return o->plugin->wire.get(super, o);
  11330. +}
  11331. +
  11332. +struct export_operations reiser4_export_operations = {
  11333. + .encode_fh = reiser4_encode_fh,
  11334. + .fh_to_dentry = reiser4_fh_to_dentry,
  11335. + .fh_to_parent = reiser4_fh_to_parent,
  11336. + .get_parent = reiser4_get_dentry_parent,
  11337. +};
  11338. +
  11339. +/*
  11340. + * Local variables:
  11341. + * c-indentation-style: "K&R"
  11342. + * mode-name: "LC"
  11343. + * c-basic-offset: 8
  11344. + * tab-width: 8
  11345. + * fill-column: 79
  11346. + * End:
  11347. + */
  11348. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/flush.c linux-5.16.14/fs/reiser4/flush.c
  11349. --- linux-5.16.14.orig/fs/reiser4/flush.c 1970-01-01 01:00:00.000000000 +0100
  11350. +++ linux-5.16.14/fs/reiser4/flush.c 2022-03-12 13:26:19.650892724 +0100
  11351. @@ -0,0 +1,3522 @@
  11352. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  11353. + reiser4/README */
  11354. +
  11355. +/* The design document for this file is at http://www.namesys.com/v4/v4.html. */
  11356. +
  11357. +#include "forward.h"
  11358. +#include "debug.h"
  11359. +#include "dformat.h"
  11360. +#include "key.h"
  11361. +#include "coord.h"
  11362. +#include "plugin/item/item.h"
  11363. +#include "plugin/plugin.h"
  11364. +#include "plugin/object.h"
  11365. +#include "txnmgr.h"
  11366. +#include "jnode.h"
  11367. +#include "znode.h"
  11368. +#include "block_alloc.h"
  11369. +#include "tree_walk.h"
  11370. +#include "carry.h"
  11371. +#include "tree.h"
  11372. +#include "vfs_ops.h"
  11373. +#include "inode.h"
  11374. +#include "page_cache.h"
  11375. +#include "wander.h"
  11376. +#include "super.h"
  11377. +#include "entd.h"
  11378. +#include "reiser4.h"
  11379. +#include "flush.h"
  11380. +#include "writeout.h"
  11381. +
  11382. +#include <asm/atomic.h>
  11383. +#include <linux/fs.h> /* for struct super_block */
  11384. +#include <linux/mm.h> /* for struct page */
  11385. +#include <linux/bio.h> /* for struct bio */
  11386. +#include <linux/pagemap.h>
  11387. +#include <linux/blkdev.h>
  11388. +
  11389. +/* IMPLEMENTATION NOTES */
  11390. +
  11391. +/* PARENT-FIRST: Some terminology: A parent-first traversal is a way of
  11392. + assigning a total order to the nodes of the tree in which the parent is
  11393. + placed before its children, which are ordered (recursively) in left-to-right
  11394. + order. When we speak of a "parent-first preceder", it describes the node that
  11395. + "came before in forward parent-first order". When we speak of a "parent-first
  11396. + follower", it describes the node that "comes next in parent-first order"
  11397. + (alternatively the node that "came before in reverse parent-first order").
  11398. +
  11399. + The following pseudo-code prints the nodes of a tree in forward parent-first
  11400. + order:
  11401. +
  11402. + void parent_first (node)
  11403. + {
  11404. + print_node (node);
  11405. + if (node->level > leaf) {
  11406. + for (i = 0; i < num_children; i += 1) {
  11407. + parent_first (node->child[i]);
  11408. + }
  11409. + }
  11410. + }
  11411. +*/
  11412. +
  11413. +/* JUST WHAT ARE WE TRYING TO OPTIMIZE, HERE? The idea is to optimize block
  11414. + allocation so that a left-to-right scan of the tree's data (i.e., the leaves
  11415. + in left-to-right order) can be accomplished with sequential reads, which
  11416. + results in reading nodes in their parent-first order. This is a
  11417. + read-optimization aspect of the flush algorithm, and there is also a
  11418. + write-optimization aspect, which is that we wish to make large sequential
  11419. + writes to the disk by allocating or reallocating blocks so that they can be
  11420. + written in sequence. Sometimes the read-optimization and write-optimization
  11421. + goals conflict with each other, as we discuss in more detail below.
  11422. +*/
  11423. +
  11424. +/* STATE BITS: The flush code revolves around the state of the jnodes it covers.
  11425. + Here are the relevant jnode->state bits and their relevance to flush:
  11426. +
  11427. + JNODE_DIRTY: If a node is dirty, it must be flushed. But in order to be
  11428. + written it must be allocated first. In order to be considered allocated,
  11429. + the jnode must have exactly one of { JNODE_OVRWR, JNODE_RELOC } set. These
  11430. + two bits are exclusive, and all dirtied jnodes eventually have one of these
  11431. + bits set during each transaction.
  11432. +
  11433. + JNODE_CREATED: The node was freshly created in its transaction and has no
  11434. + previous block address, so it is unconditionally assigned to be relocated,
  11435. + although this is mainly for code-convenience. It is not being 'relocated'
  11436. + from anything, but in almost every regard it is treated as part of the
  11437. + relocate set. The JNODE_CREATED bit remains set even after JNODE_RELOC is
  11438. + set, so the actual relocate can be distinguished from the
  11439. + created-and-allocated set easily: relocate-set members (belonging to the
  11440. + preserve-set) have (JNODE_RELOC) set and created-set members which have no
  11441. + previous location to preserve have (JNODE_RELOC | JNODE_CREATED) set.
  11442. +
  11443. + JNODE_OVRWR: The node belongs to atom's overwrite set. The flush algorithm
  11444. + made the decision to maintain the pre-existing location for this node and
  11445. + it will be written to the wandered-log.
  11446. +
  11447. + JNODE_RELOC: The flush algorithm made the decision to relocate this block
  11448. + (if it was not created, see note above). A block with JNODE_RELOC set is
  11449. + eligible for early-flushing and may be submitted during flush_empty_queues.
  11450. + When the JNODE_RELOC bit is set on a znode, the parent node's internal item
  11451. + is modified and the znode is rehashed.
  11452. +
  11453. + JNODE_SQUEEZABLE: Before shifting everything left, the flush algorithm
  11454. + scans the node and calls plugin->f.squeeze() method for its items. By this
  11455. + technology we update disk clusters of cryptcompress objects. Also if
  11456. + leftmost point that was found by flush scan has this flag (races with
  11457. + write(), rare case) the flush algorithm makes the decision to pass it to
  11458. + squalloc() in spite of its flushprepped status for squeezing, not for
  11459. + repeated allocation.
  11460. +
  11461. + JNODE_FLUSH_QUEUED: This bit is set when a call to flush enters the jnode
  11462. + into its flush queue. This means the jnode is not on any clean or dirty
  11463. + list, instead it is moved to one of the flush queue (see flush_queue.h)
  11464. + object private list. This prevents multiple concurrent flushes from
  11465. + attempting to start flushing from the same node.
  11466. +
  11467. + (DEAD STATE BIT) JNODE_FLUSH_BUSY: This bit was set during the bottom-up
  11468. + squeeze-and-allocate on a node while its children are actively being
  11469. + squeezed and allocated. This flag was created to avoid submitting a write
  11470. + request for a node while its children are still being allocated and
  11471. + squeezed. Then flush queue was re-implemented to allow unlimited number of
  11472. + nodes be queued. This flag support was commented out in source code because
  11473. + we decided that there was no reason to submit queued nodes before
  11474. + jnode_flush() finishes. However, current code calls fq_write() during a
  11475. + slum traversal and may submit "busy nodes" to disk. Probably we can
  11476. + re-enable the JNODE_FLUSH_BUSY bit support in future.
  11477. +
  11478. + With these state bits, we describe a test used frequently in the code below,
  11479. + jnode_is_flushprepped()(and the spin-lock-taking jnode_check_flushprepped()).
  11480. + The test for "flushprepped" returns true if any of the following are true:
  11481. +
  11482. + - The node is not dirty
  11483. + - The node has JNODE_RELOC set
  11484. + - The node has JNODE_OVRWR set
  11485. +
  11486. + If either the node is not dirty or it has already been processed by flush
  11487. + (and assigned JNODE_OVRWR or JNODE_RELOC), then it is prepped. If
  11488. + jnode_is_flushprepped() returns true then flush has work to do on that node.
  11489. +*/
  11490. +
  11491. +/* FLUSH_PREP_ONCE_PER_TRANSACTION: Within a single transaction a node is never
  11492. + flushprepped twice (unless an explicit call to flush_unprep is made as
  11493. + described in detail below). For example a node is dirtied, allocated, and
  11494. + then early-flushed to disk and set clean. Before the transaction commits, the
  11495. + page is dirtied again and, due to memory pressure, the node is flushed again.
  11496. + The flush algorithm will not relocate the node to a new disk location, it
  11497. + will simply write it to the same, previously relocated position again.
  11498. +*/
  11499. +
  11500. +/* THE BOTTOM-UP VS. TOP-DOWN ISSUE: This code implements a bottom-up algorithm
  11501. + where we start at a leaf node and allocate in parent-first order by iterating
  11502. + to the right. At each step of the iteration, we check for the right neighbor.
  11503. + Before advancing to the right neighbor, we check if the current position and
  11504. + the right neighbor share the same parent. If they do not share the same
  11505. + parent, the parent is allocated before the right neighbor.
  11506. +
  11507. + This process goes recursively up the tree and squeezes nodes level by level as
  11508. + long as the right neighbor and the current position have different parents,
  11509. + then it allocates the right-neighbors-with-different-parents on the way back
  11510. + down. This process is described in more detail in
  11511. + flush_squalloc_changed_ancestor and the recursive function
  11512. + squalloc_one_changed_ancestor. But the purpose here is not so much to discuss the
  11513. + specifics of the bottom-up approach as it is to contrast the bottom-up and
  11514. + top-down approaches.
  11515. +
  11516. + The top-down algorithm was implemented earlier (April-May 2002). In the
  11517. + top-down approach, we find a starting point by scanning left along each level
  11518. + past dirty nodes, then going up and repeating the process until the left node
  11519. + and the parent node are clean. We then perform a parent-first traversal from
  11520. + the starting point, which makes allocating in parent-first order trivial.
  11521. + After one subtree has been allocated in this manner, we move to the right,
  11522. + try moving upward, then repeat the parent-first traversal.
  11523. +
  11524. + Both approaches have problems that need to be addressed. Both are
  11525. + approximately the same amount of code, but the bottom-up approach has
  11526. + advantages in the order it acquires locks which, at the very least, make it
  11527. + the better approach. At first glance each one makes the other one look
  11528. + simpler, so it is important to remember a few of the problems with each one.
  11529. +
  11530. + Main problem with the top-down approach: When you encounter a clean child
  11531. + during the parent-first traversal, what do you do? You would like to avoid
  11532. + searching through a large tree of nodes just to find a few dirty leaves at
  11533. + the bottom, and there is not an obvious solution. One of the advantages of
  11534. + the top-down approach is that during the parent-first traversal you check
  11535. + every child of a parent to see if it is dirty. In this way, the top-down
  11536. + approach easily handles the main problem of the bottom-up approach:
  11537. + unallocated children.
  11538. +
  11539. + The unallocated children problem is that before writing a node to disk we
  11540. + must make sure that all of its children are allocated. Otherwise, the writing
  11541. + node means extra I/O because the node will have to be written again when
  11542. + the child is finally allocated.
  11543. +
  11544. + WE HAVE NOT YET ELIMINATED THE UNALLOCATED CHILDREN PROBLEM. Except for bugs,
  11545. + this should not cause any file system corruption, it only degrades I/O
  11546. + performance because a node may be written when it is sure to be written at
  11547. + least one more time in the same transaction when the remaining children are
  11548. + allocated. What follows is a description of how we will solve the problem.
  11549. +*/
  11550. +
  11551. +/* HANDLING UNALLOCATED CHILDREN: During flush we may allocate a parent node,
  11552. + then proceeding in parent first order, allocate some of its left-children,
  11553. + then encounter a clean child in the middle of the parent. We do not allocate
  11554. + the clean child, but there may remain unallocated (dirty) children to the
  11555. + right of the clean child. If we were to stop flushing at this moment and
  11556. + write everything to disk, the parent might still contain unallocated
  11557. + children.
  11558. +
  11559. + We could try to allocate all the descendants of every node that we allocate,
  11560. + but this is not necessary. Doing so could result in allocating the entire
  11561. + tree: if the root node is allocated then every unallocated node would have to
  11562. + be allocated before flushing. Actually, we do not have to write a node just
  11563. + because we allocate it. It is possible to allocate but not write a node
  11564. + during flush, when it still has unallocated children. However, this approach
  11565. + is probably not optimal for the following reason.
  11566. +
  11567. + The flush algorithm is designed to allocate nodes in parent-first order in an
  11568. + attempt to optimize reads that occur in the same order. Thus we are
  11569. + read-optimizing for a left-to-right scan through all the leaves in the
  11570. + system, and we are hoping to write-optimize at the same time because those
  11571. + nodes will be written together in batch. What happens, however, if we assign
  11572. + a block number to a node in its read-optimized order but then avoid writing
  11573. + it because it has unallocated children? In that situation, we lose out on the
  11574. + write-optimization aspect because a node will have to be written again to
  11575. + its location on the device, later, which likely means seeking back to that
  11576. + location.
  11577. +
  11578. + So there are tradeoffs. We can choose either:
  11579. +
  11580. + A. Allocate all unallocated children to preserve both write-optimization and
  11581. + read-optimization, but this is not always desirable because it may mean
  11582. + having to allocate and flush very many nodes at once.
  11583. +
  11584. + B. Defer writing nodes with unallocated children, keep their read-optimized
  11585. + locations, but sacrifice write-optimization because those nodes will be
  11586. + written again.
  11587. +
  11588. + C. Defer writing nodes with unallocated children, but do not keep their
  11589. + read-optimized locations. Instead, choose to write-optimize them later, when
  11590. + they are written. To facilitate this, we "undo" the read-optimized allocation
  11591. + that was given to the node so that later it can be write-optimized, thus
  11592. + "unpreparing" the flush decision. This is a case where we disturb the
  11593. + FLUSH_PREP_ONCE_PER_TRANSACTION rule described above. By a call to
  11594. + flush_unprep() we will: if the node was wandered, unset the JNODE_OVRWR bit;
  11595. + if the node was relocated, unset the JNODE_RELOC bit, non-deferred-deallocate
  11596. + its block location, and set the JNODE_CREATED bit, effectively setting the
  11597. + node back to an unallocated state.
  11598. +
  11599. + We will take the following approach in v4.0: for twig nodes we will always
  11600. + finish allocating unallocated children (A). For nodes with (level > TWIG)
  11601. + we will defer writing and choose write-optimization (C).
  11602. +
  11603. + To summarize, there are several parts to a solution that avoids the problem
  11604. + with unallocated children:
  11605. +
  11606. + FIXME-ZAM: Still no one approach is implemented to eliminate the
  11607. + "UNALLOCATED CHILDREN" problem because an experiment which was done
  11608. + showed that we have 1-2 nodes with unallocated children for thousands of
  11609. + written nodes. The experiment was simple, like copying/deletion of linux kernel
  11610. + sources. However the problem can arise in more complex tests. I think we have
  11611. + jnode_io_hook to insert a check for unallocated children and see what kind of
  11612. + problem we have.
  11613. +
  11614. + 1. When flush reaches a stopping point (e.g. a clean node) it should continue
  11615. + calling squeeze-and-allocate on any remaining unallocated children.
  11616. + FIXME: Difficulty to implement: should be simple -- amounts to adding a while
  11617. + loop to jnode_flush, see comments in that function.
  11618. +
  11619. + 2. When flush reaches flush_empty_queue(), some of the (level > TWIG) nodes
  11620. + may still have unallocated children. If the twig level has unallocated
  11621. + children it is an assertion failure. If a higher-level node has unallocated
  11622. + children, then it should be explicitly de-allocated by a call to
  11623. + flush_unprep().
  11624. + FIXME: Difficulty to implement: should be simple.
  11625. +
  11626. + 3. (CPU-Optimization) Checking whether a node has unallocated children may
  11627. + consume more CPU cycles than we would like, and it is possible (but medium
  11628. + complexity) to optimize this somewhat in the case where large sub-trees are
  11629. + flushed. The following observation helps: if both the left- and
  11630. + right-neighbor of a node are processed by the flush algorithm then the node
  11631. + itself is guaranteed to have all of its children allocated. However, the cost
  11632. + of this check may not be so expensive after all: it is not needed for leaves
  11633. + and flush can guarantee this property for twigs. That leaves only (level >
  11634. + TWIG) nodes that have to be checked, so this optimization only helps if at
  11635. + least three (level > TWIG) nodes are flushed in one pass, and the savings
  11636. + will be very small unless there are many more (level > TWIG) nodes. But if
  11637. + there are many (level > TWIG) nodes then the number of blocks being written
  11638. + will be very large, so the savings may be insignificant. That said, the idea
  11639. + is to maintain both the left and right edges of nodes that are processed in
  11640. + flush. When flush_empty_queue() is called, a relatively simple test will
  11641. + tell whether the (level > TWIG) node is on the edge. If it is on the edge,
  11642. + the slow check is necessary, but if it is in the interior then it can be
  11643. + assumed to have all of its children allocated. FIXME: medium complexity to
  11644. + implement, but simple to verify given that we must have a slow check anyway.
  11645. +
  11646. + 4. (Optional) This part is optional, not for v4.0--flush should work
  11647. + independently of whether this option is used or not. Called RAPID_SCAN, the
  11648. + idea is to amend the left-scan operation to take unallocated children into
  11649. + account. Normally, the left-scan operation goes left as long as adjacent
  11650. + nodes are dirty up until some large maximum value (FLUSH_SCAN_MAXNODES) at
  11651. + which point it stops and begins flushing. But scan-left may stop at a
  11652. + position where there are unallocated children to the left with the same
  11653. + parent. When RAPID_SCAN is enabled, the ordinary scan-left operation stops
  11654. + after FLUSH_RELOCATE_THRESHOLD, which is much smaller than
  11655. + FLUSH_SCAN_MAXNODES, then proceeds with a rapid scan. The rapid scan skips
  11656. + all the interior children of a node--if the leftmost child of a twig is
  11657. + dirty, check its left neighbor (the rightmost child of the twig to the left).
  11658. + If the left neighbor of the leftmost child is also dirty, then continue the
  11659. + scan at the left twig and repeat. This option will cause flush to allocate
  11660. + more twigs in a single pass, but it also has the potential to write many more
  11661. + nodes than would otherwise be written without the RAPID_SCAN option.
  11662. + RAPID_SCAN was partially implemented, code removed August 12, 2002 by JMACD.
  11663. +*/
  11664. +
  11665. +/* FLUSH CALLED ON NON-LEAF LEVEL. Most of our design considerations assume that
  11666. + the starting point for flush is a leaf node, but actually the flush code
  11667. + cares very little about whether or not this is true. It is possible that all
  11668. + the leaf nodes are flushed and dirty parent nodes still remain, in which case
  11669. + jnode_flush() is called on a non-leaf argument. Flush doesn't care--it treats
  11670. + the argument node as if it were a leaf, even when it is not. This is a simple
  11671. + approach, and there may be a more optimal policy but until a problem with
  11672. + this approach is discovered, simplest is probably best.
  11673. +
  11674. + NOTE: In this case, the ordering produced by flush is parent-first only if
  11675. + you ignore the leaves. This is done as a matter of simplicity and there is
  11676. + only one (shaky) justification. When an atom commits, it flushes all leaf
  11677. + level nodes first, followed by twigs, and so on. With flushing done in this
  11678. + order, if flush is eventually called on a non-leaf node it means that
  11679. + (somehow) we reached a point where all leaves are clean and only internal
  11680. + nodes need to be flushed. If that is the case, then it means there were no
  11681. + leaves that were the parent-first preceder/follower of the parent. This is
  11682. + expected to be a rare case, which is why we do nothing special about it.
  11683. + However, memory pressure may pass an internal node to flush when there are
  11684. + still dirty leaf nodes that need to be flushed, which could prove our
  11685. + original assumptions "inoperative". If this needs to be fixed, then
  11686. + scan_left/right should have special checks for the non-leaf levels. For
  11687. + example, instead of passing from a node to the left neighbor, it should pass
  11688. + from the node to the left neighbor's rightmost descendant (if dirty).
  11689. +
  11690. +*/
  11691. +
  11692. +/* UNIMPLEMENTED AS YET: REPACKING AND RESIZING. We walk the tree in 4MB-16MB
  11693. + chunks, dirtying everything and putting it into a transaction. We tell the
  11694. + allocator to allocate the blocks as far as possible towards one end of the
  11695. + logical device--the left (starting) end of the device if we are walking from
  11696. + left to right, the right end of the device if we are walking from right to
  11697. + left. We then make passes in alternating directions, and as we do this the
  11698. + device becomes sorted such that tree order and block number order fully
  11699. + correlate.
  11700. +
  11701. + Resizing is done by shifting everything either all the way to the left or all
  11702. + the way to the right, and then reporting the last block.
  11703. +*/
  11704. +
  11705. +/* RELOCATE DECISIONS: The code makes a decision to relocate in several places.
  11706. + This describes the policy from the highest level:
  11707. +
  11708. + The FLUSH_RELOCATE_THRESHOLD parameter: If we count this many consecutive
  11709. + nodes on the leaf level during flush-scan (right, left), then we
  11710. + unconditionally decide to relocate leaf nodes.
  11711. +
  11712. + Otherwise, there are two contexts in which we make a decision to relocate:
  11713. +
  11714. + 1. The REVERSE PARENT-FIRST context: Implemented in reverse_allocate
  11715. + During the initial stages of flush, after scan-right completes, we want to
  11716. + ask the question: should we relocate this leaf node and thus dirty the parent
  11717. + node. Then if the node is a leftmost child its parent is its own parent-first
  11718. + preceder, thus we repeat the question at the next level up, and so on. In
  11719. + these cases we are moving in the reverse-parent first direction.
  11720. +
  11721. + There is another case which is considered the reverse direction, which comes
  11722. + at the end of a twig in reverse_relocate_end_of_twig(). As we finish
  11723. + processing a twig we may reach a point where there is a clean twig to the
  11724. + right with a dirty leftmost child. In this case, we may wish to relocate the
  11725. + child by testing if it should be relocated relative to its parent.
  11726. +
  11727. + 2. The FORWARD PARENT-FIRST context: Testing for forward relocation is done
  11728. + in allocate_znode. What distinguishes the forward parent-first case from the
  11729. + reverse-parent first case is that the preceder has already been allocated in
  11730. + the forward case, whereas in the reverse case we don't know what the preceder
  11731. + is until we finish "going in reverse". That simplifies the forward case
  11732. + considerably, and there we actually use the block allocator to determine
  11733. + whether, e.g., a block closer to the preceder is available.
  11734. +*/
  11735. +
  11736. +/* SQUEEZE_LEFT_EDGE: Unimplemented idea for future consideration. The idea is,
  11737. + once we finish scan-left and find a starting point, if the parent's left
  11738. + neighbor is dirty then squeeze the parent's left neighbor and the parent.
  11739. + This may change the flush-starting-node's parent. Repeat until the child's
  11740. + parent is stable. If the child is a leftmost child, repeat this left-edge
  11741. + squeezing operation at the next level up. Note that we cannot allocate
  11742. + extents during this or they will be out of parent-first order. There are also
  11743. + some difficult coordinate maintenance issues. We can't do a tree search to
  11744. + find coordinates again (because we hold locks), we have to determine them
  11745. + from the two nodes being squeezed. Looks difficult, but has potential to
  11746. + increase space utilization. */
  11747. +
  11748. +/* Flush-scan helper functions. */
  11749. +static void scan_init(flush_scan * scan);
  11750. +static void scan_done(flush_scan * scan);
  11751. +
  11752. +/* Flush-scan algorithm. */
  11753. +static int scan_left(flush_scan * scan, flush_scan * right, jnode * node,
  11754. + unsigned limit);
  11755. +static int scan_right(flush_scan * scan, jnode * node, unsigned limit);
  11756. +static int scan_common(flush_scan * scan, flush_scan * other);
  11757. +static int scan_formatted(flush_scan * scan);
  11758. +static int scan_unformatted(flush_scan * scan, flush_scan * other);
  11759. +static int scan_by_coord(flush_scan * scan);
  11760. +
  11761. +/* Initial flush-point ancestor allocation. */
  11762. +static int alloc_pos_and_ancestors(flush_pos_t *pos);
  11763. +static int alloc_one_ancestor(const coord_t *coord, flush_pos_t *pos);
  11764. +static int set_preceder(const coord_t *coord_in, flush_pos_t *pos);
  11765. +
  11766. +/* Main flush algorithm.
  11767. + Note on abbreviation: "squeeze and allocate" == "squalloc". */
  11768. +static int squalloc(flush_pos_t *pos);
  11769. +
  11770. +/* Flush squeeze implementation. */
  11771. +static int squeeze_right_non_twig(znode * left, znode * right);
  11772. +static int shift_one_internal_unit(znode * left, znode * right);
  11773. +
  11774. +/* Flush reverse parent-first relocation routines. */
  11775. +static int reverse_allocate_parent(jnode * node,
  11776. + const coord_t *parent_coord,
  11777. + flush_pos_t *pos);
  11778. +
  11779. +/* Flush allocate write-queueing functions: */
  11780. +static int allocate_znode(znode * node, const coord_t *parent_coord,
  11781. + flush_pos_t *pos);
  11782. +static int lock_parent_and_allocate_znode(znode *, flush_pos_t *);
  11783. +
  11784. +/* Flush helper functions: */
  11785. +static int jnode_lock_parent_coord(jnode * node,
  11786. + coord_t *coord,
  11787. + lock_handle * parent_lh,
  11788. + load_count * parent_zh,
  11789. + znode_lock_mode mode, int try);
  11790. +static int neighbor_in_slum(znode * node, lock_handle * right_lock, sideof side,
  11791. + znode_lock_mode mode, int check_dirty, int expected);
  11792. +static int znode_same_parents(znode * a, znode * b);
  11793. +
  11794. +static int znode_check_flushprepped(znode * node)
  11795. +{
  11796. + return jnode_check_flushprepped(ZJNODE(node));
  11797. +}
  11798. +static void update_znode_dkeys(znode * left, znode * right);
  11799. +
  11800. +/* Flush position functions */
  11801. +static void pos_init(flush_pos_t *pos);
  11802. +static int pos_valid(flush_pos_t *pos);
  11803. +static void pos_done(flush_pos_t *pos);
  11804. +static int pos_stop(flush_pos_t *pos);
  11805. +
  11806. +/* check that @org is first jnode extent unit, if extent is unallocated,
  11807. + * because all jnodes of unallocated extent are dirty and of the same atom. */
  11808. +#define checkchild(scan) \
  11809. +assert("nikita-3435", \
  11810. + ergo(scan->direction == LEFT_SIDE && \
  11811. + (scan->parent_coord.node->level == TWIG_LEVEL) && \
  11812. + jnode_is_unformatted(scan->node) && \
  11813. + extent_is_unallocated(&scan->parent_coord), \
  11814. + extent_unit_index(&scan->parent_coord) == index_jnode(scan->node)))
  11815. +
  11816. +/* This flush_cnt variable is used to track the number of concurrent flush
  11817. + operations, useful for debugging. It is initialized in txnmgr.c out of
  11818. + laziness (because flush has no static initializer function...) */
  11819. +ON_DEBUG(atomic_t flush_cnt;)
  11820. +
  11821. +/* check fs backing device for write congestion */
  11822. +static int check_write_congestion(void)
  11823. +{
  11824. + struct super_block *sb;
  11825. + struct backing_dev_info *bdi;
  11826. +
  11827. + sb = reiser4_get_current_sb();
  11828. + bdi = inode_to_bdi(reiser4_get_super_fake(sb));
  11829. + return bdi_write_congested(bdi);
  11830. +}
  11831. +
  11832. +/* conditionally write flush queue */
  11833. +static int write_prepped_nodes(flush_pos_t *pos)
  11834. +{
  11835. + int ret;
  11836. +
  11837. + assert("zam-831", pos);
  11838. + assert("zam-832", pos->fq);
  11839. +
  11840. + if (!(pos->flags & JNODE_FLUSH_WRITE_BLOCKS))
  11841. + return 0;
  11842. +
  11843. + if (check_write_congestion())
  11844. + return 0;
  11845. +
  11846. + ret = reiser4_write_fq(pos->fq, pos->nr_written,
  11847. + WRITEOUT_SINGLE_STREAM | WRITEOUT_FOR_PAGE_RECLAIM);
  11848. + return ret;
  11849. +}
  11850. +
  11851. +/* Properly release all flush pos. resources, then move flush position to new
  11852. + locked node */
  11853. +static void move_flush_pos(flush_pos_t *pos, lock_handle * new_lock,
  11854. + load_count * new_load, const coord_t *new_coord)
  11855. +{
  11856. + assert("zam-857", new_lock->node == new_load->node);
  11857. +
  11858. + if (new_coord) {
  11859. + assert("zam-858", new_coord->node == new_lock->node);
  11860. + coord_dup(&pos->coord, new_coord);
  11861. + } else {
  11862. + coord_init_first_unit(&pos->coord, new_lock->node);
  11863. + }
  11864. +
  11865. + if (pos->child) {
  11866. + jput(pos->child);
  11867. + pos->child = NULL;
  11868. + }
  11869. +
  11870. + move_load_count(&pos->load, new_load);
  11871. + done_lh(&pos->lock);
  11872. + move_lh(&pos->lock, new_lock);
  11873. +}
  11874. +
  11875. +/* delete an empty node whose link from the parent still exists. */
  11876. +static int delete_empty_node(znode * node)
  11877. +{
  11878. + reiser4_key smallest_removed;
  11879. +
  11880. + assert("zam-1019", node != NULL);
  11881. + assert("zam-1020", node_is_empty(node));
  11882. + assert("zam-1023", znode_is_wlocked(node));
  11883. +
  11884. + return reiser4_delete_node(node, &smallest_removed, NULL, 1);
  11885. +}
  11886. +
  11887. +/* Prepare flush position for alloc_pos_and_ancestors() and squalloc() */
  11888. +static int prepare_flush_pos(flush_pos_t *pos, jnode * org)
  11889. +{
  11890. + int ret;
  11891. + load_count load;
  11892. + lock_handle lock;
  11893. +
  11894. + init_lh(&lock);
  11895. + init_load_count(&load);
  11896. +
  11897. + if (jnode_is_znode(org)) {
  11898. + ret = longterm_lock_znode(&lock, JZNODE(org),
  11899. + ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI);
  11900. + if (ret)
  11901. + return ret;
  11902. +
  11903. + ret = incr_load_count_znode(&load, JZNODE(org));
  11904. + if (ret)
  11905. + return ret;
  11906. +
  11907. + pos->state =
  11908. + (jnode_get_level(org) ==
  11909. + LEAF_LEVEL) ? POS_ON_LEAF : POS_ON_INTERNAL;
  11910. + move_flush_pos(pos, &lock, &load, NULL);
  11911. + } else {
  11912. + coord_t parent_coord;
  11913. + ret = jnode_lock_parent_coord(org, &parent_coord, &lock,
  11914. + &load, ZNODE_WRITE_LOCK, 0);
  11915. + if (ret)
  11916. + goto done;
  11917. + if (!item_is_extent(&parent_coord)) {
  11918. + /* file was converted to tail, org became HB, we found
  11919. + internal item */
  11920. + ret = -EAGAIN;
  11921. + goto done;
  11922. + }
  11923. +
  11924. + pos->state = POS_ON_EPOINT;
  11925. + move_flush_pos(pos, &lock, &load, &parent_coord);
  11926. + pos->child = jref(org);
  11927. + if (extent_is_unallocated(&parent_coord)
  11928. + && extent_unit_index(&parent_coord) != index_jnode(org)) {
  11929. + /* @org is not first child of its parent unit. This may
  11930. + happen because the longterm lock of its parent node was
  11931. + released between scan_left and scan_right. For now
  11932. + work around this having flush to repeat */
  11933. + ret = -EAGAIN;
  11934. + }
  11935. + }
  11936. +
  11937. +done:
  11938. + done_load_count(&load);
  11939. + done_lh(&lock);
  11940. + return ret;
  11941. +}
  11942. +
  11943. +static txmod_plugin *get_txmod_plugin(void)
  11944. +{
  11945. + struct super_block *sb = reiser4_get_current_sb();
  11946. + return txmod_plugin_by_id(get_super_private(sb)->txmod);
  11947. +}
  11948. +
  11949. +/* TODO LIST (no particular order): */
  11950. +/* I have labelled most of the legitimate FIXME comments in this file with
  11951. + letters to indicate which issue they relate to. There are a few miscellaneous
  11952. + FIXMEs with specific names mentioned instead that need to be
  11953. + inspected/resolved. */
  11954. +/* B. There is an issue described in reverse_allocate having to do with an
  11955. + imprecise is_preceder? check having to do with partially-dirty extents. The
  11956. + code that sets preceder hints and computes the preceder is basically
  11957. + untested. Careful testing needs to be done that preceder calculations are
  11958. + done correctly, since if it doesn't affect correctness we will not catch this
  11959. + stuff during regular testing. */
  11960. +/* C. EINVAL, E_DEADLOCK, E_NO_NEIGHBOR, ENOENT handling. It is unclear which of
  11961. + these are considered expected but unlikely conditions. Flush currently
  11962. + returns 0 (i.e., success but no progress, i.e., restart) whenever it receives
  11963. + any of these in jnode_flush(). Many of the calls that may produce one of
  11964. + these return values (i.e., longterm_lock_znode, reiser4_get_parent,
  11965. + reiser4_get_neighbor, ...) check some of these values themselves and, for
  11966. + instance, stop flushing instead of resulting in a restart. If any of these
  11967. + results are true error conditions then flush will go into a busy-loop, as we
  11968. + noticed during testing when a corrupt tree caused find_child_ptr to return
  11969. + ENOENT. It needs careful thought and testing of corner conditions.
  11970. +*/
  11971. +/* D. Atomicity of flush_prep against deletion and flush concurrency. Suppose a
  11972. + created block is assigned a block number then early-flushed to disk. It is
  11973. + dirtied again and flush is called again. Concurrently, that block is deleted,
  11974. + and the de-allocation of its block number does not need to be deferred, since
  11975. + it is not part of the preserve set (i.e., it didn't exist before the
  11976. + transaction). I think there may be a race condition where flush writes the
  11977. + dirty, created block after the non-deferred deallocated block number is
  11978. + re-allocated, making it possible to write deleted data on top of non-deleted
  11979. + data. Its just a theory, but it needs to be thought out. */
  11980. +/* F. bio_alloc() failure is not handled gracefully. */
  11981. +/* G. Unallocated children. */
  11982. +/* H. Add a WANDERED_LIST to the atom to clarify the placement of wandered
  11983. + blocks. */
  11984. +/* I. Rename flush-scan to scan-point, (flush-pos to flush-point?) */
  11985. +
  11986. +/* JNODE_FLUSH: MAIN ENTRY POINT */
  11987. +/* This is the main entry point for flushing a jnode and its dirty neighborhood
  11988. + (dirty neighborhood is named "slum"). Jnode_flush() is called if reiser4 has
  11989. + to write dirty blocks to disk, it happens when Linux VM decides to reduce
  11990. + number of dirty pages or as a part of transaction commit.
  11991. +
  11992. + Our objective here is to prep and flush the slum the jnode belongs to. We
  11993. + want to squish the slum together, and allocate the nodes in it as we squish
  11994. + because allocation of children affects squishing of parents.
  11995. +
  11996. + The "argument" @node tells flush where to start. From there, flush finds the
  11997. + left edge of the slum, and calls squalloc (in which nodes are squeezed and
  11998. + allocated). To find a "better place" to start squalloc first we perform a
  11999. + flush_scan.
  12000. +
  12001. + Flush-scanning may be performed in both left and right directions, but for
  12002. + different purposes. When scanning to the left, we are searching for a node
  12003. + that precedes a sequence of parent-first-ordered nodes which we will then
  12004. + flush in parent-first order. During flush-scanning, we also take the
  12005. + opportunity to count the number of consecutive leaf nodes. If this number is
  12006. + past some threshold (FLUSH_RELOCATE_THRESHOLD), then we make a decision to
  12007. + reallocate leaf nodes (thus favoring write-optimization).
  12008. +
  12009. + Since the flush argument node can be anywhere in a sequence of dirty leaves,
  12010. + there may also be dirty nodes to the right of the argument. If the scan-left
  12011. + operation does not count at least FLUSH_RELOCATE_THRESHOLD nodes then we
  12012. + follow it with a right-scan operation to see whether there is, in fact,
  12013. + enough nodes to meet the relocate threshold. Each right- and left-scan
  12014. + operation uses a single flush_scan object.
  12015. +
  12016. + After left-scan and possibly right-scan, we prepare a flush_position object
  12017. + with the starting flush point or parent coordinate, which was determined
  12018. + using scan-left.
  12019. +
  12020. + Next we call the main flush routine, squalloc, which iterates along the leaf
  12021. + level, squeezing and allocating nodes (and placing them into the flush
  12022. + queue).
  12023. +
  12024. + After squalloc returns we take extra steps to ensure that all the children
  12025. + of the final twig node are allocated--this involves repeating squalloc
  12026. + until we finish at a twig with no unallocated children.
  12027. +
  12028. + Finally, we call flush_empty_queue to submit write-requests to disk. If we
  12029. + encounter any above-twig nodes during flush_empty_queue that still have
  12030. + unallocated children, we flush_unprep them.
  12031. +
  12032. + Flush treats several "failure" cases as non-failures, essentially causing
  12033. + them to start over. E_DEADLOCK is one example.
  12034. + FIXME:(C) EINVAL, E_NO_NEIGHBOR, ENOENT: these should probably be handled
  12035. + properly rather than restarting, but there are a bunch of cases to audit.
  12036. +*/
  12037. +
  +/**
  + * jnode_flush - squeeze/allocate a slum around @node and queue it for writeout
  + * @node: node to start flushing from (formatted or unformatted)
  + * @nr_to_write: advisory limit on the number of nodes to write
  + * @nr_written: out-parameter, incremented as nodes are submitted
  + * @fq: flush queue that collects prepped nodes
  + * @flags: JNODE_FLUSH_* modifiers passed down from the caller
  + *
  + * Several "soft" failures (-E_REPEAT, -EINVAL, -E_DEADLOCK, -E_NO_NEIGHBOR,
  + * -ENOENT) are converted to 0 at the 'failed' label so the caller simply
  + * retries; any other negative value is a real error.
  + */
  12038. +static int
  12039. +jnode_flush(jnode * node, long nr_to_write, long *nr_written,
  12040. + flush_queue_t *fq, int flags)
  12041. +{
  12042. + long ret = 0;
  12043. + flush_scan *right_scan;
  12044. + flush_scan *left_scan;
  12045. + flush_pos_t *flush_pos;
  12046. + int todo;
  12047. + struct super_block *sb;
  12048. + reiser4_super_info_data *sbinfo;
  12049. + jnode *leftmost_in_slum = NULL;
  12050. +
  12051. + assert("jmacd-76619", lock_stack_isclean(get_current_lock_stack()));
  12052. + assert("nikita-3022", reiser4_schedulable());
  12053. +
  12054. + assert("nikita-3185",
  12055. + get_current_super_private()->delete_mutex_owner != current);
  12056. +
  + /* one allocation serves all three scratch structures: the two scan
  +    states are adjacent, followed by the flush position */
  12057. + /* allocate right_scan, left_scan and flush_pos */
  12058. + right_scan =
  12059. + kmalloc(2 * sizeof(*right_scan) + sizeof(*flush_pos),
  12060. + reiser4_ctx_gfp_mask_get());
  12061. + if (right_scan == NULL)
  12062. + return RETERR(-ENOMEM);
  12063. + left_scan = right_scan + 1;
  12064. + flush_pos = (flush_pos_t *) (left_scan + 1);
  12065. +
  12066. + sb = reiser4_get_current_sb();
  12067. + sbinfo = get_super_private(sb);
  12068. +
  12069. + /* Flush-concurrency debug code */
  12070. +#if REISER4_DEBUG
  12071. + atomic_inc(&flush_cnt);
  12072. +#endif
  12073. +
  12074. + reiser4_enter_flush(sb);
  12075. +
  12076. + /* Initialize a flush position. */
  12077. + pos_init(flush_pos);
  12078. +
  12079. + flush_pos->nr_written = nr_written;
  12080. + flush_pos->fq = fq;
  12081. + flush_pos->flags = flags;
  12082. + flush_pos->nr_to_write = nr_to_write;
  12083. +
  12084. + scan_init(right_scan);
  12085. + scan_init(left_scan);
  12086. +
  12087. + /* First scan left and remember the leftmost scan position. If the
  12088. + leftmost position is unformatted we remember its parent_coord. We
  12089. + scan until counting FLUSH_SCAN_MAXNODES.
  12090. +
  12091. + If starting @node is unformatted, at the beginning of left scan its
  12092. + parent (twig level node, containing extent item) will be long term
  12093. + locked and lock handle will be stored in the
  12094. + @right_scan->parent_lock. This lock is used to start the rightward
  12095. + scan without redoing the tree traversal (necessary to find parent)
  12096. + and, hence, is kept during leftward scan. As a result, we have to
  12097. + use try-lock when taking long term locks during the leftward scan.
  12098. + */
  12099. + ret = scan_left(left_scan, right_scan,
  12100. + node, sbinfo->flush.scan_maxnodes);
  12101. + if (ret != 0)
  12102. + goto failed;
  12103. +
  + /* pin the left end of the slum; released at 'failed' */
  12104. + leftmost_in_slum = jref(left_scan->node);
  12105. + scan_done(left_scan);
  12106. +
  12107. + /* Then possibly go right to decide if we will use a policy of
  12108. + relocating leaves. This is only done if we did not scan past (and
  12109. + count) enough nodes during the leftward scan. If we do scan right,
  12110. + we only care to go far enough to establish that at least
  12111. + FLUSH_RELOCATE_THRESHOLD number of nodes are being flushed. The scan
  12112. + limit is the difference between left_scan.count and the threshold. */
  12113. +
  12114. + todo = sbinfo->flush.relocate_threshold - left_scan->count;
  12115. + /* scan right is inherently deadlock prone, because we are
  12116. + * (potentially) holding a lock on the twig node at this moment.
  12117. + * FIXME: this is incorrect comment: lock is not held */
  12118. + if (todo > 0) {
  12119. + ret = scan_right(right_scan, node, (unsigned)todo);
  12120. + if (ret != 0)
  12121. + goto failed;
  12122. + }
  12123. +
  12124. + /* Only the right-scan count is needed, release any rightward locks
  12125. + right away. */
  12126. + scan_done(right_scan);
  12127. +
  12128. + /* ... and the answer is: we should relocate leaf nodes if at least
  12129. + FLUSH_RELOCATE_THRESHOLD nodes were found. */
  12130. + flush_pos->leaf_relocate = JF_ISSET(node, JNODE_REPACK) ||
  12131. + (left_scan->count + right_scan->count >=
  12132. + sbinfo->flush.relocate_threshold);
  12133. +
  12134. + /* Funny business here. We set the 'point' in the flush_position at
  12135. + prior to starting squalloc regardless of whether the first point is
  12136. + formatted or unformatted. Without this there would be an invariant,
  12137. + in the rest of the code, that if the flush_position is unformatted
  12138. + then flush_position->point is NULL and
  12139. + flush_position->parent_{lock,coord} is set, and if the flush_position
  12140. + is formatted then flush_position->point is non-NULL and no parent
  12141. + info is set.
  12142. +
  12143. + This seems lazy, but it makes the initial calls to
  12144. + reverse_allocate (which ask "is it the pos->point the leftmost
  12145. + child of its parent") much easier because we know the first child
  12146. + already. Nothing is broken by this, but the reasoning is subtle.
  12147. + Holding an extra reference on a jnode during flush can cause us to
  12148. + see nodes with HEARD_BANSHEE during squalloc, because nodes are not
  12149. + removed from sibling lists until they have zero reference count.
  12150. + Flush would never observe a HEARD_BANSHEE node on the left-edge of
  12151. + flush, nodes are only deleted to the right. So if nothing is broken,
  12152. + why fix it?
  12153. +
  12154. + NOTE-NIKITA actually, flush can meet HEARD_BANSHEE node at any
  12155. + point and in any moment, because of the concurrent file system
  12156. + activity (for example, truncate). */
  12157. +
  12158. + /* Check jnode state after flush_scan completed. Having a lock on this
  12159. + node or its parent (in case of unformatted) helps us in case of
  12160. + concurrent flushing. */
  12161. + if (jnode_check_flushprepped(leftmost_in_slum)
  12162. + && !jnode_convertible(leftmost_in_slum)) {
  12163. + ret = 0;
  12164. + goto failed;
  12165. + }
  12166. +
  12167. + /* Now setup flush_pos using scan_left's endpoint. */
  12168. + ret = prepare_flush_pos(flush_pos, leftmost_in_slum);
  12169. + if (ret)
  12170. + goto failed;
  12171. +
  + /* an empty leaf at the start position cannot be squeezed/allocated:
  +    delete it and bail out (ret carries the deletion result) */
  12172. + if (znode_get_level(flush_pos->coord.node) == LEAF_LEVEL
  12173. + && node_is_empty(flush_pos->coord.node)) {
  12174. + znode *empty = flush_pos->coord.node;
  12175. +
  12176. + assert("zam-1022", !ZF_ISSET(empty, JNODE_HEARD_BANSHEE));
  12177. + ret = delete_empty_node(empty);
  12178. + goto failed;
  12179. + }
  12180. +
  12181. + if (jnode_check_flushprepped(leftmost_in_slum)
  12182. + && !jnode_convertible(leftmost_in_slum)) {
  12183. + ret = 0;
  12184. + goto failed;
  12185. + }
  12186. +
  12187. + /* Set pos->preceder and (re)allocate pos and its ancestors if it is
  12188. + needed */
  12189. + ret = alloc_pos_and_ancestors(flush_pos);
  12190. + if (ret)
  12191. + goto failed;
  12192. +
  12193. + /* Do the main rightward-bottom-up squeeze and allocate loop. */
  12194. + ret = squalloc(flush_pos);
  12195. + pos_stop(flush_pos);
  12196. + if (ret)
  12197. + goto failed;
  12198. +
  12199. + /* FIXME_NFQUCMPD: Here, handle the twig-special case for unallocated
  12200. + children. First, the pos_stop() and pos_valid() routines should be
  12201. + modified so that pos_stop() sets a flush_position->stop flag to 1
  12202. + without releasing the current position immediately--instead release
  12203. + it in pos_done(). This is a better implementation than the current
  12204. + one anyway.
  12205. +
  12206. + It is not clear that all fields of the flush_position should not be
  12207. + released, but at the very least the parent_lock, parent_coord, and
  12208. + parent_load should remain held because they are hold the last twig
  12209. + when pos_stop() is called.
  12210. +
  12211. + When we reach this point in the code, if the parent_coord is set to
  12212. + after the last item then we know that flush reached the end of a twig
  12213. + (and according to the new flush queueing design, we will return now).
  12214. + If parent_coord is not past the last item, we should check if the
  12215. + current twig has any unallocated children to the right (we are not
  12216. + concerned with unallocated children to the left--in that case the
  12217. + twig itself should not have been allocated). If the twig has
  12218. + unallocated children to the right, set the parent_coord to that
  12219. + position and then repeat the call to squalloc.
  12220. +
  12221. + Testing for unallocated children may be defined in two ways: if any
  12222. + internal item has a fake block number, it is unallocated; if any
  12223. + extent item is unallocated then all of its children are unallocated.
  12224. + But there is a more aggressive approach: if there are any dirty
  12225. + children of the twig to the right of the current position, we may
  12226. + wish to relocate those nodes now. Checking for potential relocation
  12227. + is more expensive as it requires knowing whether there are any dirty
  12228. + children that are not unallocated. The extent_needs_allocation should
  12229. + be used after setting the correct preceder.
  12230. +
  12231. + When we reach the end of a twig at this point in the code, if the
  12232. + flush can continue (when the queue is ready) it will need some
  12233. + information on the future starting point. That should be stored away
  12234. + in the flush_handle using a seal, I believe. Holding a jref() on the
  12235. + future starting point may break other code that deletes that node.
  12236. + */
  12237. +
  12238. + /* FIXME_NFQUCMPD: Also, we don't want to do any flushing when flush is
  12239. + called above the twig level. If the VM calls flush above the twig
  12240. + level, do nothing and return (but figure out why this happens). The
  12241. + txnmgr should be modified to only flush its leaf-level dirty list.
  12242. + This will do all the necessary squeeze and allocate steps but leave
  12243. + unallocated branches and possibly unallocated twigs (when the twig's
  12244. + leftmost child is not dirty). After flushing the leaf level, the
  12245. + remaining unallocated nodes should be given write-optimized
  12246. + locations. (Possibly, the remaining unallocated twigs should be
  12247. + allocated just before their leftmost child.)
  12248. + */
  12249. +
  12250. + /* Any failure reaches this point. */
  12251. +failed:
  12252. +
  12253. + switch (ret) {
  12254. + case -E_REPEAT:
  12255. + case -EINVAL:
  12256. + case -E_DEADLOCK:
  12257. + case -E_NO_NEIGHBOR:
  12258. + case -ENOENT:
  12259. + /* FIXME(C): Except for E_DEADLOCK, these should probably be
  12260. + handled properly in each case. They already are handled in
  12261. + many cases. */
  12262. + /* Something bad happened, but difficult to avoid... Try again!
  12263. + */
  12264. + ret = 0;
  12265. + }
  12266. +
  12267. + if (leftmost_in_slum)
  12268. + jput(leftmost_in_slum);
  12269. +
  + /* release scratch state; left/right scans are already done on the
  +    success path, scan_done() is safe to repeat */
  12270. + pos_done(flush_pos);
  12271. + scan_done(left_scan);
  12272. + scan_done(right_scan);
  12273. + kfree(right_scan);
  12274. +
  12275. + ON_DEBUG(atomic_dec(&flush_cnt));
  12276. +
  12277. + reiser4_leave_flush(sb);
  12278. +
  12279. + return ret;
  12280. +}
  12281. +
  12282. +/* The reiser4 flush subsystem can be turned into "rapid flush mode" means that
  12283. + * flusher should submit all prepped nodes immediately without keeping them in
  12284. + * flush queues for long time. The reason for rapid flush mode is to free
  12285. + * memory as fast as possible. */
  12286. +
  12287. +#if REISER4_USE_RAPID_FLUSH
  12288. +
  12289. +/**
  12290. + * submit all prepped nodes if rapid flush mode is set,
  12291. + * turn rapid flush mode off.
  12292. + */
  12293. +
  12294. +static int rapid_flush(flush_pos_t *pos)
  12295. +{
  + /* no write-back request pending: rapid flush mode is off, nothing to do */
  12296. + if (!wbq_available())
  12297. + return 0;
  12298. +
  + /* submit every node prepped so far for this flush position */
  12299. + return write_prepped_nodes(pos);
  12300. +}
  12301. +
  12302. +#else
  12303. +
  12304. +#define rapid_flush(pos) (0)
  12305. +
  12306. +#endif /* REISER4_USE_RAPID_FLUSH */
  12307. +
  +/*
  + * Choose the jnode to start flushing from. Prefer @start if it is still
  + * unprepped (it must belong to @atom); otherwise walk the atom's dirty
  + * lists, moving writeback nodes to the WB list, making the above-root
  + * znode wander, and queueing already-RELOC nodes into @fq (counted in
  + * @nr_queued), until an unprepped dirty node is found.
  + *
  + * Returns the chosen node with its jnode spin-lock HELD, or NULL if the
  + * atom has no node to start flushing from. Called with the atom locked.
  + */
  12308. +static jnode *find_flush_start_jnode(jnode *start, txn_atom * atom,
  12309. + flush_queue_t *fq, int *nr_queued,
  12310. + int flags)
  12311. +{
  12312. + jnode * node;
  12313. +
  12314. + if (start != NULL) {
  12315. + spin_lock_jnode(start);
  12316. + if (!jnode_is_flushprepped(start)) {
  12317. + assert("zam-1056", start->atom == atom);
  12318. + node = start;
  12319. + goto enter;
  12320. + }
  12321. + spin_unlock_jnode(start);
  12322. + }
  12323. + /*
  12324. + * In this loop we process all already prepped (RELOC or OVRWR) and
  12325. + * dirtied again nodes. The atom spin lock is not released until all
  12326. + * dirty nodes processed or not prepped node found in the atom dirty
  12327. + * lists.
  12328. + */
  12329. + while ((node = find_first_dirty_jnode(atom, flags))) {
  12330. + spin_lock_jnode(node);
  12331. +enter:
  12332. + assert("zam-881", JF_ISSET(node, JNODE_DIRTY));
  12333. + assert("zam-898", !JF_ISSET(node, JNODE_OVRWR));
  12334. +
  12335. + if (JF_ISSET(node, JNODE_WRITEBACK)) {
  12336. + /* move node to the end of atom's writeback list */
  12337. + list_move_tail(&node->capture_link, ATOM_WB_LIST(atom));
  12338. +
  12339. + /*
  12340. + * jnode is not necessarily on dirty list: if it was
  12341. + * dirtied when it was on flush queue - it does not get
  12342. + * moved to dirty list
  12343. + */
  12344. + ON_DEBUG(count_jnode(atom, node, NODE_LIST(node),
  12345. + WB_LIST, 1));
  12346. +
  12347. + } else if (jnode_is_znode(node)
  12348. + && znode_above_root(JZNODE(node))) {
  12349. + /*
  12350. + * A special case for znode-above-root. The above-root
  12351. + * (fake) znode is captured and dirtied when the tree
  12352. + * height changes or when the root node is relocated.
  12353. + * This causes atoms to fuse so that changes at the root
  12354. + * are serialized. However, this node is never flushed.
  12355. + * This special case used to be in lock.c to prevent the
  12356. + * above-root node from ever being captured, but now
  12357. + * that it is captured we simply prevent it from
  12358. + * flushing. The log-writer code relies on this to
  12359. + * properly log superblock modifications of the tree
  12360. + * height.
  12361. + */
  12362. + jnode_make_wander_nolock(node);
  12363. + } else if (JF_ISSET(node, JNODE_RELOC)) {
  12364. + queue_jnode(fq, node);
  12365. + ++(*nr_queued);
  12366. + } else
  + /* found an unprepped dirty node: leave its spin-lock held */
  12367. + break;
  12368. +
  12369. + spin_unlock_jnode(node);
  12370. + }
  12371. + return node;
  12372. +}
  12373. +
  12374. +/* Flush some nodes of current atom, usually slum, return -E_REPEAT if there are
  12375. + * more nodes to flush, return 0 if atom's dirty lists empty and keep current
  12376. + * atom locked, return other errors as they are. */
  12377. +int
  12378. +flush_current_atom(int flags, long nr_to_write, long *nr_submitted,
  12379. + txn_atom ** atom, jnode *start)
  12380. +{
  12381. + reiser4_super_info_data *sinfo = get_current_super_private();
  12382. + flush_queue_t *fq = NULL;
  12383. + jnode *node;
  12384. + int nr_queued;
  12385. + int ret;
  12386. +
  12387. + assert("zam-889", atom != NULL && *atom != NULL);
  12388. + assert_spin_locked(&((*atom)->alock));
  12389. + assert("zam-892", get_current_context()->trans->atom == *atom);
  12390. +
  12391. + BUG_ON(sb_rdonly(get_current_context()->super));
  12392. +
  + /* NOTE(review): the @nr_to_write argument is deliberately discarded
  +    here; flush always tries to write as much of the slum as possible */
  12393. + nr_to_write = LONG_MAX;
  + /* obtaining a flush queue may drop and re-take the atom lock, in
  +    which case *atom must be re-resolved and the attempt repeated */
  12394. + while (1) {
  12395. + ret = reiser4_fq_by_atom(*atom, &fq);
  12396. + if (ret != -E_REPEAT)
  12397. + break;
  12398. + *atom = get_current_atom_locked();
  12399. + }
  12400. + if (ret)
  12401. + return ret;
  12402. +
  12403. + assert_spin_locked(&((*atom)->alock));
  12404. +
  12405. + /* parallel flushers limit */
  12406. + if (sinfo->tmgr.atom_max_flushers != 0) {
  12407. + while ((*atom)->nr_flushers >= sinfo->tmgr.atom_max_flushers) {
  12408. + /* A reiser4_atom_send_event() call is inside
  12409. + reiser4_fq_put_nolock() which is called when flush is
  12410. + finished and nr_flushers is decremented. */
  12411. + reiser4_atom_wait_event(*atom);
  12412. + *atom = get_current_atom_locked();
  12413. + }
  12414. + }
  12415. +
  12416. + /* count ourself as a flusher */
  12417. + (*atom)->nr_flushers++;
  12418. +
  12419. + writeout_mode_enable();
  12420. +
  12421. + nr_queued = 0;
  12422. + node = find_flush_start_jnode(start, *atom, fq, &nr_queued, flags);
  12423. +
  12424. + if (node == NULL) {
  + /* nothing to flush at all: undo flusher accounting and return
  +    with the atom still locked, as the contract requires */
  12425. + if (nr_queued == 0) {
  12426. + (*atom)->nr_flushers--;
  12427. + reiser4_fq_put_nolock(fq);
  12428. + reiser4_atom_send_event(*atom);
  12429. + /* current atom remains locked */
  12430. + writeout_mode_disable();
  12431. + return 0;
  12432. + }
  12433. + spin_unlock_atom(*atom);
  12434. + } else {
  12435. + jref(node);
  12436. + BUG_ON((*atom)->super != node->tree->super);
  12437. + spin_unlock_atom(*atom);
  12438. + spin_unlock_jnode(node);
  12439. + BUG_ON(nr_to_write == 0);
  12440. + ret = jnode_flush(node, nr_to_write, nr_submitted, fq, flags);
  12441. + jput(node);
  12442. + }
  12443. +
  12444. + ret =
  12445. + reiser4_write_fq(fq, nr_submitted,
  12446. + WRITEOUT_SINGLE_STREAM | WRITEOUT_FOR_PAGE_RECLAIM);
  12447. +
  12448. + *atom = get_current_atom_locked();
  12449. + (*atom)->nr_flushers--;
  12450. + reiser4_fq_put_nolock(fq);
  12451. + reiser4_atom_send_event(*atom);
  12452. + spin_unlock_atom(*atom);
  12453. +
  12454. + writeout_mode_disable();
  12455. +
  + /* success is reported as -E_REPEAT: tell the caller there may be
  +    more nodes to flush (see the comment above this function) */
  12456. + if (ret == 0)
  12457. + ret = -E_REPEAT;
  12458. +
  12459. + return ret;
  12460. +}
  12461. +
  12462. +/**
  12463. + * This function calls txmod->reverse_alloc_formatted() to make a
  12464. + * reverse-parent-first relocation decision and then, if yes, it marks
  12465. + * the parent dirty.
  12466. + */
  12467. +static int reverse_allocate_parent(jnode * node,
  12468. + const coord_t *parent_coord,
  12469. + flush_pos_t *pos)
  12470. +{
  12471. + int ret;
  12472. +
  + /* only consult the txmod plugin if the parent is still clean; a dirty
  +    parent needs no reverse-relocation decision */
  12473. + if (!JF_ISSET(ZJNODE(parent_coord->node), JNODE_DIRTY)) {
  12474. + txmod_plugin *txmod_plug = get_txmod_plugin();
  12475. +
  12476. + if (!txmod_plug->reverse_alloc_formatted)
  12477. + return 0;
  12478. + ret = txmod_plug->reverse_alloc_formatted(node,
  12479. + parent_coord, pos);
  12480. + if (ret < 0)
  12481. + return ret;
  12482. + /*
  12483. + * FIXME-ZAM: if parent is already relocated -
  12484. + * we do not want to grab space, right?
  12485. + */
  + /* ret == 1: the plugin decided to relocate, so dirty the parent;
  +    a block is grabbed from the reserved pool to guarantee the
  +    parent can be written out */
  12486. + if (ret == 1) {
  12487. + int grabbed;
  12488. +
  12489. + grabbed = get_current_context()->grabbed_blocks;
  12490. + if (reiser4_grab_space_force((__u64) 1, BA_RESERVED) !=
  12491. + 0)
  12492. + reiser4_panic("umka-1250",
  12493. + "No space left during flush.");
  12494. +
  12495. + assert("jmacd-18923",
  12496. + znode_is_write_locked(parent_coord->node));
  12497. + znode_make_dirty(parent_coord->node);
  12498. + grabbed2free_mark(grabbed);
  12499. + }
  12500. + }
  12501. + return 0;
  12502. +}
  12503. +
  12504. +/* INITIAL ALLOCATE ANCESTORS STEP (REVERSE PARENT-FIRST ALLOCATION BEFORE
  12505. + FORWARD PARENT-FIRST LOOP BEGINS) */
  12506. +
  12507. +/* Get the leftmost child for given coord. */
  12508. +static int get_leftmost_child_of_unit(const coord_t *coord, jnode ** child)
  12509. +{
  12510. + int ret;
  12511. +
  12512. + ret = item_utmost_child(coord, LEFT_SIDE, child);
  12513. +
  12514. + if (ret)
  12515. + return ret;
  12516. +
  + /* item_utmost_child() may store an ERR_PTR in *child even when it
  +    returns 0; convert that into an ordinary error code */
  12517. + if (IS_ERR(*child))
  12518. + return PTR_ERR(*child);
  12519. +
  12520. + return 0;
  12521. +}
  12522. +
  12523. +/* This step occurs after the left- and right-scans are completed, before
  12524. + starting the forward parent-first traversal. Here we attempt to allocate
  12525. + ancestors of the starting flush point, which means continuing in the reverse
  12526. + parent-first direction to the parent, grandparent, and so on (as long as the
  12527. + child is a leftmost child). This routine calls a recursive process,
  12528. + alloc_one_ancestor, which does the real work, except there is special-case
  12529. + handling here for the first ancestor, which may be a twig. At each level
  12530. + (here and alloc_one_ancestor), we check for relocation and then, if the child
  12531. + is a leftmost child, repeat at the next level. On the way back down (the
  12532. + recursion), we allocate the ancestors in parent-first order. */
  12533. +static int alloc_pos_and_ancestors(flush_pos_t *pos)
  12534. +{
  12535. + int ret = 0;
  12536. + lock_handle plock;
  12537. + load_count pload;
  12538. + coord_t pcoord;
  12539. +
  + /* already allocated/clean: nothing to do */
  12540. + if (znode_check_flushprepped(pos->lock.node))
  12541. + return 0;
  12542. +
  12543. + coord_init_invalid(&pcoord, NULL);
  12544. + init_lh(&plock);
  12545. + init_load_count(&pload);
  12546. +
  12547. + if (pos->state == POS_ON_EPOINT) {
  12548. + /* a special case for pos on twig level, where we already have
  12549. + a lock on parent node. */
  12550. + /* The parent may not be dirty, in which case we should decide
  12551. + whether to relocate the child now. If decision is made to
  12552. + relocate the child, the parent is marked dirty. */
  12553. + ret = reverse_allocate_parent(pos->child, &pos->coord, pos);
  12554. + if (ret)
  12555. + goto exit;
  12556. +
  12557. + /* FIXME_NFQUCMPD: We only need to allocate the twig (if child
  12558. + is leftmost) and the leaf/child, so recursion is not needed.
  12559. + Levels above the twig will be allocated for
  12560. + write-optimization before the transaction commits. */
  12561. +
  12562. + /* Do the recursive step, allocating zero or more of our
  12563. + * ancestors. */
  12564. + ret = alloc_one_ancestor(&pos->coord, pos);
  12565. +
  12566. + } else {
  12567. + if (!znode_is_root(pos->lock.node)) {
  12568. + /* all formatted nodes except tree root */
  + /* write-lock and load the parent, then locate the
  +    child's pointer in it before deciding relocation */
  12569. + ret =
  12570. + reiser4_get_parent(&plock, pos->lock.node,
  12571. + ZNODE_WRITE_LOCK);
  12572. + if (ret)
  12573. + goto exit;
  12574. +
  12575. + ret = incr_load_count_znode(&pload, plock.node);
  12576. + if (ret)
  12577. + goto exit;
  12578. +
  12579. + ret =
  12580. + find_child_ptr(plock.node, pos->lock.node, &pcoord);
  12581. + if (ret)
  12582. + goto exit;
  12583. +
  12584. + ret = reverse_allocate_parent(ZJNODE(pos->lock.node),
  12585. + &pcoord,
  12586. + pos);
  12587. + if (ret)
  12588. + goto exit;
  12589. +
  12590. + ret = alloc_one_ancestor(&pcoord, pos);
  12591. + if (ret)
  12592. + goto exit;
  12593. + }
  12594. +
  + /* ancestors done (parent-first order): allocate the position
  +    itself last on the way back down */
  12595. + ret = allocate_znode(pos->lock.node, &pcoord, pos);
  12596. + }
  12597. +exit:
  12598. + done_load_count(&pload);
  12599. + done_lh(&plock);
  12600. + return ret;
  12601. +}
  12602. +
  12603. +/* This is the recursive step described in alloc_pos_and_ancestors, above.
  12604. + Ignoring the call to set_preceder, which is the next function described, this
  12605. + checks if the child is a leftmost child and returns if it is not. If the
  12606. + child is a leftmost child it checks for relocation, possibly dirtying the
  12607. + parent. Then it performs the recursive step. */
  12608. +static int alloc_one_ancestor(const coord_t *coord, flush_pos_t *pos)
  12609. +{
  12610. + int ret = 0;
  12611. + lock_handle alock;
  12612. + load_count aload;
  12613. + coord_t acoord;
  12614. +
  12615. + /* As we ascend at the left-edge of the region to flush, take this
  12616. + opportunity at the twig level to find our parent-first preceder
  12617. + unless we have already set it. */
  12618. + if (pos->preceder.blk == 0) {
  12619. + ret = set_preceder(coord, pos);
  12620. + if (ret != 0)
  12621. + return ret;
  12622. + }
  12623. +
  12624. + /* If the ancestor is clean or already allocated, or if the child is not
  12625. + a leftmost child, stop going up, even leaving coord->node not
  12626. + flushprepped. */
  12627. + if (znode_check_flushprepped(coord->node)
  12628. + || !coord_is_leftmost_unit(coord))
  12629. + return 0;
  12630. +
  12631. + init_lh(&alock);
  12632. + init_load_count(&aload);
  12633. + coord_init_invalid(&acoord, NULL);
  12634. +
  12635. + /* Only ascend to the next level if it is a leftmost child, but
  12636. + write-lock the parent in case we will relocate the child. */
  12637. + if (!znode_is_root(coord->node)) {
  12638. +
  12639. + ret =
  12640. + jnode_lock_parent_coord(ZJNODE(coord->node), &acoord,
  12641. + &alock, &aload, ZNODE_WRITE_LOCK,
  12642. + 0);
  12643. + if (ret != 0) {
  12644. + /* FIXME(C): check EINVAL, E_DEADLOCK */
  12645. + goto exit;
  12646. + }
  12647. +
  12648. + ret = reverse_allocate_parent(ZJNODE(coord->node),
  12649. + &acoord, pos);
  12650. + if (ret != 0)
  12651. + goto exit;
  12652. +
  + /* Recursive call: keep ascending while the parent itself still
  + needs flushprep; allocation happens on the way back down,
  + giving parent-first order. */
  12653. + /* Recursive call. */
  12654. + if (!znode_check_flushprepped(acoord.node)) {
  12655. + ret = alloc_one_ancestor(&acoord, pos);
  12656. + if (ret)
  12657. + goto exit;
  12658. + }
  12659. + }
  12660. +
  12661. + /* Note: we call allocate with the parent write-locked (except at the
  12662. + root) in case we relocate the child, in which case it will modify the
  12663. + parent during this call. */
  12664. + ret = allocate_znode(coord->node, &acoord, pos);
  12665. +
  12666. +exit:
  12667. + done_load_count(&aload);
  12668. + done_lh(&alock);
  12669. + return ret;
  12670. +}
  12671. +
  12672. +/* During the reverse parent-first alloc_pos_and_ancestors process described
  12673. + above there is a call to this function at the twig level. During
  12674. + alloc_pos_and_ancestors we may ask: should this node be relocated (in reverse
  12675. + parent-first context)? We repeat this process as long as the child is the
  12676. + leftmost child, eventually reaching an ancestor of the flush point that is
  12677. + not a leftmost child. The preceder of that ancestors, which is not a leftmost
  12678. + child, is actually on the leaf level. The preceder of that block is the
  12679. + left-neighbor of the flush point. The preceder of that block is the rightmost
  12680. + child of the twig on the left. So, when alloc_pos_and_ancestors passes upward
  12681. + through the twig level, it stops momentarily to remember the block of the
  12682. + rightmost child of the twig on the left and sets it to the flush_position's
  12683. + preceder_hint.
  12684. +
  12685. + There is one other place where we may set the flush_position's preceder hint,
  12686. + which is during scan-left.
  12687. +*/
  +/* Record in pos->preceder.blk the block number of the rightmost child of
  + the unit to the left of @coord_in (possibly in the left neighbor node).
  + Failure to find a left neighbor is not an error: the preceder is only a
  + hint, so those cases are silently ignored. */
  12688. +static int set_preceder(const coord_t *coord_in, flush_pos_t *pos)
  12689. +{
  12690. + int ret;
  12691. + coord_t coord;
  12692. + lock_handle left_lock;
  12693. + load_count left_load;
  12694. +
  12695. + coord_dup(&coord, coord_in);
  12696. +
  12697. + init_lh(&left_lock);
  12698. + init_load_count(&left_load);
  12699. +
  12700. + /* FIXME(B): Same FIXME as in "Find the preceder" in
  12701. + reverse_allocate. coord_is_leftmost_unit is not the right test
  12702. + if the unformatted child is in the middle of the first extent unit.*/
  12703. + if (!coord_is_leftmost_unit(&coord)) {
  12704. + coord_prev_unit(&coord);
  12705. + } else {
  + /* leftmost unit: the preceding unit lives in the left
  +    neighbor, which must belong to the same atom */
  12706. + ret =
  12707. + reiser4_get_left_neighbor(&left_lock, coord.node,
  12708. + ZNODE_READ_LOCK, GN_SAME_ATOM);
  12709. + if (ret) {
  12710. + /* If we fail for any reason it doesn't matter because
  12711. + the preceder is only a hint. We are low-priority at
  12712. + this point, so this must be the case. */
  12713. + if (ret == -E_REPEAT || ret == -E_NO_NEIGHBOR ||
  12714. + ret == -ENOENT || ret == -EINVAL
  12715. + || ret == -E_DEADLOCK)
  12716. + ret = 0;
  12717. + goto exit;
  12718. + }
  12719. +
  12720. + ret = incr_load_count_znode(&left_load, left_lock.node);
  12721. + if (ret)
  12722. + goto exit;
  12723. +
  12724. + coord_init_last_unit(&coord, left_lock.node);
  12725. + }
  12726. +
  12727. + ret =
  12728. + item_utmost_child_real_block(&coord, RIGHT_SIDE,
  12729. + &pos->preceder.blk);
  12730. +exit:
  12731. + check_preceder(pos->preceder.blk);
  12732. + done_load_count(&left_load);
  12733. + done_lh(&left_lock);
  12734. + return ret;
  12735. +}
  12736. +
  12737. +/* MAIN SQUEEZE AND ALLOCATE LOOP (THREE BIG FUNCTIONS) */
  12738. +
  12739. +/* This procedure implements the outer loop of the flush algorithm. To put this
  12740. + in context, here is the general list of steps taken by the flush routine as a
  12741. + whole:
  12742. +
  12743. + 1. Scan-left
  12744. + 2. Scan-right (maybe)
  12745. + 3. Allocate initial flush position and its ancestors
  12746. + 4. <handle extents>
  12747. + 5. <squeeze and next position and its ancestors to-the-right,
  12748. + then update position to-the-right>
  12749. + 6. <repeat from #4 until flush is stopped>
  12750. +
  12751. + This procedure implements the loop in steps 4 through 6 in the above listing.
  12752. +
  12753. + Step 4: if the current flush position is an extent item (position on the twig
  12754. + level), it allocates the extent (allocate_extent_item_in_place) then shifts
  12755. + to the next coordinate. If the next coordinate's leftmost child needs
  12756. + flushprep, we will continue. If the next coordinate is an internal item, we
  12757. + descend back to the leaf level, otherwise we repeat a step #4 (labeled
  12758. + ALLOC_EXTENTS below). If the "next coordinate" brings us past the end of the
  12759. + twig level, then we call reverse_relocate_end_of_twig to possibly dirty the
  12760. + next (right) twig, prior to step #5 which moves to the right.
  12761. +
  12762. + Step 5: calls squalloc_changed_ancestors, which initiates a recursive call up
  12763. + the tree to allocate any ancestors of the next-right flush position that are
  12764. + not also ancestors of the current position. Those ancestors (in top-down
  12765. + order) are the next in parent-first order. We squeeze adjacent nodes on the
  12766. + way up until the right node and current node share the same parent, then
  12767. + allocate on the way back down. Finally, this step sets the flush position to
  12768. + the next-right node. Then repeat steps 4 and 5.
  12769. +*/
  12770. +
  12771. +/* SQUEEZE CODE */
  12772. +
  12773. +/* squalloc_right_twig helper function: cut a range of extent items from
  12774. + node @to->node, starting at the node's beginning and ending at coord @to. */
  12775. +static int squalloc_right_twig_cut(coord_t *to, reiser4_key * to_key,
  12776. + znode * left)
  12777. +{
  12778. + coord_t from;
  12779. + reiser4_key from_key;
  12780. +
  + /* cut everything from the first unit of @to->node up to (@to, @to_key) */
  12781. + coord_init_first_unit(&from, to->node);
  12782. + item_key_by_coord(&from, &from_key);
  12783. +
  12784. + return cut_node_content(&from, to, &from_key, to_key, NULL);
  12785. +}
  12786. +
  12787. +/* Copy as much of the leading extents from @right to @left, allocating
  12788. + unallocated extents as they are copied. Returns SQUEEZE_TARGET_FULL or
  12789. + SQUEEZE_SOURCE_EMPTY when no more can be shifted. If the next item is an
  12790. + internal item it calls shift_one_internal_unit and may then return
  12791. + SUBTREE_MOVED. */
  12792. +static int squeeze_right_twig(znode * left, znode * right, flush_pos_t *pos)
  12793. +{
  12794. + int ret = SUBTREE_MOVED;
  12795. + coord_t coord; /* used to iterate over items */
  12796. + reiser4_key stop_key;
  12797. + reiser4_tree *tree;
  12798. + txmod_plugin *txmod_plug = get_txmod_plugin();
  12799. +
  12800. + assert("jmacd-2008", !node_is_empty(right));
  12801. + coord_init_first_unit(&coord, right);
  12802. +
  + /* first phase: move leading extent units from @right to @left one at a
  +    time, allocating them via the txmod plugin as they are copied */
  12803. + /* FIXME: can be optimized to cut once */
  12804. + while (!node_is_empty(coord.node) && item_is_extent(&coord)) {
  12805. + ON_DEBUG(void *vp);
  12806. +
  12807. + assert("vs-1468", coord_is_leftmost_unit(&coord));
  12808. + ON_DEBUG(vp = shift_check_prepare(left, coord.node));
  12809. +
  12810. + /* stop_key is used to find what was copied and what to cut */
  12811. + stop_key = *reiser4_min_key();
  12812. + ret = txmod_plug->squeeze_alloc_unformatted(left,
  12813. + &coord, pos,
  12814. + &stop_key);
  12815. + if (ret != SQUEEZE_CONTINUE) {
  12816. + ON_DEBUG(kfree(vp));
  12817. + break;
  12818. + }
  12819. + assert("vs-1465", !keyeq(&stop_key, reiser4_min_key()));
  12820. +
  12821. + /* Helper function to do the cutting. */
  12822. + set_key_offset(&stop_key, get_key_offset(&stop_key) - 1);
  12823. + check_me("vs-1466",
  12824. + squalloc_right_twig_cut(&coord, &stop_key, left) == 0);
  12825. +
  12826. + ON_DEBUG(shift_check(vp, left, coord.node));
  12827. + }
  12828. + /*
  12829. + * @left and @right nodes participated in the
  12830. + * implicit shift, determined by the pair of
  12831. + * functions:
  12832. + * . squalloc_extent() - append units to the @left
  12833. + * . squalloc_right_twig_cut() - cut the units from @right
  12834. + * so update their delimiting keys
  12835. + */
  12836. + tree = znode_get_tree(left);
  12837. + write_lock_dk(tree);
  12838. + update_znode_dkeys(left, right);
  12839. + write_unlock_dk(tree);
  12840. +
  + /* every unit of @right was consumed by the extent-squeeze loop */
  12841. + if (node_is_empty(coord.node))
  12842. + ret = SQUEEZE_SOURCE_EMPTY;
  12843. +
  12844. + if (ret == SQUEEZE_TARGET_FULL)
  12845. + goto out;
  12846. +
  12847. + if (node_is_empty(right)) {
  12848. + /* The whole right node was copied into @left. */
  12849. + assert("vs-464", ret == SQUEEZE_SOURCE_EMPTY);
  12850. + goto out;
  12851. + }
  12852. +
  12853. + coord_init_first_unit(&coord, right);
  12854. +
  12855. + if (!item_is_internal(&coord)) {
  12856. + /* we do not want to squeeze anything else to left neighbor
  12857. + because "slum" is over */
  12858. + ret = SQUEEZE_TARGET_FULL;
  12859. + goto out;
  12860. + }
  12861. + assert("jmacd-433", item_is_internal(&coord));
  12862. +
  12863. + /* Shift an internal unit. The child must be allocated before shifting
  12864. + any more extents, so we stop here. */
  12865. + ret = shift_one_internal_unit(left, right);
  12866. +
  12867. +out:
  12868. + assert("jmacd-8612", ret < 0 || ret == SQUEEZE_TARGET_FULL
  12869. + || ret == SUBTREE_MOVED || ret == SQUEEZE_SOURCE_EMPTY);
  12870. +
  12871. + if (ret == SQUEEZE_TARGET_FULL) {
  12872. + /* We submit prepped nodes here and expect that this @left twig
  12873. + * will not be modified again during this jnode_flush() call. */
  12874. + int ret1;
  12875. +
  12876. + /* NOTE: seems like io is done under long term locks. */
  12877. + ret1 = write_prepped_nodes(pos);
  12878. + if (ret1 < 0)
  12879. + return ret1;
  12880. + }
  12881. +
  12882. + return ret;
  12883. +}
  12884. +
  12885. +#if REISER4_DEBUG
  12886. +static void item_convert_invariant(flush_pos_t *pos)
  12887. +{
  12888. + assert("edward-1225", coord_is_existing_item(&pos->coord));
  12889. + if (convert_data_attached(pos)) {
  12890. + item_plugin *iplug = item_convert_plug(pos);
  12891. +
  12892. + assert("edward-1000",
  12893. + iplug == item_plugin_by_coord(&pos->coord));
  12894. + assert("edward-1001", iplug->f.convert != NULL);
  12895. + } else
  12896. + assert("edward-1226", pos->child == NULL);
  12897. +}
  12898. +#else
  12899. +
  12900. +#define item_convert_invariant(pos) noop
  12901. +
  12902. +#endif
  12903. +
  12904. +/*
  12905. + * Scan all node's items and apply for each one
  12906. + * its ->convert() method. This method may:
  12907. + * . resize the item;
  12908. + * . kill the item;
  12909. + * . insert a group of items/nodes on the right,
  12910. + * which possess the following properties:
  12911. + * . all new nodes are dirty and not convertible;
  12912. + * . for all new items ->convert() method is a noop.
  12913. + *
  12914. + * NOTE: this function makes the tree unbalanced!
  12915. + * This intended to be used by flush squalloc() in a
  12916. + * combination with squeeze procedure.
  12917. + *
  12918. + * GLOSSARY
  12919. + *
  12920. + * Chained nodes and items.
  12921. + * Two neighboring nodes @left and @right are chained,
  12922. + * iff the last item of @left and the first item of @right
  12923. + * belong to the same item cluster. In this case those
  12924. + * items are called chained.
  12925. + */
  12926. +static int convert_node(flush_pos_t *pos, znode * node)
  12927. +{
  12928. + int ret = 0;
  12929. + item_plugin *iplug;
  12930. + assert("edward-304", pos != NULL);
  12931. + assert("edward-305", pos->child == NULL);
  12932. + assert("edward-475", znode_convertible(node));
  12933. + assert("edward-669", znode_is_wlocked(node));
  12934. + assert("edward-1210", !node_is_empty(node));
  12935. +
  12936. + if (znode_get_level(node) != LEAF_LEVEL)
  12937. + /* unsupported */
  12938. + goto exit;
  12939. +
  12940. + coord_init_first_unit(&pos->coord, node);
  12941. +
  12942. + while (1) {
  12943. + ret = 0;
  12944. + coord_set_to_left(&pos->coord);
  12945. + item_convert_invariant(pos);
  12946. +
  12947. + iplug = item_plugin_by_coord(&pos->coord);
  12948. + assert("edward-844", iplug != NULL);
  12949. +
  12950. + if (iplug->f.convert) {
  12951. + ret = iplug->f.convert(pos);
  12952. + if (ret)
  12953. + goto exit;
  12954. + }
  12955. + assert("edward-307", pos->child == NULL);
  12956. +
  12957. + if (coord_next_item(&pos->coord)) {
  12958. + /*
  12959. + * node is over
  12960. + */
  12961. + if (convert_data_attached(pos))
  12962. + /*
  12963. + * the last item was convertible and
  12964. + * there still is an unprocessed flow
  12965. + */
  12966. + if (next_node_is_chained(pos)) {
  12967. + /*
  12968. + * next node contains items of
  12969. + * the same disk cluster,
  12970. + * so finish with this node
  12971. + */
  12972. + update_chaining_state(pos, 0/* move
  12973. + to next
  12974. + node */);
  12975. + break;
  12976. + }
  12977. + else {
  12978. + /*
  12979. + * perform one more iteration
  12980. + * for the same item and the
  12981. + * rest of flow
  12982. + */
  12983. + update_chaining_state(pos, 1/* this
  12984. + node */);
  12985. + }
  12986. + else
  12987. + /*
  12988. + * the last item wasn't convertible, or
  12989. + * convert data was detached in the last
  12990. + * iteration,
  12991. + * go to next node
  12992. + */
  12993. + break;
  12994. + } else {
  12995. + /*
  12996. + * Node is not over, item position got decremented.
  12997. + */
  12998. + if (convert_data_attached(pos)) {
  12999. + /*
  13000. + * disk cluster should be increased, so roll
  13001. + * one item position back and perform the
  13002. + * iteration with the previous item and the
  13003. + * rest of attached data
  13004. + */
  13005. + if (iplug != item_plugin_by_coord(&pos->coord))
  13006. + set_item_convert_count(pos, 0);
  13007. +
  13008. + ret = coord_prev_item(&pos->coord);
  13009. + assert("edward-1003", !ret);
  13010. +
  13011. + update_chaining_state(pos, 1/* this node */);
  13012. + }
  13013. + else
  13014. + /*
  13015. + * previous item wasn't convertible, or
  13016. + * convert data was detached in the last
  13017. + * iteration, go to next item
  13018. + */
  13019. + ;
  13020. + }
  13021. + }
  13022. + JF_CLR(ZJNODE(node), JNODE_CONVERTIBLE);
  13023. + znode_make_dirty(node);
  13024. +exit:
  13025. + assert("edward-1004", !ret);
  13026. + return ret;
  13027. +}
  13028. +
  13029. +/* Squeeze and allocate the right neighbor. This is called after @left and
  13030. + its current children have been squeezed and allocated already. This
  13031. + procedure's job is to squeeze items from @right to @left.
  13032. +
  13033. + If at the leaf level, use the shift_everything_left memcpy-optimized
  13034. + version of shifting (squeeze_right_leaf).
  13035. +
  13036. + If at the twig level, extents are allocated as they are shifted from @right
  13037. + to @left (squalloc_right_twig).
  13038. +
  13039. + At any other level, shift one internal item and return to the caller
  13040. + (squalloc_parent_first) so that the shifted-subtree can be processed in
  13041. + parent-first order.
  13042. +
  13043. + When unit of internal item is moved, squeezing stops and SUBTREE_MOVED is
  13044. + returned. When all content of @right is squeezed, SQUEEZE_SOURCE_EMPTY is
  13045. + returned. If nothing can be moved into @left anymore, SQUEEZE_TARGET_FULL
  13046. + is returned.
  13047. +*/
  13048. +
  13049. +static int squeeze_right_neighbor(flush_pos_t *pos, znode * left,
  13050. + znode * right)
  13051. +{
  13052. + int ret;
  13053. +
  13054. + /* FIXME it is possible to see empty hasn't-heard-banshee node in a
  13055. + * tree owing to error (for example, ENOSPC) in write */
  13056. + /* assert("jmacd-9321", !node_is_empty(left)); */
  13057. + assert("jmacd-9322", !node_is_empty(right));
  13058. + assert("jmacd-9323", znode_get_level(left) == znode_get_level(right));
  13059. +
  13060. + switch (znode_get_level(left)) {
  13061. + case TWIG_LEVEL:
  13062. + /* Shift with extent allocating until either an internal item
  13063. + is encountered or everything is shifted or no free space
  13064. + left in @left */
  13065. + ret = squeeze_right_twig(left, right, pos);
  13066. + break;
  13067. +
  13068. + default:
  13069. + /* All other levels can use shift_everything until we implement
  13070. + per-item flush plugins. */
  13071. + ret = squeeze_right_non_twig(left, right);
  13072. + break;
  13073. + }
  13074. +
  13075. + assert("jmacd-2011", (ret < 0 ||
  13076. + ret == SQUEEZE_SOURCE_EMPTY
  13077. + || ret == SQUEEZE_TARGET_FULL
  13078. + || ret == SUBTREE_MOVED));
  13079. + return ret;
  13080. +}
  13081. +
  13082. +static int squeeze_right_twig_and_advance_coord(flush_pos_t *pos,
  13083. + znode * right)
  13084. +{
  13085. + int ret;
  13086. +
  13087. + ret = squeeze_right_twig(pos->lock.node, right, pos);
  13088. + if (ret < 0)
  13089. + return ret;
  13090. + if (ret > 0) {
  13091. + coord_init_after_last_item(&pos->coord, pos->lock.node);
  13092. + return ret;
  13093. + }
  13094. +
  13095. + coord_init_last_unit(&pos->coord, pos->lock.node);
  13096. + return 0;
  13097. +}
  13098. +
  13099. +/* forward declaration */
  13100. +static int squalloc_upper_levels(flush_pos_t *, znode *, znode *);
  13101. +
  13102. +/* do a fast check for "same parents" condition before calling
  13103. + * squalloc_upper_levels() */
  13104. +static inline int check_parents_and_squalloc_upper_levels(flush_pos_t *pos,
  13105. + znode * left,
  13106. + znode * right)
  13107. +{
  13108. + if (znode_same_parents(left, right))
  13109. + return 0;
  13110. +
  13111. + return squalloc_upper_levels(pos, left, right);
  13112. +}
  13113. +
  13114. +/* Check whether the parent of given @right node needs to be processed
  13115. + ((re)allocated) prior to processing of the child. If @left and @right do not
  13116. + share a parent, the parent of the @right is after the @left but before the
  13117. + @right in parent-first order, we have to (re)allocate it before the @right
  13118. + gets (re)allocated. */
  13119. +static int squalloc_upper_levels(flush_pos_t *pos, znode * left, znode * right)
  13120. +{
  13121. + int ret;
  13122. +
  13123. + lock_handle left_parent_lock;
  13124. + lock_handle right_parent_lock;
  13125. +
  13126. + load_count left_parent_load;
  13127. + load_count right_parent_load;
  13128. +
  13129. + init_lh(&left_parent_lock);
  13130. + init_lh(&right_parent_lock);
  13131. +
  13132. + init_load_count(&left_parent_load);
  13133. + init_load_count(&right_parent_load);
  13134. +
  13135. + ret = reiser4_get_parent(&left_parent_lock, left, ZNODE_WRITE_LOCK);
  13136. + if (ret)
  13137. + goto out;
  13138. +
  13139. + ret = reiser4_get_parent(&right_parent_lock, right, ZNODE_WRITE_LOCK);
  13140. + if (ret)
  13141. + goto out;
  13142. +
  13143. + /* Check for same parents */
  13144. + if (left_parent_lock.node == right_parent_lock.node)
  13145. + goto out;
  13146. +
  13147. + if (znode_check_flushprepped(right_parent_lock.node)) {
  13148. + /* Keep parent-first order. In the order, the right parent node
  13149. + stands before the @right node. If it is already allocated,
  13150. + we set the preceder (next block search start point) to its
  13151. + block number, @right node should be allocated after it.
  13152. +
  13153. + However, preceder is set only if the right parent is on twig
  13154. + level. The explanation is the following: new branch nodes are
  13155. + allocated over already allocated children while the tree
  13156. + grows, it is difficult to keep tree ordered, we assume that
  13157. + only leaves and twigs are correctly allocated. So, only
  13158. + twigs are used as a preceder for allocating of the rest of
  13159. + the slum. */
  13160. + if (znode_get_level(right_parent_lock.node) == TWIG_LEVEL) {
  13161. + pos->preceder.blk =
  13162. + *znode_get_block(right_parent_lock.node);
  13163. + check_preceder(pos->preceder.blk);
  13164. + }
  13165. + goto out;
  13166. + }
  13167. +
  13168. + ret = incr_load_count_znode(&left_parent_load, left_parent_lock.node);
  13169. + if (ret)
  13170. + goto out;
  13171. +
  13172. + ret = incr_load_count_znode(&right_parent_load, right_parent_lock.node);
  13173. + if (ret)
  13174. + goto out;
  13175. +
  13176. + ret =
  13177. + squeeze_right_neighbor(pos, left_parent_lock.node,
  13178. + right_parent_lock.node);
  13179. + /* We stop if error. We stop if some items/units were shifted (ret == 0)
  13180. + * and thus @right changed its parent. It means we do not have to process
  13181. + * the right_parent node prior to processing of @right. Positive return
  13182. + * values say that shifting of items did not happen because of "empty
  13183. + * source" or "target full" conditions. */
  13184. + if (ret <= 0)
  13185. + goto out;
  13186. +
  13187. + /* parent(@left) and parent(@right) may have different parents also. We
  13188. + * do a recursive call for checking that. */
  13189. + ret =
  13190. + check_parents_and_squalloc_upper_levels(pos, left_parent_lock.node,
  13191. + right_parent_lock.node);
  13192. + if (ret)
  13193. + goto out;
  13194. +
  13195. + /* allocate znode when going down */
  13196. + ret = lock_parent_and_allocate_znode(right_parent_lock.node, pos);
  13197. +
  13198. +out:
  13199. + done_load_count(&left_parent_load);
  13200. + done_load_count(&right_parent_load);
  13201. +
  13202. + done_lh(&left_parent_lock);
  13203. + done_lh(&right_parent_lock);
  13204. +
  13205. + return ret;
  13206. +}
  13207. +
  13208. +/* Check the leftmost child "flushprepped" status, also returns true if child
  13209. + * node was not found in cache. */
  13210. +static int leftmost_child_of_unit_check_flushprepped(const coord_t *coord)
  13211. +{
  13212. + int ret;
  13213. + int prepped;
  13214. +
  13215. + jnode *child;
  13216. +
  13217. + ret = get_leftmost_child_of_unit(coord, &child);
  13218. +
  13219. + if (ret)
  13220. + return ret;
  13221. +
  13222. + if (child) {
  13223. + prepped = jnode_check_flushprepped(child);
  13224. + jput(child);
  13225. + } else {
  13226. + /* We consider a non-existent child as a node to which slum
  13227. + processing should not continue. A node that is not cached is
  13228. + clean, so it is flushprepped. */
  13229. + prepped = 1;
  13230. + }
  13231. +
  13232. + return prepped;
  13233. +}
  13234. +
  13235. +/* (re)allocate znode with automated getting parent node */
  13236. +static int lock_parent_and_allocate_znode(znode * node, flush_pos_t *pos)
  13237. +{
  13238. + int ret;
  13239. + lock_handle parent_lock;
  13240. + load_count parent_load;
  13241. + coord_t pcoord;
  13242. +
  13243. + assert("zam-851", znode_is_write_locked(node));
  13244. +
  13245. + init_lh(&parent_lock);
  13246. + init_load_count(&parent_load);
  13247. +
  13248. + ret = reiser4_get_parent(&parent_lock, node, ZNODE_WRITE_LOCK);
  13249. + if (ret)
  13250. + goto out;
  13251. +
  13252. + ret = incr_load_count_znode(&parent_load, parent_lock.node);
  13253. + if (ret)
  13254. + goto out;
  13255. +
  13256. + ret = find_child_ptr(parent_lock.node, node, &pcoord);
  13257. + if (ret)
  13258. + goto out;
  13259. +
  13260. + ret = allocate_znode(node, &pcoord, pos);
  13261. +
  13262. +out:
  13263. + done_load_count(&parent_load);
  13264. + done_lh(&parent_lock);
  13265. + return ret;
  13266. +}
  13267. +
  13268. +/*
  13269. + * Process nodes on the leaf level until unformatted node or
  13270. + * rightmost node in the slum reached.
  13271. + *
  13272. + * This function is a complicated beast, because it calls a
  13273. + * state machine ->convert_node() for every node, which, in
  13274. + * turn, scans node's items and does something for each of them.
  13275. + */
  13276. +static int handle_pos_on_formatted(flush_pos_t *pos)
  13277. +{
  13278. + int ret;
  13279. + lock_handle right_lock;
  13280. + load_count right_load;
  13281. +
  13282. + init_lh(&right_lock);
  13283. + init_load_count(&right_load);
  13284. +
  13285. + if (znode_convertible(pos->lock.node)) {
  13286. + ret = convert_node(pos, pos->lock.node);
  13287. + if (ret)
  13288. + return ret;
  13289. + }
  13290. + while (1) {
  13291. + assert("edward-1635",
  13292. + ergo(node_is_empty(pos->lock.node),
  13293. + ZF_ISSET(pos->lock.node, JNODE_HEARD_BANSHEE)));
  13294. + /*
  13295. + * First of all, grab a right neighbor
  13296. + */
  13297. + if (convert_data(pos) && convert_data(pos)->right_locked) {
  13298. + /*
  13299. + * the right neighbor was locked by convert_node()
  13300. + * transfer the lock from the "cache".
  13301. + */
  13302. + move_lh(&right_lock, &convert_data(pos)->right_lock);
  13303. + done_lh(&convert_data(pos)->right_lock);
  13304. + convert_data(pos)->right_locked = 0;
  13305. + }
  13306. + else {
  13307. + ret = neighbor_in_slum(pos->lock.node, &right_lock,
  13308. + RIGHT_SIDE, ZNODE_WRITE_LOCK,
  13309. + 1, 0);
  13310. + if (ret) {
  13311. + /*
  13312. + * There is no right neighbor for some reasons,
  13313. + * so finish with this level.
  13314. + */
  13315. + assert("edward-1636",
  13316. + !should_convert_right_neighbor(pos));
  13317. + break;
  13318. + }
  13319. + }
  13320. + /*
  13321. + * Check "flushprepped" status of the right neighbor.
  13322. + *
  13323. + * We don't prep(allocate) nodes for flushing twice. This can be
  13324. + * suboptimal, or it can be optimal. For now we choose to live
  13325. + * with the risk that it will be suboptimal because it would be
  13326. + * quite complex to code it to be smarter.
  13327. + */
  13328. + if (znode_check_flushprepped(right_lock.node)
  13329. + && !znode_convertible(right_lock.node)) {
  13330. + assert("edward-1005",
  13331. + !should_convert_right_neighbor(pos));
  13332. + pos_stop(pos);
  13333. + break;
  13334. + }
  13335. + ret = incr_load_count_znode(&right_load, right_lock.node);
  13336. + if (ret)
  13337. + break;
  13338. + if (znode_convertible(right_lock.node)) {
  13339. + assert("edward-1643",
  13340. + ergo(convert_data(pos),
  13341. + convert_data(pos)->right_locked == 0));
  13342. +
  13343. + ret = convert_node(pos, right_lock.node);
  13344. + if (ret)
  13345. + break;
  13346. + }
  13347. + else
  13348. + assert("edward-1637",
  13349. + !should_convert_right_neighbor(pos));
  13350. +
  13351. + if (node_is_empty(pos->lock.node)) {
  13352. + /*
  13353. + * Current node became empty after conversion
  13354. + * and, hence, was removed from the tree;
  13355. + * Advance the current position to the right neighbor.
  13356. + */
  13357. + assert("edward-1638",
  13358. + ZF_ISSET(pos->lock.node, JNODE_HEARD_BANSHEE));
  13359. + move_flush_pos(pos, &right_lock, &right_load, NULL);
  13360. + continue;
  13361. + }
  13362. + if (node_is_empty(right_lock.node)) {
  13363. + assert("edward-1639",
  13364. + ZF_ISSET(right_lock.node, JNODE_HEARD_BANSHEE));
  13365. + /*
  13366. + * The right neighbor became empty after
  13367. + * conversion, and hence it was deleted
  13368. + * from the tree - skip this.
  13369. + * Since current node is not empty,
  13370. + * we'll obtain a correct pointer to
  13371. + * the next right neighbor
  13372. + */
  13373. + done_load_count(&right_load);
  13374. + done_lh(&right_lock);
  13375. + continue;
  13376. + }
  13377. + /*
  13378. + * At this point both, current node and its right
  13379. + * neighbor are converted and not empty.
  13380. + * Squeeze them _before_ going upward.
  13381. + */
  13382. + ret = squeeze_right_neighbor(pos, pos->lock.node,
  13383. + right_lock.node);
  13384. + if (ret < 0)
  13385. + break;
  13386. + if (node_is_empty(right_lock.node)) {
  13387. + assert("edward-1640",
  13388. + ZF_ISSET(right_lock.node, JNODE_HEARD_BANSHEE));
  13389. + /*
  13390. + * right neighbor was squeezed completely,
  13391. + * and hence has been deleted from the tree.
  13392. + * Skip this.
  13393. + */
  13394. + done_load_count(&right_load);
  13395. + done_lh(&right_lock);
  13396. + continue;
  13397. + }
  13398. + if (znode_check_flushprepped(right_lock.node)) {
  13399. + if (should_convert_right_neighbor(pos)) {
  13400. + /*
  13401. + * in spite of flushprepped status of the node,
  13402. + * its right slum neighbor should be converted
  13403. + */
  13404. + assert("edward-953", convert_data(pos));
  13405. + assert("edward-954", item_convert_data(pos));
  13406. +
  13407. + move_flush_pos(pos, &right_lock, &right_load, NULL);
  13408. + continue;
  13409. + } else {
  13410. + pos_stop(pos);
  13411. + break;
  13412. + }
  13413. + }
  13414. + /*
  13415. + * parent(right_lock.node) has to be processed before
  13416. + * (right_lock.node) due to "parent-first" allocation
  13417. + * order
  13418. + */
  13419. + ret = check_parents_and_squalloc_upper_levels(pos,
  13420. + pos->lock.node,
  13421. + right_lock.node);
  13422. + if (ret)
  13423. + break;
  13424. + /*
  13425. + * (re)allocate _after_ going upward
  13426. + */
  13427. + ret = lock_parent_and_allocate_znode(right_lock.node, pos);
  13428. + if (ret)
  13429. + break;
  13430. + if (should_terminate_squalloc(pos)) {
  13431. + set_item_convert_count(pos, 0);
  13432. + break;
  13433. + }
  13434. + /*
  13435. + * advance the flush position to the right neighbor
  13436. + */
  13437. + move_flush_pos(pos, &right_lock, &right_load, NULL);
  13438. +
  13439. + ret = rapid_flush(pos);
  13440. + if (ret)
  13441. + break;
  13442. + }
  13443. + check_convert_info(pos);
  13444. + done_load_count(&right_load);
  13445. + done_lh(&right_lock);
  13446. + /*
  13447. + * This function indicates via pos whether to stop or go to twig or
  13448. + * continue on current level
  13449. + */
  13450. + return ret;
  13451. +
  13452. +}
  13453. +
  13454. +/* Process nodes on leaf level until unformatted node or rightmost node in the
  13455. + * slum reached. */
  13456. +static int handle_pos_on_leaf(flush_pos_t *pos)
  13457. +{
  13458. + int ret;
  13459. +
  13460. + assert("zam-845", pos->state == POS_ON_LEAF);
  13461. +
  13462. + ret = handle_pos_on_formatted(pos);
  13463. +
  13464. + if (ret == -E_NO_NEIGHBOR) {
  13465. + /* cannot get right neighbor, go process extents. */
  13466. + pos->state = POS_TO_TWIG;
  13467. + return 0;
  13468. + }
  13469. +
  13470. + return ret;
  13471. +}
  13472. +
  13473. +/* Process slum on level > 1 */
  13474. +static int handle_pos_on_internal(flush_pos_t *pos)
  13475. +{
  13476. + assert("zam-850", pos->state == POS_ON_INTERNAL);
  13477. + return handle_pos_on_formatted(pos);
  13478. +}
  13479. +
  13480. +/* check whether squalloc should stop before processing given extent */
  13481. +static int squalloc_extent_should_stop(flush_pos_t *pos)
  13482. +{
  13483. + assert("zam-869", item_is_extent(&pos->coord));
  13484. +
  13485. + /* pos->child is a jnode handle_pos_on_extent() should start with
  13486. + * instead of the first child of the first extent unit. */
  13487. + if (pos->child) {
  13488. + int prepped;
  13489. +
  13490. + assert("vs-1383", jnode_is_unformatted(pos->child));
  13491. + prepped = jnode_check_flushprepped(pos->child);
  13492. + pos->pos_in_unit =
  13493. + jnode_get_index(pos->child) -
  13494. + extent_unit_index(&pos->coord);
  13495. + assert("vs-1470",
  13496. + pos->pos_in_unit < extent_unit_width(&pos->coord));
  13497. + assert("nikita-3434",
  13498. + ergo(extent_is_unallocated(&pos->coord),
  13499. + pos->pos_in_unit == 0));
  13500. + jput(pos->child);
  13501. + pos->child = NULL;
  13502. +
  13503. + return prepped;
  13504. + }
  13505. +
  13506. + pos->pos_in_unit = 0;
  13507. + if (extent_is_unallocated(&pos->coord))
  13508. + return 0;
  13509. +
  13510. + return leftmost_child_of_unit_check_flushprepped(&pos->coord);
  13511. +}
  13512. +
  13513. +/* Handle the case when regular reiser4 tree (znodes connected one to its
  13514. + * neighbors by sibling pointers) is interrupted on leaf level by one or more
  13515. + * unformatted nodes. By having a lock on twig level and use extent code
  13516. + * routines to process unformatted nodes we swim around an irregular part of
  13517. + * reiser4 tree. */
  13518. +static int handle_pos_on_twig(flush_pos_t *pos)
  13519. +{
  13520. + int ret;
  13521. + txmod_plugin *txmod_plug = get_txmod_plugin();
  13522. +
  13523. + assert("zam-844", pos->state == POS_ON_EPOINT);
  13524. + assert("zam-843", item_is_extent(&pos->coord));
  13525. +
  13526. + /* We decide should we continue slum processing with current extent
  13527. + unit: if leftmost child of current extent unit is flushprepped
  13528. + (i.e. clean or already processed by flush) we stop squalloc(). There
  13529. + is a fast check for unallocated extents which we assume contain all
  13530. + not flushprepped nodes. */
  13531. + /* FIXME: Here we implement simple check, we are only looking on the
  13532. + leftmost child. */
  13533. + ret = squalloc_extent_should_stop(pos);
  13534. + if (ret != 0) {
  13535. + pos_stop(pos);
  13536. + return ret;
  13537. + }
  13538. +
  13539. + while (pos_valid(pos) && coord_is_existing_unit(&pos->coord)
  13540. + && item_is_extent(&pos->coord)) {
  13541. + ret = txmod_plug->forward_alloc_unformatted(pos);
  13542. + if (ret)
  13543. + break;
  13544. + coord_next_unit(&pos->coord);
  13545. + }
  13546. +
  13547. + if (coord_is_after_rightmost(&pos->coord)) {
  13548. + pos->state = POS_END_OF_TWIG;
  13549. + return 0;
  13550. + }
  13551. + if (item_is_internal(&pos->coord)) {
  13552. + pos->state = POS_TO_LEAF;
  13553. + return 0;
  13554. + }
  13555. +
  13556. + assert("zam-860", item_is_extent(&pos->coord));
  13557. +
  13558. + /* "slum" is over */
  13559. + pos->state = POS_INVALID;
  13560. + return 0;
  13561. +}
  13562. +
  13563. +/* When we are about to return the flush position from twig to leaf level we
  13564. + * can process the right twig node or move the position to the leaf. This
  13565. + * processes the right twig if possible and jumps to leaf level if not. */
  13566. +static int handle_pos_end_of_twig(flush_pos_t *pos)
  13567. +{
  13568. + int ret;
  13569. + lock_handle right_lock;
  13570. + load_count right_load;
  13571. + coord_t at_right;
  13572. + jnode *child = NULL;
  13573. +
  13574. + assert("zam-848", pos->state == POS_END_OF_TWIG);
  13575. + assert("zam-849", coord_is_after_rightmost(&pos->coord));
  13576. +
  13577. + init_lh(&right_lock);
  13578. + init_load_count(&right_load);
  13579. +
  13580. + /* We get a lock on the right twig node even if it is not dirty because
  13581. + * slum continues or discontinues on leaf level not on next twig. This
  13582. + * lock on the right twig is needed for getting its leftmost child. */
  13583. + ret =
  13584. + reiser4_get_right_neighbor(&right_lock, pos->lock.node,
  13585. + ZNODE_WRITE_LOCK, GN_SAME_ATOM);
  13586. + if (ret)
  13587. + goto out;
  13588. +
  13589. + ret = incr_load_count_znode(&right_load, right_lock.node);
  13590. + if (ret)
  13591. + goto out;
  13592. +
  13593. + /* right twig could be not dirty */
  13594. + if (JF_ISSET(ZJNODE(right_lock.node), JNODE_DIRTY)) {
  13595. + /* If right twig node is dirty we always attempt to squeeze it
  13596. + * content to the left... */
  13597. +became_dirty:
  13598. + ret =
  13599. + squeeze_right_twig_and_advance_coord(pos, right_lock.node);
  13600. + if (ret <= 0) {
  13601. + /* pos->coord is on internal item, go to leaf level, or
  13602. + * we have an error which will be caught in squalloc()
  13603. + */
  13604. + pos->state = POS_TO_LEAF;
  13605. + goto out;
  13606. + }
  13607. +
  13608. + /* If right twig was squeezed completely we have to re-lock
  13609. + * right twig. Now it is done through the top-level squalloc
  13610. + * routine. */
  13611. + if (node_is_empty(right_lock.node))
  13612. + goto out;
  13613. +
  13614. + /* ... and prep it if it is not yet prepped */
  13615. + if (!znode_check_flushprepped(right_lock.node)) {
  13616. + /* As usual, process parent before ... */
  13617. + ret =
  13618. + check_parents_and_squalloc_upper_levels(pos,
  13619. + pos->lock.
  13620. + node,
  13621. + right_lock.
  13622. + node);
  13623. + if (ret)
  13624. + goto out;
  13625. +
  13626. + /* ... processing the child */
  13627. + ret =
  13628. + lock_parent_and_allocate_znode(right_lock.node,
  13629. + pos);
  13630. + if (ret)
  13631. + goto out;
  13632. + }
  13633. + } else {
  13634. + coord_init_first_unit(&at_right, right_lock.node);
  13635. +
  13636. + /* check first child of next twig, should we continue there ? */
  13637. + ret = get_leftmost_child_of_unit(&at_right, &child);
  13638. + if (ret || child == NULL || jnode_check_flushprepped(child)) {
  13639. + pos_stop(pos);
  13640. + goto out;
  13641. + }
  13642. +
  13643. + /* check clean twig for possible relocation */
  13644. + if (!znode_check_flushprepped(right_lock.node)) {
  13645. + ret = reverse_allocate_parent(child, &at_right, pos);
  13646. + if (ret)
  13647. + goto out;
  13648. + if (JF_ISSET(ZJNODE(right_lock.node), JNODE_DIRTY))
  13649. + goto became_dirty;
  13650. + }
  13651. + }
  13652. +
  13653. + assert("zam-875", znode_check_flushprepped(right_lock.node));
  13654. +
  13655. + /* Update the preceder by a block number of just processed right twig
  13656. + * node. The code above could miss the preceder updating because
  13657. + * allocate_znode() could not be called for this node. */
  13658. + pos->preceder.blk = *znode_get_block(right_lock.node);
  13659. + check_preceder(pos->preceder.blk);
  13660. +
  13661. + coord_init_first_unit(&at_right, right_lock.node);
  13662. + assert("zam-868", coord_is_existing_unit(&at_right));
  13663. +
  13664. + pos->state = item_is_extent(&at_right) ? POS_ON_EPOINT : POS_TO_LEAF;
  13665. + move_flush_pos(pos, &right_lock, &right_load, &at_right);
  13666. +
  13667. +out:
  13668. + done_load_count(&right_load);
  13669. + done_lh(&right_lock);
  13670. +
  13671. + if (child)
  13672. + jput(child);
  13673. +
  13674. + return ret;
  13675. +}
  13676. +
  13677. +/* Move the pos->lock to leaf node pointed by pos->coord, check should we
  13678. + * continue there. */
  13679. +static int handle_pos_to_leaf(flush_pos_t *pos)
  13680. +{
  13681. + int ret;
  13682. + lock_handle child_lock;
  13683. + load_count child_load;
  13684. + jnode *child;
  13685. +
  13686. + assert("zam-846", pos->state == POS_TO_LEAF);
  13687. + assert("zam-847", item_is_internal(&pos->coord));
  13688. +
  13689. + init_lh(&child_lock);
  13690. + init_load_count(&child_load);
  13691. +
  13692. + ret = get_leftmost_child_of_unit(&pos->coord, &child);
  13693. + if (ret)
  13694. + return ret;
  13695. + if (child == NULL) {
  13696. + pos_stop(pos);
  13697. + return 0;
  13698. + }
  13699. +
  13700. + if (jnode_check_flushprepped(child)) {
  13701. + pos->state = POS_INVALID;
  13702. + goto out;
  13703. + }
  13704. +
  13705. + ret =
  13706. + longterm_lock_znode(&child_lock, JZNODE(child), ZNODE_WRITE_LOCK,
  13707. + ZNODE_LOCK_LOPRI);
  13708. + if (ret)
  13709. + goto out;
  13710. +
  13711. + ret = incr_load_count_znode(&child_load, JZNODE(child));
  13712. + if (ret)
  13713. + goto out;
  13714. +
  13715. + ret = allocate_znode(JZNODE(child), &pos->coord, pos);
  13716. + if (ret)
  13717. + goto out;
  13718. +
  13719. + /* move flush position to leaf level */
  13720. + pos->state = POS_ON_LEAF;
  13721. + move_flush_pos(pos, &child_lock, &child_load, NULL);
  13722. +
  13723. + if (node_is_empty(JZNODE(child))) {
  13724. + ret = delete_empty_node(JZNODE(child));
  13725. + pos->state = POS_INVALID;
  13726. + }
  13727. +out:
  13728. + done_load_count(&child_load);
  13729. + done_lh(&child_lock);
  13730. + jput(child);
  13731. +
  13732. + return ret;
  13733. +}
  13734. +
  13735. +/* move pos from leaf to twig, and move lock from leaf to twig. */
  13736. +/* Move pos->lock to upper (twig) level */
  13737. +static int handle_pos_to_twig(flush_pos_t *pos)
  13738. +{
  13739. + int ret;
  13740. +
  13741. + lock_handle parent_lock;
  13742. + load_count parent_load;
  13743. + coord_t pcoord;
  13744. +
  13745. + assert("zam-852", pos->state == POS_TO_TWIG);
  13746. +
  13747. + init_lh(&parent_lock);
  13748. + init_load_count(&parent_load);
  13749. +
  13750. + ret =
  13751. + reiser4_get_parent(&parent_lock, pos->lock.node, ZNODE_WRITE_LOCK);
  13752. + if (ret)
  13753. + goto out;
  13754. +
  13755. + ret = incr_load_count_znode(&parent_load, parent_lock.node);
  13756. + if (ret)
  13757. + goto out;
  13758. +
  13759. + ret = find_child_ptr(parent_lock.node, pos->lock.node, &pcoord);
  13760. + if (ret)
  13761. + goto out;
  13762. +
  13763. + assert("zam-870", item_is_internal(&pcoord));
  13764. + coord_next_item(&pcoord);
  13765. +
  13766. + if (coord_is_after_rightmost(&pcoord))
  13767. + pos->state = POS_END_OF_TWIG;
  13768. + else if (item_is_extent(&pcoord))
  13769. + pos->state = POS_ON_EPOINT;
  13770. + else {
  13771. + /* Here we understand that getting -E_NO_NEIGHBOR in
  13772. + * handle_pos_on_leaf() was because of just a reaching edge of
  13773. + * slum */
  13774. + pos_stop(pos);
  13775. + goto out;
  13776. + }
  13777. +
  13778. + move_flush_pos(pos, &parent_lock, &parent_load, &pcoord);
  13779. +
  13780. +out:
  13781. + done_load_count(&parent_load);
  13782. + done_lh(&parent_lock);
  13783. +
  13784. + return ret;
  13785. +}
  13786. +
  13787. +typedef int (*pos_state_handle_t) (flush_pos_t *);
  13788. +static pos_state_handle_t flush_pos_handlers[] = {
  13789. + /* process formatted nodes on leaf level, keep lock on a leaf node */
  13790. + [POS_ON_LEAF] = handle_pos_on_leaf,
  13791. + /* process unformatted nodes, keep lock on twig node, pos->coord points
  13792. + * to extent currently being processed */
  13793. + [POS_ON_EPOINT] = handle_pos_on_twig,
  13794. + /* move a lock from leaf node to its parent for further processing of
  13795. + unformatted nodes */
  13796. + [POS_TO_TWIG] = handle_pos_to_twig,
  13797. + /* move a lock from twig to leaf level when a processing of unformatted
  13798. + * nodes finishes, pos->coord points to the leaf node we jump to */
  13799. + [POS_TO_LEAF] = handle_pos_to_leaf,
  13800. + /* after processing last extent in the twig node, attempting to shift
  13801. + * items from the twig's right neighbor and process them while shifting*/
  13802. + [POS_END_OF_TWIG] = handle_pos_end_of_twig,
  13803. + /* process formatted nodes on internal level, keep lock on an internal
  13804. + node */
  13805. + [POS_ON_INTERNAL] = handle_pos_on_internal
  13806. +};
  13807. +
  13808. +/* Advance flush position horizontally, prepare for flushing ((re)allocate,
  13809. + * squeeze, encrypt) nodes and their ancestors in "parent-first" order */
  13810. +static int squalloc(flush_pos_t *pos)
  13811. +{
  13812. + int ret = 0;
  13813. +
  13814. + /* maybe needs to be made a case statement with handle_pos_on_leaf as
  13815. + * first case, for greater CPU efficiency? Measure and see.... -Hans */
  13816. + while (pos_valid(pos)) {
  13817. + ret = flush_pos_handlers[pos->state] (pos);
  13818. + if (ret < 0)
  13819. + break;
  13820. +
  13821. + ret = rapid_flush(pos);
  13822. + if (ret)
  13823. + break;
  13824. + }
  13825. +
  13826. + /* any positive value or -E_NO_NEIGHBOR are legal return codes for
  13827. + handle_pos* routines, -E_NO_NEIGHBOR means that slum edge was
  13828. + reached */
  13829. + if (ret > 0 || ret == -E_NO_NEIGHBOR)
  13830. + ret = 0;
  13831. +
  13832. + return ret;
  13833. +}
  13834. +
  13835. +static void update_ldkey(znode * node)
  13836. +{
  13837. + reiser4_key ldkey;
  13838. +
  13839. + assert_rw_write_locked(&(znode_get_tree(node)->dk_lock));
  13840. + if (node_is_empty(node))
  13841. + return;
  13842. +
  13843. + znode_set_ld_key(node, leftmost_key_in_node(node, &ldkey));
  13844. +}
  13845. +
  13846. +/* this is to be called after calling of shift node's method to shift data from
  13847. + @right to @left. It sets left delimiting keys of @left and @right to keys of
  13848. + first items of @left and @right correspondingly and sets right delimiting key
  13849. + of @left to first key of @right */
  13850. +static void update_znode_dkeys(znode * left, znode * right)
  13851. +{
  13852. + assert_rw_write_locked(&(znode_get_tree(right)->dk_lock));
  13853. + assert("vs-1629", (znode_is_write_locked(left) &&
  13854. + znode_is_write_locked(right)));
  13855. +
  13856. + /* we need to update left delimiting of left if it was empty before
  13857. + shift */
  13858. + update_ldkey(left);
  13859. + update_ldkey(right);
  13860. + if (node_is_empty(right))
  13861. + znode_set_rd_key(left, znode_get_rd_key(right));
  13862. + else
  13863. + znode_set_rd_key(left, znode_get_ld_key(right));
  13864. +}
  13865. +
  13866. +/* try to shift everything from @right to @left. If everything was shifted -
  13867. + @right is removed from the tree. Result is the number of bytes shifted. */
  13868. +static int
  13869. +shift_everything_left(znode * right, znode * left, carry_level * todo)
  13870. +{
  13871. + coord_t from;
  13872. + node_plugin *nplug;
  13873. + carry_plugin_info info;
  13874. +
  13875. + coord_init_after_last_item(&from, right);
  13876. +
  13877. + nplug = node_plugin_by_node(right);
  13878. + info.doing = NULL;
  13879. + info.todo = todo;
  13880. + return nplug->shift(&from, left, SHIFT_LEFT,
  13881. + 1 /* delete @right if it becomes empty */ ,
  13882. + 1
  13883. + /* move coord @from to node @left if everything will
  13884. + be shifted */
  13885. + ,
  13886. + &info);
  13887. +}
  13888. +
  13889. +/* Shift as much as possible from @right to @left using the memcpy-optimized
  13890. + shift_everything_left. @left and @right are formatted neighboring nodes on
  13891. + leaf level. */
  13892. +static int squeeze_right_non_twig(znode * left, znode * right)
  13893. +{
  13894. + int ret;
  13895. + carry_pool *pool;
  13896. + carry_level *todo;
  13897. +
  13898. + assert("nikita-2246", znode_get_level(left) == znode_get_level(right));
  13899. +
  13900. + if (!JF_ISSET(ZJNODE(left), JNODE_DIRTY) ||
  13901. + !JF_ISSET(ZJNODE(right), JNODE_DIRTY))
  13902. + return SQUEEZE_TARGET_FULL;
  13903. +
  13904. + pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo));
  13905. + if (IS_ERR(pool))
  13906. + return PTR_ERR(pool);
  13907. + todo = (carry_level *) (pool + 1);
  13908. + init_carry_level(todo, pool);
  13909. +
  13910. + ret = shift_everything_left(right, left, todo);
  13911. + if (ret > 0) {
  13912. + /* something was shifted */
  13913. + reiser4_tree *tree;
  13914. + __u64 grabbed;
  13915. +
  13916. + znode_make_dirty(left);
  13917. + znode_make_dirty(right);
  13918. +
  13919. + /* update delimiting keys of nodes which participated in
  13920. + shift. FIXME: it would be better to have this in shift
  13921. + node's operation. But it can not be done there. Nobody
  13922. + remembers why, though
  13923. + */
  13924. + tree = znode_get_tree(left);
  13925. + write_lock_dk(tree);
  13926. + update_znode_dkeys(left, right);
  13927. + write_unlock_dk(tree);
  13928. +
  13929. + /* Carry is called to update delimiting key and, maybe, to
  13930. + remove empty node. */
  13931. + grabbed = get_current_context()->grabbed_blocks;
  13932. + ret = reiser4_grab_space_force(tree->height, BA_RESERVED);
  13933. + assert("nikita-3003", ret == 0); /* reserved space is
  13934. + exhausted. Ask Hans. */
  13935. + ret = reiser4_carry(todo, NULL/* previous level */);
  13936. + grabbed2free_mark(grabbed);
  13937. + } else {
  13938. + /* Shifting impossible, we return appropriate result code */
  13939. + ret =
  13940. + node_is_empty(right) ? SQUEEZE_SOURCE_EMPTY :
  13941. + SQUEEZE_TARGET_FULL;
  13942. + }
  13943. +
  13944. + done_carry_pool(pool);
  13945. +
  13946. + return ret;
  13947. +}
  13948. +
  13949. +#if REISER4_DEBUG
  13950. +static int sibling_link_is_ok(const znode *left, const znode *right)
  13951. +{
  13952. + int result;
  13953. +
  13954. + read_lock_tree(znode_get_tree(left));
  13955. + result = (left->right == right && left == right->left);
  13956. + read_unlock_tree(znode_get_tree(left));
  13957. + return result;
  13958. +}
  13959. +#endif
  13960. +
  13961. +/* Shift first unit of first item if it is an internal one. Return
  13962. + SQUEEZE_TARGET_FULL if it fails to shift an item, otherwise return
  13963. + SUBTREE_MOVED. */
  13964. +static int shift_one_internal_unit(znode * left, znode * right)
  13965. +{
  13966. + int ret;
  13967. + carry_pool *pool;
  13968. + carry_level *todo;
  13969. + coord_t *coord;
  13970. + carry_plugin_info *info;
  13971. + int size, moved;
  13972. +
  13973. + assert("nikita-2247", znode_get_level(left) == znode_get_level(right));
  13974. + assert("nikita-2435", znode_is_write_locked(left));
  13975. + assert("nikita-2436", znode_is_write_locked(right));
  13976. + assert("nikita-2434", sibling_link_is_ok(left, right));
  13977. +
  13978. + pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo) +
  13979. + sizeof(*coord) + sizeof(*info)
  13980. +#if REISER4_DEBUG
  13981. + + sizeof(*coord) + 2 * sizeof(reiser4_key)
  13982. +#endif
  13983. + );
  13984. + if (IS_ERR(pool))
  13985. + return PTR_ERR(pool);
  13986. + todo = (carry_level *) (pool + 1);
  13987. + init_carry_level(todo, pool);
  13988. +
  13989. + coord = (coord_t *) (todo + 3);
  13990. + coord_init_first_unit(coord, right);
  13991. + info = (carry_plugin_info *) (coord + 1);
  13992. +
  13993. +#if REISER4_DEBUG
  13994. + if (!node_is_empty(left)) {
  13995. + coord_t *last;
  13996. + reiser4_key *right_key;
  13997. + reiser4_key *left_key;
  13998. +
  13999. + last = (coord_t *) (info + 1);
  14000. + right_key = (reiser4_key *) (last + 1);
  14001. + left_key = right_key + 1;
  14002. + coord_init_last_unit(last, left);
  14003. +
  14004. + assert("nikita-2463",
  14005. + keyle(item_key_by_coord(last, left_key),
  14006. + item_key_by_coord(coord, right_key)));
  14007. + }
  14008. +#endif
  14009. +
  14010. + assert("jmacd-2007", item_is_internal(coord));
  14011. +
  14012. + size = item_length_by_coord(coord);
  14013. + info->todo = todo;
  14014. + info->doing = NULL;
  14015. +
  14016. + ret = node_plugin_by_node(left)->shift(coord, left, SHIFT_LEFT,
  14017. + 1
  14018. + /* delete @right if it becomes
  14019. + empty */
  14020. + ,
  14021. + 0
  14022. + /* do not move coord @coord to
  14023. + node @left */
  14024. + ,
  14025. + info);
  14026. +
  14027. + /* If shift returns positive, then we shifted the item. */
  14028. + assert("vs-423", ret <= 0 || size == ret);
  14029. + moved = (ret > 0);
  14030. +
  14031. + if (moved) {
  14032. + /* something was moved */
  14033. + reiser4_tree *tree;
  14034. + int grabbed;
  14035. +
  14036. + znode_make_dirty(left);
  14037. + znode_make_dirty(right);
  14038. + tree = znode_get_tree(left);
  14039. + write_lock_dk(tree);
  14040. + update_znode_dkeys(left, right);
  14041. + write_unlock_dk(tree);
  14042. +
  14043. + /* reserve space for delimiting keys after shifting */
  14044. + grabbed = get_current_context()->grabbed_blocks;
  14045. + ret = reiser4_grab_space_force(tree->height, BA_RESERVED);
  14046. + assert("nikita-3003", ret == 0); /* reserved space is
  14047. + exhausted. Ask Hans. */
  14048. +
  14049. + ret = reiser4_carry(todo, NULL/* previous level */);
  14050. + grabbed2free_mark(grabbed);
  14051. + }
  14052. +
  14053. + done_carry_pool(pool);
  14054. +
  14055. + if (ret != 0) {
  14056. + /* Shift or carry operation failed. */
  14057. + assert("jmacd-7325", ret < 0);
  14058. + return ret;
  14059. + }
  14060. +
  14061. + return moved ? SUBTREE_MOVED : SQUEEZE_TARGET_FULL;
  14062. +}
  14063. +
  14064. +static int allocate_znode(znode * node,
  14065. + const coord_t *parent_coord, flush_pos_t *pos)
  14066. +{
  14067. + txmod_plugin *plug = get_txmod_plugin();
  14068. + /*
  14069. + * perform znode allocation with znode pinned in memory to avoid races
  14070. + * with asynchronous emergency flush (which plays with
  14071. + * JNODE_FLUSH_RESERVED bit).
  14072. + */
  14073. + return WITH_DATA(node, plug->forward_alloc_formatted(node,
  14074. + parent_coord,
  14075. + pos));
  14076. +}
  14077. +
  14078. +
  14079. +/* JNODE INTERFACE */
  14080. +
  14081. +/* Lock a node (if formatted) and then get its parent locked, set the child's
  14082. + coordinate in the parent. If the child is the root node, the above_root
  14083. + znode is returned but the coord is not set. This function may cause atom
  14084. + fusion, but it is only used for read locks (at this point) and therefore
  14085. + fusion only occurs when the parent is already dirty. */
  14086. +/* Hans adds this note: remember to ask how expensive this operation is vs.
  14087. + storing parent pointer in jnodes. */
  14088. +static int
  14089. +jnode_lock_parent_coord(jnode * node,
  14090. + coord_t *coord,
  14091. + lock_handle * parent_lh,
  14092. + load_count * parent_zh,
  14093. + znode_lock_mode parent_mode, int try)
  14094. +{
  14095. + int ret;
  14096. +
  14097. + assert("edward-53", jnode_is_unformatted(node) || jnode_is_znode(node));
  14098. + assert("edward-54", jnode_is_unformatted(node)
  14099. + || znode_is_any_locked(JZNODE(node)));
  14100. +
  14101. + if (!jnode_is_znode(node)) {
  14102. + reiser4_key key;
  14103. + tree_level stop_level = TWIG_LEVEL;
  14104. + lookup_bias bias = FIND_EXACT;
  14105. +
  14106. + assert("edward-168", !(jnode_get_type(node) == JNODE_BITMAP));
  14107. +
  14108. + /* The case when node is not znode, but can have parent coord
  14109. + (unformatted node, node which represents cluster page,
  14110. + etc..). Generate a key for the appropriate entry, search
  14111. + in the tree using coord_by_key, which handles locking for
  14112. + us. */
  14113. +
  14114. + /*
  14115. + * nothing is locked at this moment, so, nothing prevents
  14116. + * concurrent truncate from removing jnode from inode. To
  14117. + * prevent this spin-lock jnode. jnode can be truncated just
  14118. + * after call to the jnode_build_key(), but this is ok,
  14119. + * because coord_by_key() will just fail to find appropriate
  14120. + * extent.
  14121. + */
  14122. + spin_lock_jnode(node);
  14123. + if (!JF_ISSET(node, JNODE_HEARD_BANSHEE)) {
  14124. + jnode_build_key(node, &key);
  14125. + ret = 0;
  14126. + } else
  14127. + ret = RETERR(-ENOENT);
  14128. + spin_unlock_jnode(node);
  14129. +
  14130. + if (ret != 0)
  14131. + return ret;
  14132. +
  14133. + if (jnode_is_cluster_page(node))
  14134. + stop_level = LEAF_LEVEL;
  14135. +
  14136. + assert("jmacd-1812", coord != NULL);
  14137. +
  14138. + ret = coord_by_key(jnode_get_tree(node), &key, coord, parent_lh,
  14139. + parent_mode, bias, stop_level, stop_level,
  14140. + CBK_UNIQUE, NULL/*ra_info */);
  14141. + switch (ret) {
  14142. + case CBK_COORD_NOTFOUND:
  14143. + assert("edward-1038",
  14144. + ergo(jnode_is_cluster_page(node),
  14145. + JF_ISSET(node, JNODE_HEARD_BANSHEE)));
  14146. + if (!JF_ISSET(node, JNODE_HEARD_BANSHEE))
  14147. + warning("nikita-3177", "Parent not found");
  14148. + return ret;
  14149. + case CBK_COORD_FOUND:
  14150. + if (coord->between != AT_UNIT) {
  14151. + /* FIXME: comment needed */
  14152. + done_lh(parent_lh);
  14153. + if (!JF_ISSET(node, JNODE_HEARD_BANSHEE)) {
  14154. + warning("nikita-3178",
  14155. + "Found but not happy: %i",
  14156. + coord->between);
  14157. + }
  14158. + return RETERR(-ENOENT);
  14159. + }
  14160. + ret = incr_load_count_znode(parent_zh, parent_lh->node);
  14161. + if (ret != 0)
  14162. + return ret;
  14163. + /* if (jnode_is_cluster_page(node)) {
  14164. + races with write() are possible
  14165. + check_child_cluster (parent_lh->node);
  14166. + }
  14167. + */
  14168. + break;
  14169. + default:
  14170. + return ret;
  14171. + }
  14172. +
  14173. + } else {
  14174. + int flags;
  14175. + znode *z;
  14176. +
  14177. + z = JZNODE(node);
  14178. + /* Formatted node case: */
  14179. + assert("jmacd-2061", !znode_is_root(z));
  14180. +
  14181. + flags = GN_ALLOW_NOT_CONNECTED;
  14182. + if (try)
  14183. + flags |= GN_TRY_LOCK;
  14184. +
  14185. + ret =
  14186. + reiser4_get_parent_flags(parent_lh, z, parent_mode, flags);
  14187. + if (ret != 0)
  14188. + /* -E_REPEAT is ok here, it is handled by the caller. */
  14189. + return ret;
  14190. +
  14191. + /* Make the child's position "hint" up-to-date. (Unless above
  14192. + root, which caller must check.) */
  14193. + if (coord != NULL) {
  14194. +
  14195. + ret = incr_load_count_znode(parent_zh, parent_lh->node);
  14196. + if (ret != 0) {
  14197. + warning("jmacd-976812386",
  14198. + "incr_load_count_znode failed: %d",
  14199. + ret);
  14200. + return ret;
  14201. + }
  14202. +
  14203. + ret = find_child_ptr(parent_lh->node, z, coord);
  14204. + if (ret != 0) {
  14205. + warning("jmacd-976812",
  14206. + "find_child_ptr failed: %d", ret);
  14207. + return ret;
  14208. + }
  14209. + }
  14210. + }
  14211. +
  14212. + return 0;
  14213. +}
  14214. +
  14215. +/* Get the (locked) next neighbor of a znode which is dirty and a member of the
  14216. + same atom. If there is no next neighbor or the neighbor is not in memory or
  14217. + if there is a neighbor but it is not dirty or not in the same atom,
  14218. + -E_NO_NEIGHBOR is returned. In some cases the slum may include nodes which
  14219. + are not dirty, if so @check_dirty should be 0 */
  14220. +static int neighbor_in_slum(znode * node, /* starting point */
  14221. + lock_handle * lock, /* lock on starting point */
  14222. + sideof side, /* left or right direction we
  14223. + seek the next node in */
  14224. + znode_lock_mode mode, /* kind of lock we want */
  14225. + int check_dirty, /* true if the neighbor should
  14226. + be dirty */
  14227. + int use_upper_levels /* get neighbor by going through
  14228. + upper levels */)
  14229. +{
  14230. + int ret;
  14231. + int flags;
  14232. +
  14233. + assert("jmacd-6334", znode_is_connected(node));
  14234. +
  14235. + flags = GN_SAME_ATOM | (side == LEFT_SIDE ? GN_GO_LEFT : 0);
  14236. + if (use_upper_levels)
  14237. + flags |= GN_CAN_USE_UPPER_LEVELS;
  14238. +
  14239. + ret = reiser4_get_neighbor(lock, node, mode, flags);
  14240. + if (ret) {
  14241. + /* May return -ENOENT or -E_NO_NEIGHBOR. */
  14242. + /* FIXME(C): check EINVAL, E_DEADLOCK */
  14243. + if (ret == -ENOENT)
  14244. + ret = RETERR(-E_NO_NEIGHBOR);
  14245. + return ret;
  14246. + }
  14247. + if (!check_dirty)
  14248. + return 0;
  14249. + /* Check dirty bit of locked znode, no races here */
  14250. + if (JF_ISSET(ZJNODE(lock->node), JNODE_DIRTY))
  14251. + return 0;
  14252. +
  14253. + done_lh(lock);
  14254. + return RETERR(-E_NO_NEIGHBOR);
  14255. +}
  14256. +
  14257. +/* Return true if two znodes have the same parent. This is called with both
  14258. + nodes write-locked (for squeezing) so no tree lock is needed. */
  14259. +static int znode_same_parents(znode * a, znode * b)
  14260. +{
  14261. + int result;
  14262. +
  14263. + assert("jmacd-7011", znode_is_write_locked(a));
  14264. + assert("jmacd-7012", znode_is_write_locked(b));
  14265. +
  14266. + /* We lock the whole tree for this check.... I really don't like whole
  14267. + * tree locks... -Hans */
  14268. + read_lock_tree(znode_get_tree(a));
  14269. + result = (znode_parent(a) == znode_parent(b));
  14270. + read_unlock_tree(znode_get_tree(a));
  14271. + return result;
  14272. +}
  14273. +
  14274. +/* FLUSH SCAN */
  14275. +
  14276. +/* Initialize the flush_scan data structure. */
  14277. +static void scan_init(flush_scan * scan)
  14278. +{
  14279. + memset(scan, 0, sizeof(*scan));
  14280. + init_lh(&scan->node_lock);
  14281. + init_lh(&scan->parent_lock);
  14282. + init_load_count(&scan->parent_load);
  14283. + init_load_count(&scan->node_load);
  14284. + coord_init_invalid(&scan->parent_coord, NULL);
  14285. +}
  14286. +
  14287. +/* Release any resources held by the flush scan, e.g. release locks,
  14288. + free memory, etc. */
  14289. +static void scan_done(flush_scan * scan)
  14290. +{
  14291. + done_load_count(&scan->node_load);
  14292. + if (scan->node != NULL) {
  14293. + jput(scan->node);
  14294. + scan->node = NULL;
  14295. + }
  14296. + done_load_count(&scan->parent_load);
  14297. + done_lh(&scan->parent_lock);
  14298. + done_lh(&scan->node_lock);
  14299. +}
  14300. +
  14301. +/* Returns true if flush scanning is finished. */
  14302. +int reiser4_scan_finished(flush_scan * scan)
  14303. +{
  14304. + return scan->stop || (scan->direction == RIGHT_SIDE &&
  14305. + scan->count >= scan->max_count);
  14306. +}
  14307. +
  14308. +/* Return true if the scan should continue to the @tonode. True if the node
  14309. + meets the same_slum_check condition. If not, deref the "left" node and stop
  14310. + the scan. */
  14311. +int reiser4_scan_goto(flush_scan * scan, jnode * tonode)
  14312. +{
  14313. + int go = same_slum_check(scan->node, tonode, 1, 0);
  14314. +
  14315. + if (!go) {
  14316. + scan->stop = 1;
  14317. + jput(tonode);
  14318. + }
  14319. +
  14320. + return go;
  14321. +}
  14322. +
  14323. +/* Set the current scan->node, refcount it, increment count by the @add_count
  14324. + (number to count, e.g., skipped unallocated nodes), deref previous current,
  14325. + and copy the current parent coordinate. */
  14326. +int
  14327. +scan_set_current(flush_scan * scan, jnode * node, unsigned add_count,
  14328. + const coord_t *parent)
  14329. +{
  14330. + /* Release the old references, take the new reference. */
  14331. + done_load_count(&scan->node_load);
  14332. +
  14333. + if (scan->node != NULL)
  14334. + jput(scan->node);
  14335. + scan->node = node;
  14336. + scan->count += add_count;
  14337. +
  14338. + /* This next stmt is somewhat inefficient. The reiser4_scan_extent()
  14339. + code could delay this update step until it finishes and update the
  14340. + parent_coord only once. It did that before, but there was a bug and
  14341. + this was the easiest way to make it correct. */
  14342. + if (parent != NULL)
  14343. + coord_dup(&scan->parent_coord, parent);
  14344. +
  14345. + /* Failure may happen at the incr_load_count call, but the caller can
  14346. + assume the reference is safely taken. */
  14347. + return incr_load_count_jnode(&scan->node_load, node);
  14348. +}
  14349. +
  14350. +/* Return true if scanning in the leftward direction. */
  14351. +int reiser4_scanning_left(flush_scan * scan)
  14352. +{
  14353. + return scan->direction == LEFT_SIDE;
  14354. +}
  14355. +
  14356. +/* Performs leftward scanning starting from either kind of node. Counts the
  14357. + starting node. The right-scan object is passed in for the left-scan in order
  14358. + to copy the parent of an unformatted starting position. This way we avoid
  14359. + searching for the unformatted node's parent when scanning in each direction.
  14360. + If we search for the parent once it is set in both scan objects. The limit
  14361. + parameter tells flush-scan when to stop.
  14362. +
  14363. + Rapid scanning is used only during scan_left, where we are interested in
  14364. + finding the 'leftpoint' where we begin flushing. We are interested in
  14365. + stopping at the left child of a twig that does not have a dirty left
  14366. + neighbour. THIS IS A SPECIAL CASE. The problem is finding a way to flush only
  14367. + those nodes without unallocated children, and it is difficult to solve in the
  14368. + bottom-up flushing algorithm we are currently using. The problem can be
  14369. + solved by scanning left at every level as we go upward, but this would
  14370. + basically bring us back to using a top-down allocation strategy, which we
  14371. + already tried (see BK history from May 2002), and has a different set of
  14372. + problems. The top-down strategy makes avoiding unallocated children easier,
  14373. + but makes it difficult to properly flush dirty children with clean parents
  14374. + that would otherwise stop the top-down flush, only later to dirty the parent
  14375. + once the children are flushed. So we solve the problem in the bottom-up
  14376. + algorithm with a special case for twigs and leaves only.
  14377. +
  14378. + The first step in solving the problem is this rapid leftward scan. After we
  14379. + determine that there are at least enough nodes counted to qualify for
  14380. + FLUSH_RELOCATE_THRESHOLD we are no longer interested in the exact count, we
  14381. + are only interested in finding the best place to start the flush.
  14382. +
  14383. + We could choose one of two possibilities:
  14384. +
  14385. + 1. Stop at the leftmost child (of a twig) that does not have a dirty left
  14386. + neighbor. This requires checking one leaf per rapid-scan twig
  14387. +
  14388. + 2. Stop at the leftmost child (of a twig) where there are no dirty children
  14389. + of the twig to the left. This requires checking possibly all of the in-memory
  14390. + children of each twig during the rapid scan.
  14391. +
  14392. + For now we implement the first policy.
  14393. +*/
  14394. +static int
  14395. +scan_left(flush_scan * scan, flush_scan * right, jnode * node, unsigned limit)
  14396. +{
  14397. + int ret = 0;
  14398. +
  14399. + scan->max_count = limit;
  14400. + scan->direction = LEFT_SIDE;
  14401. +
  14402. + ret = scan_set_current(scan, jref(node), 1, NULL);
  14403. + if (ret != 0)
  14404. + return ret;
  14405. +
  14406. + ret = scan_common(scan, right);
  14407. + if (ret != 0)
  14408. + return ret;
  14409. +
  14410. + /* Before rapid scanning, we need a lock on scan->node so that we can
  14411. + get its parent, only if formatted. */
  14412. + if (jnode_is_znode(scan->node)) {
  14413. + ret = longterm_lock_znode(&scan->node_lock, JZNODE(scan->node),
  14414. + ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI);
  14415. + }
  14416. +
  14417. + /* Rapid_scan would go here (with limit set to FLUSH_RELOCATE_THRESHOLD)
  14418. + */
  14419. + return ret;
  14420. +}
  14421. +
  14422. +/* Performs rightward scanning... Does not count the starting node. The limit
  14423. + parameter is described in scan_left. If the starting node is unformatted then
  14424. + the parent_coord was already set during scan_left. The rapid_after parameter
  14425. + is not used during right-scanning.
  14426. +
  14427. + scan_right is only called if the scan_left operation does not count at least
  14428. + FLUSH_RELOCATE_THRESHOLD nodes for flushing. Otherwise, the limit parameter
  14429. + is set to the difference between scan-left's count and
  14430. + FLUSH_RELOCATE_THRESHOLD, meaning scan-right counts as high as
  14431. + FLUSH_RELOCATE_THRESHOLD and then stops. */
  14432. +static int scan_right(flush_scan * scan, jnode * node, unsigned limit)
  14433. +{
  14434. + int ret;
  14435. +
  14436. + scan->max_count = limit;
  14437. + scan->direction = RIGHT_SIDE;
  14438. +
  14439. + ret = scan_set_current(scan, jref(node), 0, NULL);
  14440. + if (ret != 0)
  14441. + return ret;
  14442. +
  14443. + return scan_common(scan, NULL);
  14444. +}
  14445. +
  14446. +/* Common code to perform left or right scanning. */
  14447. +static int scan_common(flush_scan * scan, flush_scan * other)
  14448. +{
  14449. + int ret;
  14450. +
  14451. + assert("nikita-2376", scan->node != NULL);
  14452. + assert("edward-54", jnode_is_unformatted(scan->node)
  14453. + || jnode_is_znode(scan->node));
  14454. +
  14455. + /* Special case for starting at an unformatted node. Optimization: we
  14456. + only want to search for the parent (which requires a tree traversal)
  14457. + once. Obviously, we shouldn't have to call it once for the left scan
  14458. + and once for the right scan. For this reason, if we search for the
  14459. + parent during scan-left we then duplicate the coord/lock/load into
  14460. + the scan-right object. */
  14461. + if (jnode_is_unformatted(scan->node)) {
  14462. + ret = scan_unformatted(scan, other);
  14463. + if (ret != 0)
  14464. + return ret;
  14465. + }
  14466. + /* This loop expects to start at a formatted position and performs
  14467. + chaining of formatted regions */
  14468. + while (!reiser4_scan_finished(scan)) {
  14469. +
  14470. + ret = scan_formatted(scan);
  14471. + if (ret != 0)
  14472. + return ret;
  14473. + }
  14474. +
  14475. + return 0;
  14476. +}
  14477. +
  14478. +static int scan_unformatted(flush_scan * scan, flush_scan * other)
  14479. +{
  14480. + int ret = 0;
  14481. + int try = 0;
  14482. +
  14483. + if (!coord_is_invalid(&scan->parent_coord))
  14484. + goto scan;
  14485. +
  14486. + /* set parent coord from */
  14487. + if (!jnode_is_unformatted(scan->node)) {
  14488. + /* formatted position */
  14489. +
  14490. + lock_handle lock;
  14491. + assert("edward-301", jnode_is_znode(scan->node));
  14492. + init_lh(&lock);
  14493. +
  14494. + /*
  14495. + * when flush starts from unformatted node, first thing it
  14496. + * does is tree traversal to find formatted parent of starting
  14497. + * node. This parent is then kept locked across scans to the
  14498. + * left and to the right. This means that during scan to the
  14499. + * left we cannot take left-ward lock, because this is
  14500. + * dead-lock prone. So, if we are scanning to the left and
  14501. + * there is already lock held by this thread,
  14502. + * jnode_lock_parent_coord() should use try-lock.
  14503. + */
  14504. + try = reiser4_scanning_left(scan)
  14505. + && !lock_stack_isclean(get_current_lock_stack());
  14506. + /* Need the node locked to get the parent lock, We have to
  14507. + take write lock since there is at least one call path
  14508. + where this znode is already write-locked by us. */
  14509. + ret =
  14510. + longterm_lock_znode(&lock, JZNODE(scan->node),
  14511. + ZNODE_WRITE_LOCK,
  14512. + reiser4_scanning_left(scan) ?
  14513. + ZNODE_LOCK_LOPRI :
  14514. + ZNODE_LOCK_HIPRI);
  14515. + if (ret != 0)
  14516. + /* EINVAL or E_DEADLOCK here mean... try again! At this
  14517. + point we've scanned too far and can't back out, just
  14518. + start over. */
  14519. + return ret;
  14520. +
  14521. + ret = jnode_lock_parent_coord(scan->node,
  14522. + &scan->parent_coord,
  14523. + &scan->parent_lock,
  14524. + &scan->parent_load,
  14525. + ZNODE_WRITE_LOCK, try);
  14526. +
  14527. + /* FIXME(C): check EINVAL, E_DEADLOCK */
  14528. + done_lh(&lock);
  14529. + if (ret == -E_REPEAT) {
  14530. + scan->stop = 1;
  14531. + return 0;
  14532. + }
  14533. + if (ret)
  14534. + return ret;
  14535. +
  14536. + } else {
  14537. + /* unformatted position */
  14538. +
  14539. + ret =
  14540. + jnode_lock_parent_coord(scan->node, &scan->parent_coord,
  14541. + &scan->parent_lock,
  14542. + &scan->parent_load,
  14543. + ZNODE_WRITE_LOCK, try);
  14544. +
  14545. + if (IS_CBKERR(ret))
  14546. + return ret;
  14547. +
  14548. + if (ret == CBK_COORD_NOTFOUND)
  14549. + /* FIXME(C): check EINVAL, E_DEADLOCK */
  14550. + return ret;
  14551. +
  14552. + /* parent was found */
  14553. + assert("jmacd-8661", other != NULL);
  14554. + /* Duplicate the reference into the other flush_scan. */
  14555. + coord_dup(&other->parent_coord, &scan->parent_coord);
  14556. + copy_lh(&other->parent_lock, &scan->parent_lock);
  14557. + copy_load_count(&other->parent_load, &scan->parent_load);
  14558. + }
  14559. +scan:
  14560. + return scan_by_coord(scan);
  14561. +}
  14562. +
  14563. +/* Performs left- or rightward scanning starting from a formatted node. Follow
  14564. + left/right sibling pointers under the tree lock as long as:
  14565. +
  14566. + - node->left/right is non-NULL
  14567. + - node->left/right is connected, dirty
  14568. + - node->left/right belongs to the same atom
  14569. + - scan has not reached maximum count
  14570. +*/
  14571. +static int scan_formatted(flush_scan * scan)
  14572. +{
  14573. + int ret;
  14574. + znode *neighbor = NULL;
  14575. +
  14576. + assert("jmacd-1401", !reiser4_scan_finished(scan));
  14577. +
  14578. + do {
  14579. + znode *node = JZNODE(scan->node);
  14580. +
  14581. + /* Node should be connected, but if not stop the scan. */
  14582. + if (!znode_is_connected(node)) {
  14583. + scan->stop = 1;
  14584. + break;
  14585. + }
  14586. +
  14587. + /* Lock the tree, check-for and reference the next sibling. */
  14588. + read_lock_tree(znode_get_tree(node));
  14589. +
  14590. + /* It may be that a node is inserted or removed between a node
  14591. + and its left sibling while the tree lock is released, but the
  14592. + flush-scan count does not need to be precise. Thus, we
  14593. + release the tree lock as soon as we get the neighboring node.
  14594. + */
  14595. + neighbor =
  14596. + reiser4_scanning_left(scan) ? node->left : node->right;
  14597. + if (neighbor != NULL)
  14598. + zref(neighbor);
  14599. +
  14600. + read_unlock_tree(znode_get_tree(node));
  14601. +
  14602. + /* If neighbor is NULL at the leaf level, need to check for an
  14603. + unformatted sibling using the parent--break in any case. */
  14604. + if (neighbor == NULL)
  14605. + break;
  14606. +
  14607. + /* Check the condition for continuing the scan; break if not
  14608. + met. This also releases (jputs) the neighbor if false. */
  14609. + if (!reiser4_scan_goto(scan, ZJNODE(neighbor)))
  14610. + break;
  14611. +
  14612. + /* Advance the flush_scan state in the scan direction, repeat */
  14613. + ret = scan_set_current(scan, ZJNODE(neighbor), 1, NULL);
  14614. + if (ret != 0)
  14615. + return ret;
  14616. +
  14617. + } while (!reiser4_scan_finished(scan));
  14618. +
  14619. + /* If neighbor is NULL then we reached the end of a formatted region, or
  14620. + else the sibling is out of memory, now check for an extent to the
  14621. + left (as long as LEAF_LEVEL). */
  14622. + if (neighbor != NULL || jnode_get_level(scan->node) != LEAF_LEVEL
  14623. + || reiser4_scan_finished(scan)) {
  14624. + scan->stop = 1;
  14625. + return 0;
  14626. + }
  14627. + /* Otherwise, calls scan_by_coord for the right(left)most item of the
  14628. + left(right) neighbor on the parent level, then possibly continue. */
  14629. +
  14630. + coord_init_invalid(&scan->parent_coord, NULL);
  14631. + return scan_unformatted(scan, NULL);
  14632. +}
  14633. +
  14634. +/* NOTE-EDWARD:
  14635. + This scans adjacent items of the same type and calls scan flush plugin for
  14636. + each one. Performs left(right)ward scanning starting from a (possibly)
  14637. + unformatted node. If we start from unformatted node, then we continue only if
  14638. + the next neighbor is also unformatted. When called from scan_formatted, we
  14639. + skip first iteration (to make sure that right(left)most item of the
  14640. + left(right) neighbor on the parent level is of the same type and set
  14641. + appropriate coord). */
  14642. +static int scan_by_coord(flush_scan * scan)
  14643. +{
  14644. + int ret = 0;
  14645. + int scan_this_coord;
  14646. + lock_handle next_lock;
  14647. + load_count next_load;
  14648. + coord_t next_coord;
  14649. + jnode *child;
  14650. + item_plugin *iplug;
  14651. +
  14652. + init_lh(&next_lock);
  14653. + init_load_count(&next_load);
  14654. + scan_this_coord = (jnode_is_unformatted(scan->node) ? 1 : 0);
  14655. +
  14656. + /* set initial item id */
  14657. + iplug = item_plugin_by_coord(&scan->parent_coord);
  14658. +
  14659. + for (; !reiser4_scan_finished(scan); scan_this_coord = 1) {
  14660. + if (scan_this_coord) {
  14661. + /* Here we expect that unit is scannable. It may not
  14662. + * be so due to a race with extent->tail conversion. */
  14663. + if (iplug->f.scan == NULL) {
  14664. + scan->stop = 1;
  14665. + ret = -E_REPEAT;
  14666. + /* skip the check at the end. */
  14667. + goto race;
  14668. + }
  14669. +
  14670. + ret = iplug->f.scan(scan);
  14671. + if (ret != 0)
  14672. + goto exit;
  14673. +
  14674. + if (reiser4_scan_finished(scan)) {
  14675. + checkchild(scan);
  14676. + break;
  14677. + }
  14678. + } else {
  14679. + /* the same race against truncate as above is possible
  14680. + * here, it seems */
  14681. +
  14682. + /* NOTE-JMACD: In this case, apply the same end-of-node
  14683. + logic but don't scan the first coordinate. */
  14684. + assert("jmacd-1231",
  14685. + item_is_internal(&scan->parent_coord));
  14686. + }
  14687. +
  14688. + if (iplug->f.utmost_child == NULL
  14689. + || znode_get_level(scan->parent_coord.node) != TWIG_LEVEL) {
  14690. + /* stop this coord and continue on parent level */
  14691. + ret =
  14692. + scan_set_current(scan,
  14693. + ZJNODE(zref
  14694. + (scan->parent_coord.node)),
  14695. + 1, NULL);
  14696. + if (ret != 0)
  14697. + goto exit;
  14698. + break;
  14699. + }
  14700. +
  14701. + /* Either way, the invariant is that scan->parent_coord is set
  14702. + to the parent of scan->node. Now get the next unit. */
  14703. + coord_dup(&next_coord, &scan->parent_coord);
  14704. + coord_sideof_unit(&next_coord, scan->direction);
  14705. +
  14706. + /* If off-the-end of the twig, try the next twig. */
  14707. + if (coord_is_after_sideof_unit(&next_coord, scan->direction)) {
  14708. + /* We take the write lock because we may start flushing
  14709. + * from this coordinate. */
  14710. + ret = neighbor_in_slum(next_coord.node,
  14711. + &next_lock,
  14712. + scan->direction,
  14713. + ZNODE_WRITE_LOCK,
  14714. + 1 /* check dirty */,
  14715. + 0 /* don't go through upper
  14716. + levels */);
  14717. + if (ret == -E_NO_NEIGHBOR) {
  14718. + scan->stop = 1;
  14719. + ret = 0;
  14720. + break;
  14721. + }
  14722. +
  14723. + if (ret != 0)
  14724. + goto exit;
  14725. +
  14726. + ret = incr_load_count_znode(&next_load, next_lock.node);
  14727. + if (ret != 0)
  14728. + goto exit;
  14729. +
  14730. + coord_init_sideof_unit(&next_coord, next_lock.node,
  14731. + sideof_reverse(scan->direction));
  14732. + }
  14733. +
  14734. + iplug = item_plugin_by_coord(&next_coord);
  14735. +
  14736. + /* Get the next child. */
  14737. + ret =
  14738. + iplug->f.utmost_child(&next_coord,
  14739. + sideof_reverse(scan->direction),
  14740. + &child);
  14741. + if (ret != 0)
  14742. + goto exit;
  14743. + /* If the next child is not in memory, or, item_utmost_child
  14744. + failed (due to race with unlink, most probably), stop
  14745. + here. */
  14746. + if (child == NULL || IS_ERR(child)) {
  14747. + scan->stop = 1;
  14748. + checkchild(scan);
  14749. + break;
  14750. + }
  14751. +
  14752. + assert("nikita-2374", jnode_is_unformatted(child)
  14753. + || jnode_is_znode(child));
  14754. +
  14755. + /* See if it is dirty, part of the same atom. */
  14756. + if (!reiser4_scan_goto(scan, child)) {
  14757. + checkchild(scan);
  14758. + break;
  14759. + }
  14760. +
  14761. + /* If so, make this child current. */
  14762. + ret = scan_set_current(scan, child, 1, &next_coord);
  14763. + if (ret != 0)
  14764. + goto exit;
  14765. +
  14766. + /* Now continue. If formatted we release the parent lock and
  14767. + return, then proceed. */
  14768. + if (jnode_is_znode(child))
  14769. + break;
  14770. +
  14771. + /* Otherwise, repeat the above loop with next_coord. */
  14772. + if (next_load.node != NULL) {
  14773. + done_lh(&scan->parent_lock);
  14774. + move_lh(&scan->parent_lock, &next_lock);
  14775. + move_load_count(&scan->parent_load, &next_load);
  14776. + }
  14777. + }
  14778. +
  14779. + assert("jmacd-6233",
  14780. + reiser4_scan_finished(scan) || jnode_is_znode(scan->node));
  14781. +exit:
  14782. + checkchild(scan);
  14783. +race: /* skip the above check */
  14784. + if (jnode_is_znode(scan->node)) {
  14785. + done_lh(&scan->parent_lock);
  14786. + done_load_count(&scan->parent_load);
  14787. + }
  14788. +
  14789. + done_load_count(&next_load);
  14790. + done_lh(&next_lock);
  14791. + return ret;
  14792. +}
  14793. +
  14794. +/* FLUSH POS HELPERS */
  14795. +
  14796. +/* Initialize the fields of a flush_position. */
  14797. +static void pos_init(flush_pos_t *pos)
  14798. +{
  14799. + memset(pos, 0, sizeof *pos);
  14800. +
  14801. + pos->state = POS_INVALID;
  14802. + coord_init_invalid(&pos->coord, NULL);
  14803. + init_lh(&pos->lock);
  14804. + init_load_count(&pos->load);
  14805. +
  14806. + reiser4_blocknr_hint_init(&pos->preceder);
  14807. +}
  14808. +
  14809. +/* The flush loop inside squalloc periodically checks pos_valid to determine
  14810. + when "enough flushing" has been performed. This will return true until one
  14811. + of the following conditions is met:
  14812. +
  14813. + 1. the number of flush-queued nodes has reached the kernel-supplied
  14814. + "int *nr_to_flush" parameter, meaning we have flushed as many blocks as the
  14815. + kernel requested. When flushing to commit, this parameter is NULL.
  14816. +
  14817. + 2. pos_stop() is called because squalloc discovers that the "next" node in
  14818. + the flush order is either non-existent, not dirty, or not in the same atom.
  14819. +*/
  14820. +
  14821. +static int pos_valid(flush_pos_t *pos)
  14822. +{
  14823. + return pos->state != POS_INVALID;
  14824. +}
  14825. +
  14826. +/* Release any resources of a flush_position. Called when jnode_flush
  14827. + finishes. */
  14828. +static void pos_done(flush_pos_t *pos)
  14829. +{
  14830. + pos_stop(pos);
  14831. + reiser4_blocknr_hint_done(&pos->preceder);
  14832. + if (convert_data(pos))
  14833. + free_convert_data(pos);
  14834. +}
  14835. +
  14836. +/* Reset the point and parent. Called during flush subroutines to terminate the
  14837. + squalloc loop. */
  14838. +static int pos_stop(flush_pos_t *pos)
  14839. +{
  14840. + pos->state = POS_INVALID;
  14841. + done_lh(&pos->lock);
  14842. + done_load_count(&pos->load);
  14843. + coord_init_invalid(&pos->coord, NULL);
  14844. +
  14845. + if (pos->child) {
  14846. + jput(pos->child);
  14847. + pos->child = NULL;
  14848. + }
  14849. +
  14850. + return 0;
  14851. +}
  14852. +
  14853. +/* Return the flush_position's block allocator hint. */
  14854. +reiser4_blocknr_hint *reiser4_pos_hint(flush_pos_t *pos)
  14855. +{
  14856. + return &pos->preceder;
  14857. +}
  14858. +/* Return the flush queue attached to the flush position. */
  14859. +flush_queue_t *reiser4_pos_fq(flush_pos_t *pos)
  14860. +{
  14861. + return pos->fq;
  14862. +}
  14863. +
  14864. +/* Make Linus happy.
  14865. + Local variables:
  14866. + c-indentation-style: "K&R"
  14867. + mode-name: "LC"
  14868. + c-basic-offset: 8
  14869. + tab-width: 8
  14870. + fill-column: 90
  14871. + LocalWords: preceder
  14872. + End:
  14873. +*/
  14874. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/flush.h linux-5.16.14/fs/reiser4/flush.h
  14875. --- linux-5.16.14.orig/fs/reiser4/flush.h 1970-01-01 01:00:00.000000000 +0100
  14876. +++ linux-5.16.14/fs/reiser4/flush.h 2022-03-12 13:26:19.650892724 +0100
  14877. @@ -0,0 +1,290 @@
  14878. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  14879. +
  14880. +/* DECLARATIONS: */
  14881. +
  14882. +#if !defined(__REISER4_FLUSH_H__)
  14883. +#define __REISER4_FLUSH_H__
  14884. +
  14885. +#include "plugin/cluster.h"
  14886. +
  14887. +/* The flush_scan data structure maintains the state of an in-progress
  14888. + flush-scan on a single level of the tree. A flush-scan is used for counting
  14889. + the number of adjacent nodes to flush, which is used to determine whether we
  14890. + should relocate, and it is also used to find a starting point for flush. A
  14891. + flush-scan object can scan in both right and left directions via the
  14892. + scan_left() and scan_right() interfaces. The right- and left-variations are
  14893. + similar but perform different functions. When scanning left we (optionally
  14894. + perform rapid scanning and then) longterm-lock the endpoint node. When
  14895. + scanning right we are simply counting the number of adjacent, dirty nodes. */
  14896. +struct flush_scan {
  14897. +
  14898. + /* The current number of nodes scanned on this level. */
  14899. + unsigned count;
  14900. +
  14901. + /* There may be a maximum number of nodes for a scan on any single
  14902. + level. When going leftward, max_count is determined by
  14903. + FLUSH_SCAN_MAXNODES (see reiser4.h) */
  14904. + unsigned max_count;
  14905. +
  14906. + /* Direction: Set to one of the sideof enumeration:
  14907. + { LEFT_SIDE, RIGHT_SIDE }. */
  14908. + sideof direction;
  14909. +
  14910. + /* Initially @stop is set to false, then set to true once some condition
  14911. + stops the search (e.g., we found a clean node before reaching
  14912. + max_count or we found a node belonging to another atom). */
  14913. + int stop;
  14914. +
  14915. + /* The current scan position. If @node is non-NULL then its reference
  14916. + count has been incremented to reflect this reference. */
  14917. + jnode *node;
  14918. +
  14919. + /* A handle for zload/zrelse of current scan position node. */
  14920. + load_count node_load;
  14921. +
  14922. + /* During left-scan, if the final position (a.k.a. endpoint node) is
  14923. + formatted the node is locked using this lock handle. The endpoint
  14924. + needs to be locked for transfer to the flush_position object after
  14925. + scanning finishes. */
  14926. + lock_handle node_lock;
  14927. +
  14928. + /* When the position is unformatted, its parent, coordinate, and parent
  14929. + zload/zrelse handle. */
  14930. + lock_handle parent_lock;
  14931. + coord_t parent_coord;
  14932. + load_count parent_load;
  14933. +
  14934. + /* The block allocator preceder hint. Sometimes flush_scan determines
  14935. + what the preceder is and if so it sets it here, after which it is
  14936. + copied into the flush_position. Otherwise, the preceder is computed
  14937. + later. */
  14938. + reiser4_block_nr preceder_blk;
  14939. +};
  14940. +
  14941. +struct convert_item_info {
  14942. + dc_item_stat d_cur; /* per-cluster status of the current item */
  14943. + dc_item_stat d_next; /* per-cluster status of the first item on
  14944. + the right neighbor */
  14945. + int cluster_shift; /* disk cluster shift */
  14946. + flow_t flow; /* disk cluster data */
  14947. +};
  14948. +
  14949. +struct convert_info {
  14950. + int count; /* iteration counter used to terminate squalloc */
  14951. + item_plugin *iplug; /* current item plugin */
  14952. + struct convert_item_info *itm; /* current item info */
  14953. + struct cluster_handle clust; /* transform cluster */
  14954. + lock_handle right_lock; /* lock handle of the right neighbor */
  14955. + int right_locked; /* nonzero while right_lock is held (assumed; verify) */
  14956. +};
  14957. +
  14958. +typedef enum flush_position_state {
  14959. + POS_INVALID, /* Invalid or stopped pos, do not continue slum
  14960. + * processing */
  14961. + POS_ON_LEAF, /* pos points to already prepped, locked
  14962. + * formatted node at leaf level */
  14963. + POS_ON_EPOINT, /* pos keeps a lock on twig level, "coord" field
  14964. + * is used to traverse unformatted nodes */
  14965. + POS_TO_LEAF, /* pos is being moved to leaf level */
  14966. + POS_TO_TWIG, /* pos is being moved to twig level */
  14967. + POS_END_OF_TWIG, /* special case of POS_ON_EPOINT, when coord is
  14968. + * after rightmost unit of the current twig */
  14969. + POS_ON_INTERNAL /* same as POS_ON_LEAF, but points to internal
  14970. + * node */
  14971. +} flushpos_state_t;
  14972. +
  14973. +/* An encapsulation of the current flush point and all the parameters that are
  14974. + passed through the entire squeeze-and-allocate stage of the flush routine.
  14975. + A single flush_position object is constructed after left- and right-scanning
  14976. + finishes. */
  14977. +struct flush_position {
  14978. + flushpos_state_t state;
  14979. +
  14980. + coord_t coord; /* coord to traverse unformatted nodes */
  14981. + lock_handle lock; /* current lock we hold */
  14982. + load_count load; /* load status for current locked formatted node
  14983. + */
  14984. + jnode *child; /* for passing a reference to unformatted child
  14985. + * across pos state changes */
  14986. +
  14987. + reiser4_blocknr_hint preceder; /* The flush 'hint' state. */
  14988. + int leaf_relocate; /* True if enough leaf-level nodes were
  14989. + * found to suggest a relocate policy. */
  14990. + int alloc_cnt; /* The number of nodes allocated during squeeze
  14991. + and allocate. */
  14992. + int prep_or_free_cnt; /* The number of nodes prepared for write
  14993. + (allocate) or squeezed and freed. */
  14994. + flush_queue_t *fq; /* flush queue used by this flush position */
  14995. + long *nr_written; /* number of nodes submitted to disk */
  14996. + int flags; /* a copy of jnode_flush flags argument */
  14997. +
  14998. + znode *prev_twig; /* previous parent pointer value, used to catch
  14999. + * processing of new twig node */
  15000. + struct convert_info *sq; /* convert info */
  15001. +
  15002. + unsigned long pos_in_unit; /* for extents only. Position
  15003. + within an extent unit of first
  15004. + jnode of slum */
  15005. + long nr_to_write; /* number of unformatted nodes to handle on
  15006. + flush */
  15007. +};
  15008. +/* Accessors for the convert (squeeze) info attached to a flush position */
  15009. +static inline int item_convert_count(flush_pos_t *pos)
  15010. +{
  15011. + return pos->sq->count;
  15012. +}
  15013. +static inline void inc_item_convert_count(flush_pos_t *pos)
  15014. +{
  15015. + pos->sq->count++;
  15016. +}
  15017. +static inline void set_item_convert_count(flush_pos_t *pos, int count)
  15018. +{
  15019. + pos->sq->count = count;
  15020. +}
  15021. +static inline item_plugin *item_convert_plug(flush_pos_t *pos)
  15022. +{
  15023. + return pos->sq->iplug;
  15024. +}
  15025. +/* Returns NULL when no convert info is attached to the flush position */
  15026. +static inline struct convert_info *convert_data(flush_pos_t *pos)
  15027. +{
  15028. + return pos->sq;
  15029. +}
  15030. +
  15031. +static inline struct convert_item_info *item_convert_data(flush_pos_t *pos)
  15032. +{
  15033. + assert("edward-955", convert_data(pos));
  15034. + return pos->sq->itm;
  15035. +}
  15036. +
  15037. +static inline struct tfm_cluster *tfm_cluster_sq(flush_pos_t *pos)
  15038. +{
  15039. + return &pos->sq->clust.tc;
  15040. +}
  15041. +
  15042. +static inline struct tfm_stream *tfm_stream_sq(flush_pos_t *pos,
  15043. + tfm_stream_id id)
  15044. +{
  15045. + assert("edward-854", pos->sq != NULL);
  15046. + return get_tfm_stream(tfm_cluster_sq(pos), id);
  15047. +}
  15048. +
  15049. +static inline int convert_data_attached(flush_pos_t *pos)
  15050. +{
  15051. + return convert_data(pos) != NULL && item_convert_data(pos) != NULL;
  15052. +}
  15053. +
  15054. +#define should_convert_right_neighbor(pos) convert_data_attached(pos)
  15055. +
  15056. +/* Returns true if next node contains next item of the disk cluster
  15057. + so item convert data should be moved to the right slum neighbor.
  15058. +*/
  15059. +static inline int next_node_is_chained(flush_pos_t *pos)
  15060. +{
  15061. + return convert_data_attached(pos) &&
  15062. + item_convert_data(pos)->d_next == DC_CHAINED_ITEM;
  15063. +}
  15064. +
  15065. +/*
  15066. + * Update "twin state" (d_cur, d_next) to assign a proper
  15067. + * conversion mode in the next iteration of convert_node()
  15068. + */
  15069. +static inline void update_chaining_state(flush_pos_t *pos,
  15070. + int this_node /* where to proceed */)
  15071. +{
  15072. +
  15073. + assert("edward-1010", convert_data_attached(pos));
  15074. +
  15075. + if (this_node) {
  15076. + /*
  15077. + * we want to perform one more iteration with the same item
  15078. + */
  15079. + assert("edward-1013",
  15080. + item_convert_data(pos)->d_cur == DC_FIRST_ITEM ||
  15081. + item_convert_data(pos)->d_cur == DC_CHAINED_ITEM);
  15082. + assert("edward-1227",
  15083. + item_convert_data(pos)->d_next == DC_AFTER_CLUSTER ||
  15084. + item_convert_data(pos)->d_next == DC_INVALID_STATE);
  15085. +
  15086. + item_convert_data(pos)->d_cur = DC_AFTER_CLUSTER;
  15087. + item_convert_data(pos)->d_next = DC_INVALID_STATE;
  15088. + }
  15089. + else {
  15090. + /*
  15091. + * we want to proceed on right neighbor, which is chained
  15092. + */
  15093. + assert("edward-1011",
  15094. + item_convert_data(pos)->d_cur == DC_FIRST_ITEM ||
  15095. + item_convert_data(pos)->d_cur == DC_CHAINED_ITEM);
  15096. + assert("edward-1012",
  15097. + item_convert_data(pos)->d_next == DC_CHAINED_ITEM);
  15098. +
  15099. + item_convert_data(pos)->d_cur = DC_CHAINED_ITEM;
  15100. + item_convert_data(pos)->d_next = DC_INVALID_STATE;
  15101. + }
  15102. +}
  15103. +
  15104. +#define SQUALLOC_THRESHOLD 256 /* max item conversions per squalloc pass */
  15105. +
  15106. +static inline int should_terminate_squalloc(flush_pos_t *pos)
  15107. +{
  15108. + return convert_data(pos) &&
  15109. + !item_convert_data(pos) &&
  15110. + item_convert_count(pos) >= SQUALLOC_THRESHOLD;
  15111. +}
  15112. +
  15113. +#if REISER4_DEBUG
  15114. +#define check_convert_info(pos) \
  15115. +do { \
  15116. + if (unlikely(should_convert_right_neighbor(pos))) { \
  15117. + warning("edward-1006", "unprocessed chained data"); \
  15118. + printk("d_cur = %d, d_next = %d, flow.len = %llu\n", \
  15119. + item_convert_data(pos)->d_cur, \
  15120. + item_convert_data(pos)->d_next, \
  15121. + item_convert_data(pos)->flow.length); \
  15122. + } \
  15123. +} while (0)
  15124. +#else
  15125. +#define check_convert_info(pos)
  15126. +#endif /* REISER4_DEBUG */
  15127. +
  15128. +void free_convert_data(flush_pos_t *pos);
  15129. +/* used in extent.c */
  15130. +int scan_set_current(flush_scan * scan, jnode * node, unsigned add_size,
  15131. + const coord_t *parent);
  15132. +int reiser4_scan_finished(flush_scan * scan);
  15133. +int reiser4_scanning_left(flush_scan * scan);
  15134. +int reiser4_scan_goto(flush_scan * scan, jnode * tonode);
  15135. +txn_atom *atom_locked_by_fq(flush_queue_t *fq);
  15136. +int reiser4_alloc_extent(flush_pos_t *flush_pos);
  15137. +squeeze_result squalloc_extent(znode *left, const coord_t *, flush_pos_t *,
  15138. + reiser4_key *stop_key);
  15139. +extern int reiser4_init_fqs(void);
  15140. +extern void reiser4_done_fqs(void);
  15141. +
  15142. +#if REISER4_DEBUG
  15143. +
  15144. +extern void reiser4_check_fq(const txn_atom *atom);
  15145. +extern atomic_t flush_cnt;
  15146. +
  15147. +#define check_preceder(blk) \
  15148. +assert("nikita-2588", blk < reiser4_block_count(reiser4_get_current_sb()));
  15149. +extern void check_pos(flush_pos_t *pos);
  15150. +#else
  15151. +#define check_preceder(b) noop
  15152. +#define check_pos(pos) noop
  15153. +#endif
  15154. +
  15155. +/* __REISER4_FLUSH_H__ */
  15156. +#endif
  15157. +
  15158. +/* Make Linus happy.
  15159. + Local variables:
  15160. + c-indentation-style: "K&R"
  15161. + mode-name: "LC"
  15162. + c-basic-offset: 8
  15163. + tab-width: 8
  15164. + fill-column: 90
  15165. + LocalWords: preceder
  15166. + End:
  15167. +*/
  15168. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/flush_queue.c linux-5.16.14/fs/reiser4/flush_queue.c
  15169. --- linux-5.16.14.orig/fs/reiser4/flush_queue.c 1970-01-01 01:00:00.000000000 +0100
  15170. +++ linux-5.16.14/fs/reiser4/flush_queue.c 2022-03-12 13:26:19.651892726 +0100
  15171. @@ -0,0 +1,680 @@
  15172. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  15173. + reiser4/README */
  15174. +
  15175. +#include "debug.h"
  15176. +#include "super.h"
  15177. +#include "txnmgr.h"
  15178. +#include "jnode.h"
  15179. +#include "znode.h"
  15180. +#include "page_cache.h"
  15181. +#include "wander.h"
  15182. +#include "vfs_ops.h"
  15183. +#include "writeout.h"
  15184. +#include "flush.h"
  15185. +
  15186. +#include <linux/bio.h>
  15187. +#include <linux/mm.h>
  15188. +#include <linux/pagemap.h>
  15189. +#include <linux/blkdev.h>
  15190. +#include <linux/writeback.h>
  15191. +
  15192. +/* A flush queue object is an accumulator for keeping jnodes prepared
  15193. + by the jnode_flush() function for writing to disk. Those "queued" jnodes are
  15194. + kept on the flush queue until memory pressure or atom commit asks
  15195. + flush queues to write some or all from their jnodes. */
  15196. +
  15197. +/*
  15198. + LOCKING:
  15199. +
  15200. + fq->guard spin lock protects fq->atom pointer and nothing else. fq->prepped
  15201. + list protected by atom spin lock. fq->prepped list uses the following
  15202. + locking:
  15203. +
  15204. + two ways to protect fq->prepped list for read-only list traversal:
  15205. +
  15206. + 1. atom spin-lock atom.
  15207. + 2. fq is IN_USE, atom->nr_running_queues increased.
  15208. +
  15209. + and one for list modification:
  15210. +
  15211. + 1. atom is spin-locked and one condition is true: fq is IN_USE or
  15212. + atom->nr_running_queues == 0.
  15213. +
  15214. + The deadlock-safe order for flush queues and atoms is: first lock atom, then
  15215. + lock flush queue, then lock jnode.
  15216. +*/
  15217. +
  15218. +#define fq_in_use(fq) ((fq)->state & FQ_IN_USE)
  15219. +#define fq_ready(fq) (!fq_in_use(fq))
  15220. +
  15221. +#define mark_fq_in_use(fq) do { (fq)->state |= FQ_IN_USE; } while (0)
  15222. +#define mark_fq_ready(fq) do { (fq)->state &= ~FQ_IN_USE; } while (0)
  15223. +
  15224. +/* Get and lock the atom of a flush queue whose ->guard is already held */
  15225. +static txn_atom *atom_locked_by_fq_nolock(flush_queue_t *fq)
  15226. +{
  15227. + /* This code is similar to jnode_get_atom(), look at it for the
  15228. + * explanation. */
  15229. + txn_atom *atom;
  15230. +
  15231. + assert_spin_locked(&(fq->guard));
  15232. +
  15233. + while (1) {
  15234. + atom = fq->atom;
  15235. + if (atom == NULL)
  15236. + break;
  15237. +
  15238. + if (spin_trylock_atom(atom))
  15239. + break;
  15240. +
  15241. + atomic_inc(&atom->refcount);
  15242. + spin_unlock(&(fq->guard));
  15243. + spin_lock_atom(atom);
  15244. + spin_lock(&(fq->guard));
  15245. +
  15246. + if (fq->atom == atom) {
  15247. + atomic_dec(&atom->refcount);
  15248. + break;
  15249. + }
  15250. +
  15251. + spin_unlock(&(fq->guard));
  15252. + atom_dec_and_unlock(atom);
  15253. + spin_lock(&(fq->guard));
  15254. + }
  15255. +
  15256. + return atom;
  15257. +}
  15258. +/* Take fq->guard, then get and lock the fq's atom (may return NULL) */
  15259. +txn_atom *atom_locked_by_fq(flush_queue_t *fq)
  15260. +{
  15261. + txn_atom *atom;
  15262. +
  15263. + spin_lock(&(fq->guard));
  15264. + atom = atom_locked_by_fq_nolock(fq);
  15265. + spin_unlock(&(fq->guard));
  15266. + return atom;
  15267. +}
  15268. +/* initialize a freshly allocated flush queue object */
  15269. +static void init_fq(flush_queue_t *fq)
  15270. +{
  15271. + memset(fq, 0, sizeof *fq);
  15272. +
  15273. + atomic_set(&fq->nr_submitted, 0);
  15274. +
  15275. + INIT_LIST_HEAD(ATOM_FQ_LIST(fq));
  15276. +
  15277. + init_waitqueue_head(&fq->wait);
  15278. + spin_lock_init(&fq->guard);
  15279. +}
  15280. +
  15281. +/* slab for flush queues */
  15282. +static struct kmem_cache *fq_slab;
  15283. +
  15284. +/**
  15285. + * reiser4_init_fqs - create flush queue cache
  15286. + *
  15287. + * Initializes slab cache of flush queues. It is part of reiser4 module
  15288. + * initialization.
  15289. + */
  15290. +int reiser4_init_fqs(void)
  15291. +{
  15292. + fq_slab = kmem_cache_create("fq",
  15293. + sizeof(flush_queue_t),
  15294. + 0, SLAB_HWCACHE_ALIGN, NULL);
  15295. + if (fq_slab == NULL)
  15296. + return RETERR(-ENOMEM);
  15297. + return 0;
  15298. +}
  15299. +
  15300. +/**
  15301. + * reiser4_done_fqs - delete flush queue cache
  15302. + *
  15303. + * This is called on reiser4 module unloading or system shutdown.
  15304. + */
  15305. +void reiser4_done_fqs(void)
  15306. +{
  15307. + destroy_reiser4_cache(&fq_slab);
  15308. +}
  15309. +
  15310. +/* allocate and initialize a new flush queue; NULL on allocation failure */
  15311. +static flush_queue_t *create_fq(gfp_t gfp)
  15312. +{
  15313. + flush_queue_t *fq;
  15314. +
  15315. + fq = kmem_cache_alloc(fq_slab, gfp);
  15316. + if (fq)
  15317. + init_fq(fq);
  15318. +
  15319. + return fq;
  15320. +}
  15321. +
  15322. +/* adjust atom's and flush queue's counters of queued nodes */
  15323. +static void count_enqueued_node(flush_queue_t *fq)
  15324. +{
  15325. + ON_DEBUG(fq->atom->num_queued++);
  15326. +}
  15327. +
  15328. +static void count_dequeued_node(flush_queue_t *fq)
  15329. +{
  15330. + assert("zam-993", fq->atom->num_queued > 0);
  15331. + ON_DEBUG(fq->atom->num_queued--);
  15332. +}
  15333. +
  15334. +/* attach flush queue object to the atom */
  15335. +static void attach_fq(txn_atom *atom, flush_queue_t *fq)
  15336. +{
  15337. + assert_spin_locked(&(atom->alock));
  15338. + list_add(&fq->alink, &atom->flush_queues);
  15339. + fq->atom = atom;
  15340. + ON_DEBUG(atom->nr_flush_queues++);
  15341. +}
  15342. +/* detach flush queue object from its atom; atom must be spin-locked */
  15343. +static void detach_fq(flush_queue_t *fq)
  15344. +{
  15345. + assert_spin_locked(&(fq->atom->alock));
  15346. +
  15347. + spin_lock(&(fq->guard));
  15348. + list_del_init(&fq->alink);
  15349. + assert("vs-1456", fq->atom->nr_flush_queues > 0);
  15350. + ON_DEBUG(fq->atom->nr_flush_queues--);
  15351. + fq->atom = NULL;
  15352. + spin_unlock(&(fq->guard));
  15353. +}
  15354. +
  15355. +/* destroy flush queue object */
  15356. +static void done_fq(flush_queue_t *fq)
  15357. +{
  15358. + assert("zam-763", list_empty_careful(ATOM_FQ_LIST(fq)));
  15359. + assert("zam-766", atomic_read(&fq->nr_submitted) == 0);
  15360. +
  15361. + kmem_cache_free(fq_slab, fq);
  15362. +}
  15363. +
  15364. +/* set JNODE_FLUSH_QUEUED on @node and account it as queued */
  15365. +static void mark_jnode_queued(flush_queue_t *fq, jnode * node)
  15366. +{
  15367. + JF_SET(node, JNODE_FLUSH_QUEUED);
  15368. + count_enqueued_node(fq);
  15369. +}
  15370. +
  15371. +/* Put a jnode on the flush queue's prepped list. Both atom and jnode must be
  15372. + spin-locked by the caller. */
  15373. +void queue_jnode(flush_queue_t *fq, jnode * node)
  15374. +{
  15375. + assert_spin_locked(&(node->guard));
  15376. + assert("zam-713", node->atom != NULL);
  15377. + assert_spin_locked(&(node->atom->alock));
  15378. + assert("zam-716", fq->atom != NULL);
  15379. + assert("zam-717", fq->atom == node->atom);
  15380. + assert("zam-907", fq_in_use(fq));
  15381. +
  15382. + assert("zam-714", JF_ISSET(node, JNODE_DIRTY));
  15383. + assert("zam-826", JF_ISSET(node, JNODE_RELOC));
  15384. + assert("vs-1481", !JF_ISSET(node, JNODE_FLUSH_QUEUED));
  15385. + assert("vs-1481", NODE_LIST(node) != FQ_LIST);
  15386. +
  15387. + mark_jnode_queued(fq, node);
  15388. + list_move_tail(&node->capture_link, ATOM_FQ_LIST(fq));
  15389. +
  15390. + ON_DEBUG(count_jnode(node->atom, node, NODE_LIST(node),
  15391. + FQ_LIST, 1));
  15392. +}
  15393. +
  15394. +/* Wait for I/O completion on a flush queue; returns -E_REPEAT after dropping
  15395. + the atom lock so the caller must re-lock and call again */
  15396. +static int wait_io(flush_queue_t *fq, int *nr_io_errors)
  15397. +{
  15398. + assert("zam-738", fq->atom != NULL);
  15399. + assert_spin_locked(&(fq->atom->alock));
  15400. + assert("zam-736", fq_in_use(fq));
  15401. + assert("zam-911", list_empty_careful(ATOM_FQ_LIST(fq)));
  15402. +
  15403. + if (atomic_read(&fq->nr_submitted) != 0) {
  15404. + struct super_block *super;
  15405. +
  15406. + spin_unlock_atom(fq->atom);
  15407. +
  15408. + assert("nikita-3013", reiser4_schedulable());
  15409. +
  15410. + super = reiser4_get_current_sb();
  15411. +
  15412. + /* FIXME: this is instead of blk_run_queues() */
  15413. + //blk_flush_plug(current);
  15414. +
  15415. + if (!sb_rdonly(super))
  15416. + wait_event(fq->wait,
  15417. + atomic_read(&fq->nr_submitted) == 0);
  15418. +
  15419. + /* Ask the caller to re-acquire the locks and call this
  15420. + function again. Note: this technique is commonly used in
  15421. + the txnmgr code. */
  15422. + return -E_REPEAT;
  15423. + }
  15424. +
  15425. + *nr_io_errors += atomic_read(&fq->nr_errors);
  15426. + return 0;
  15427. +}
  15427. +
  15428. +/* wait on I/O completion, re-submit dirty nodes to write */
  15429. +static int finish_fq(flush_queue_t *fq, int *nr_io_errors)
  15430. +{
  15431. + int ret;
  15432. + txn_atom *atom = fq->atom;
  15433. +
  15434. + assert("zam-801", atom != NULL);
  15435. + assert_spin_locked(&(atom->alock));
  15436. + assert("zam-762", fq_in_use(fq));
  15437. +
  15438. + ret = wait_io(fq, nr_io_errors);
  15439. + if (ret)
  15440. + return ret;
  15441. +
  15442. + detach_fq(fq);
  15443. + done_fq(fq);
  15444. +
  15445. + reiser4_atom_send_event(atom);
  15446. +
  15447. + return 0;
  15448. +}
  15449. +
  15450. +/* wait for all i/o for given atom to be completed, actually do one iteration
  15451. + on that and return -E_REPEAT if more iterations are needed */
  15452. +static int finish_all_fq(txn_atom * atom, int *nr_io_errors)
  15453. +{
  15454. + flush_queue_t *fq;
  15455. +
  15456. + assert_spin_locked(&(atom->alock));
  15457. +
  15458. + if (list_empty_careful(&atom->flush_queues))
  15459. + return 0;
  15460. +
  15461. + list_for_each_entry(fq, &atom->flush_queues, alink) {
  15462. + if (fq_ready(fq)) {
  15463. + int ret;
  15464. +
  15465. + mark_fq_in_use(fq);
  15466. + assert("vs-1247", fq->owner == NULL);
  15467. + ON_DEBUG(fq->owner = current);
  15468. + ret = finish_fq(fq, nr_io_errors);
  15469. +
  15470. + if (*nr_io_errors)
  15471. + reiser4_handle_error();
  15472. +
  15473. + if (ret) {
  15474. + reiser4_fq_put(fq);
  15475. + return ret;
  15476. + }
  15477. +
  15478. + spin_unlock_atom(atom);
  15479. +
  15480. + return -E_REPEAT;
  15481. + }
  15482. + }
  15483. +
  15484. + /* All flush queues are in use; atom remains locked */
  15485. + return -EBUSY;
  15486. +}
  15487. +
  15488. +/* wait all i/o for current atom */
  15489. +int current_atom_finish_all_fq(void)
  15490. +{
  15491. + txn_atom *atom;
  15492. + int nr_io_errors = 0;
  15493. + int ret = 0;
  15494. +
  15495. + do {
  15496. + while (1) {
  15497. + atom = get_current_atom_locked();
  15498. + ret = finish_all_fq(atom, &nr_io_errors);
  15499. + if (ret != -EBUSY)
  15500. + break;
  15501. + reiser4_atom_wait_event(atom);
  15502. + }
  15503. + } while (ret == -E_REPEAT);
  15504. +
  15505. + /* we do not need locked atom after this function finishes, SUCCESS or
  15506. + -EBUSY are two return codes when atom remains locked after
  15507. + finish_all_fq */
  15508. + if (!ret)
  15509. + spin_unlock_atom(atom);
  15510. +
  15511. + assert_spin_not_locked(&(atom->alock));
  15512. +
  15513. + if (ret)
  15514. + return ret;
  15515. +
  15516. + if (nr_io_errors)
  15517. + return RETERR(-EIO);
  15518. +
  15519. + return 0;
  15520. +}
  15521. +
  15522. +/* change node->atom field for all jnode from given list */
  15523. +static void
  15524. +scan_fq_and_update_atom_ref(struct list_head *list, txn_atom *atom)
  15525. +{
  15526. + jnode *cur;
  15527. +
  15528. + list_for_each_entry(cur, list, capture_link) {
  15529. + spin_lock_jnode(cur);
  15530. + cur->atom = atom;
  15531. + spin_unlock_jnode(cur);
  15532. + }
  15533. +}
  15534. +
  15535. +/* support for atom fusion operation */
  15536. +void reiser4_fuse_fq(txn_atom *to, txn_atom *from)
  15537. +{
  15538. + flush_queue_t *fq;
  15539. +
  15540. + assert_spin_locked(&(to->alock));
  15541. + assert_spin_locked(&(from->alock));
  15542. +
  15543. + list_for_each_entry(fq, &from->flush_queues, alink) {
  15544. + scan_fq_and_update_atom_ref(ATOM_FQ_LIST(fq), to);
  15545. + spin_lock(&(fq->guard));
  15546. + fq->atom = to;
  15547. + spin_unlock(&(fq->guard));
  15548. + }
  15549. +
  15550. + list_splice_init(&from->flush_queues, to->flush_queues.prev);
  15551. +
  15552. +#if REISER4_DEBUG
  15553. + to->num_queued += from->num_queued;
  15554. + to->nr_flush_queues += from->nr_flush_queues;
  15555. + from->nr_flush_queues = 0;
  15556. +#endif
  15557. +}
  15558. +
  15559. +#if REISER4_DEBUG
  15560. +int atom_fq_parts_are_clean(txn_atom * atom)
  15561. +{
  15562. + assert("zam-915", atom != NULL);
  15563. + return list_empty_careful(&atom->flush_queues);
  15564. +}
  15565. +#endif
  15566. +
  15567. +/*
  15568. + * Bio i/o completion routine for reiser4 write operations
  15569. + */
  15570. +static void end_io_handler(struct bio *bio)
  15571. +{
  15572. + int nr = 0;
  15573. + int nr_errors = 0;
  15574. + flush_queue_t *fq;
  15575. + struct bio_vec *bvec;
  15576. + struct bvec_iter_all iter_all;
  15577. +
  15578. + assert("zam-958", bio_op(bio) == WRITE);
  15579. +
  15580. + /* we expect that bio->private is set to NULL or fq object which is used
  15581. + * for synchronization and error counting. */
  15582. + fq = bio->bi_private;
  15583. + /* Check all elements of io_vec for correct write completion. */
  15584. + bio_for_each_segment_all(bvec, bio, iter_all) {
  15585. + struct page *pg = bvec->bv_page;
  15586. +
  15587. + if (bio->bi_status) {
  15588. + SetPageError(pg);
  15589. + nr_errors++;
  15590. + }
  15591. +
  15592. + {
  15593. + /* jnode WRITEBACK ("write is in progress bit") is
  15594. + * atomically cleared here. */
  15595. + jnode *node;
  15596. +
  15597. + assert("zam-736", pg != NULL);
  15598. + assert("zam-736", PagePrivate(pg));
  15599. + node = jprivate(pg);
  15600. +
  15601. + JF_CLR(node, JNODE_WRITEBACK);
  15602. + }
  15603. + nr++;
  15604. + end_page_writeback(pg);
  15605. + put_page(pg);
  15606. + }
  15607. +
  15608. + if (fq) {
  15609. + /* count i/o error in fq object */
  15610. + atomic_add(nr_errors, &fq->nr_errors);
  15611. +
  15612. + /* If all write requests registered in this "fq" are done we up
  15613. + * the waiter. */
  15614. + if (atomic_sub_and_test(nr, &fq->nr_submitted))
  15615. + wake_up(&fq->wait);
  15616. + }
  15617. +
  15618. + bio_put(bio);
  15619. +}
  15620. +
  15621. +/* Count I/O requests which will be submitted by @bio in the given flush queue
  15622. + @fq */
  15623. +void add_fq_to_bio(flush_queue_t *fq, struct bio *bio)
  15624. +{
  15625. + bio->bi_private = fq;
  15626. + bio->bi_end_io = end_io_handler;
  15627. +
  15628. + if (fq)
  15629. + atomic_add(bio->bi_iter.bi_size >> PAGE_SHIFT,
  15630. + &fq->nr_submitted);
  15631. +}
  15632. +
  15633. +/* Move all queued nodes out from @fq->prepped list. */
  15634. +static void release_prepped_list(flush_queue_t *fq)
  15635. +{
  15636. + txn_atom *atom;
  15637. +
  15638. + assert("zam-904", fq_in_use(fq));
  15639. + atom = atom_locked_by_fq(fq);
  15640. +
  15641. + while (!list_empty(ATOM_FQ_LIST(fq))) {
  15642. + jnode *cur;
  15643. +
  15644. + cur = list_entry(ATOM_FQ_LIST(fq)->next, jnode, capture_link);
  15645. + list_del_init(&cur->capture_link);
  15646. +
  15647. + count_dequeued_node(fq);
  15648. + spin_lock_jnode(cur);
  15649. + assert("nikita-3154", !JF_ISSET(cur, JNODE_OVRWR));
  15650. + assert("nikita-3154", JF_ISSET(cur, JNODE_RELOC));
  15651. + assert("nikita-3154", JF_ISSET(cur, JNODE_FLUSH_QUEUED));
  15652. + JF_CLR(cur, JNODE_FLUSH_QUEUED);
  15653. +
  15654. + if (JF_ISSET(cur, JNODE_DIRTY)) {
  15655. + list_add_tail(&cur->capture_link,
  15656. + ATOM_DIRTY_LIST(atom,
  15657. + jnode_get_level(cur)));
  15658. + ON_DEBUG(count_jnode(atom, cur, FQ_LIST,
  15659. + DIRTY_LIST, 1));
  15660. + } else {
  15661. + list_add_tail(&cur->capture_link,
  15662. + ATOM_CLEAN_LIST(atom));
  15663. + ON_DEBUG(count_jnode(atom, cur, FQ_LIST,
  15664. + CLEAN_LIST, 1));
  15665. + }
  15666. +
  15667. + spin_unlock_jnode(cur);
  15668. + }
  15669. +
  15670. + if (--atom->nr_running_queues == 0)
  15671. + reiser4_atom_send_event(atom);
  15672. +
  15673. + spin_unlock_atom(atom);
  15674. +}
  15675. +
  15676. +/* Submit write requests for nodes on the already filled flush queue @fq.
  15677. +
  15678. + @fq: flush queue object which contains jnodes we can (and will) write.
  15679. + @return: number of submitted blocks (>=0) if success, otherwise -- an error
  15680. + code (<0). */
  15681. +int reiser4_write_fq(flush_queue_t *fq, long *nr_submitted, int flags)
  15682. +{
  15683. + int ret;
  15684. + txn_atom *atom;
  15685. +
  15686. + while (1) {
  15687. + atom = atom_locked_by_fq(fq);
  15688. + assert("zam-924", atom);
  15689. + /* do not write fq in parallel. */
  15690. + if (atom->nr_running_queues == 0
  15691. + || !(flags & WRITEOUT_SINGLE_STREAM))
  15692. + break;
  15693. + reiser4_atom_wait_event(atom);
  15694. + }
  15695. +
  15696. + atom->nr_running_queues++;
  15697. + spin_unlock_atom(atom);
  15698. +
  15699. + ret = write_jnode_list(ATOM_FQ_LIST(fq), fq, nr_submitted, flags);
  15700. + release_prepped_list(fq);
  15701. +
  15702. + return ret;
  15703. +}
  15704. +
  15705. +/* Getting flush queue object for exclusive use by one thread. May require
  15706. + several iterations which is indicated by -E_REPEAT return code.
  15707. +
  15708. + This function does not contain code for obtaining an atom lock because an
  15709. + atom lock is obtained by different ways in different parts of reiser4,
  15710. + usually it is current atom, but we need a possibility for getting fq for the
  15711. + atom of given jnode. */
  15712. +static int fq_by_atom_gfp(txn_atom *atom, flush_queue_t **new_fq, gfp_t gfp)
  15713. +{
  15714. + flush_queue_t *fq;
  15715. +
  15716. + assert_spin_locked(&(atom->alock));
  15717. +
  15718. + fq = list_entry(atom->flush_queues.next, flush_queue_t, alink);
  15719. + while (&atom->flush_queues != &fq->alink) {
  15720. + spin_lock(&(fq->guard));
  15721. +
  15722. + if (fq_ready(fq)) {
  15723. + mark_fq_in_use(fq);
  15724. + assert("vs-1246", fq->owner == NULL);
  15725. + ON_DEBUG(fq->owner = current);
  15726. + spin_unlock(&(fq->guard));
  15727. +
  15728. + if (*new_fq)
  15729. + done_fq(*new_fq);
  15730. +
  15731. + *new_fq = fq;
  15732. +
  15733. + return 0;
  15734. + }
  15735. +
  15736. + spin_unlock(&(fq->guard));
  15737. +
  15738. + fq = list_entry(fq->alink.next, flush_queue_t, alink);
  15739. + }
  15740. +
  15741. + /* Use previously allocated fq object */
  15742. + if (*new_fq) {
  15743. + mark_fq_in_use(*new_fq);
  15744. + assert("vs-1248", (*new_fq)->owner == NULL);
  15745. + ON_DEBUG((*new_fq)->owner = current);
  15746. + attach_fq(atom, *new_fq);
  15747. +
  15748. + return 0;
  15749. + }
  15750. +
  15751. + spin_unlock_atom(atom);
  15752. +
  15753. + *new_fq = create_fq(gfp);
  15754. +
  15755. + if (*new_fq == NULL)
  15756. + return RETERR(-ENOMEM);
  15757. +
  15758. + return RETERR(-E_REPEAT);
  15759. +}
  15760. +
  15761. +int reiser4_fq_by_atom(txn_atom * atom, flush_queue_t **new_fq)
  15762. +{
  15763. + return fq_by_atom_gfp(atom, new_fq, reiser4_ctx_gfp_mask_get());
  15764. +}
  15765. +
  15766. +/* A wrapper around reiser4_fq_by_atom for getting a flush queue
  15767. + object for current atom, if success fq->atom remains locked. */
  15768. +flush_queue_t *get_fq_for_current_atom(void)
  15769. +{
  15770. + flush_queue_t *fq = NULL;
  15771. + txn_atom *atom;
  15772. + int ret;
  15773. +
  15774. + do {
  15775. + atom = get_current_atom_locked();
  15776. + ret = reiser4_fq_by_atom(atom, &fq);
  15777. + } while (ret == -E_REPEAT);
  15778. +
  15779. + if (ret)
  15780. + return ERR_PTR(ret);
  15781. + return fq;
  15782. +}
  15783. +
  15784. +/* Releasing flush queue object after exclusive use */
  15785. +void reiser4_fq_put_nolock(flush_queue_t *fq)
  15786. +{
  15787. + assert("zam-747", fq->atom != NULL);
  15788. + assert("zam-902", list_empty_careful(ATOM_FQ_LIST(fq)));
  15789. + mark_fq_ready(fq);
  15790. + assert("vs-1245", fq->owner == current);
  15791. + ON_DEBUG(fq->owner = NULL);
  15792. +}
  15793. +
  15794. +void reiser4_fq_put(flush_queue_t *fq)
  15795. +{
  15796. + txn_atom *atom;
  15797. +
  15798. + spin_lock(&(fq->guard));
  15799. + atom = atom_locked_by_fq_nolock(fq);
  15800. +
  15801. + assert("zam-746", atom != NULL);
  15802. +
  15803. + reiser4_fq_put_nolock(fq);
  15804. + reiser4_atom_send_event(atom);
  15805. +
  15806. + spin_unlock(&(fq->guard));
  15807. + spin_unlock_atom(atom);
  15808. +}
  15809. +
  15810. +/* A part of atom object initialization related to the embedded flush queue
  15811. + list head */
  15812. +
  15813. +void init_atom_fq_parts(txn_atom *atom)
  15814. +{
  15815. + INIT_LIST_HEAD(&atom->flush_queues);
  15816. +}
  15817. +
  15818. +#if REISER4_DEBUG
  15819. +
  15820. +void reiser4_check_fq(const txn_atom *atom)
  15821. +{
  15822. + /* check number of nodes on all atom's flush queues */
  15823. + flush_queue_t *fq;
  15824. + int count;
  15825. + struct list_head *pos;
  15826. +
  15827. + count = 0;
  15828. + list_for_each_entry(fq, &atom->flush_queues, alink) {
  15829. + spin_lock(&(fq->guard));
  15830. + /* calculate number of jnodes on fq' list of prepped jnodes */
  15831. + list_for_each(pos, ATOM_FQ_LIST(fq))
  15832. + count++;
  15833. + spin_unlock(&(fq->guard));
  15834. + }
  15835. + if (count != atom->fq)
  15836. + warning("", "fq counter %d, real %d\n", atom->fq, count);
  15837. +
  15838. +}
  15839. +
  15840. +#endif
  15841. +
  15842. +/*
  15843. + * Local variables:
  15844. + * c-indentation-style: "K&R"
  15845. + * mode-name: "LC"
  15846. + * c-basic-offset: 8
  15847. + * tab-width: 8
  15848. + * fill-column: 79
  15849. + * scroll-step: 1
  15850. + * End:
  15851. + */
  15852. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/forward.h linux-5.16.14/fs/reiser4/forward.h
  15853. --- linux-5.16.14.orig/fs/reiser4/forward.h 1970-01-01 01:00:00.000000000 +0100
  15854. +++ linux-5.16.14/fs/reiser4/forward.h 2022-03-12 13:26:19.651892726 +0100
  15855. @@ -0,0 +1,259 @@
  15856. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  15857. + reiser4/README */
  15858. +
  15859. +/* Forward declarations. Thank you Kernighan. */
  15860. +
  15861. +#if !defined(__REISER4_FORWARD_H__)
  15862. +#define __REISER4_FORWARD_H__
  15863. +
  15864. +#include <asm/errno.h>
  15865. +#include <linux/types.h>
  15866. +
  15867. +typedef struct zlock zlock;
  15868. +typedef struct lock_stack lock_stack;
  15869. +typedef struct lock_handle lock_handle;
  15870. +typedef struct znode znode;
  15871. +typedef struct flow flow_t;
  15872. +typedef struct coord coord_t;
  15873. +typedef struct tree_access_pointer tap_t;
  15874. +typedef struct reiser4_object_create_data reiser4_object_create_data;
  15875. +typedef union reiser4_plugin reiser4_plugin;
  15876. +typedef __u16 reiser4_plugin_id;
  15877. +typedef __u64 reiser4_plugin_groups;
  15878. +typedef struct item_plugin item_plugin;
  15879. +typedef struct jnode_plugin jnode_plugin;
  15880. +typedef struct reiser4_item_data reiser4_item_data;
  15881. +typedef union reiser4_key reiser4_key;
  15882. +typedef struct reiser4_tree reiser4_tree;
  15883. +typedef struct carry_cut_data carry_cut_data;
  15884. +typedef struct carry_kill_data carry_kill_data;
  15885. +typedef struct carry_tree_op carry_tree_op;
  15886. +typedef struct carry_tree_node carry_tree_node;
  15887. +typedef struct carry_plugin_info carry_plugin_info;
  15888. +typedef struct reiser4_journal reiser4_journal;
  15889. +typedef struct txn_atom txn_atom;
  15890. +typedef struct txn_handle txn_handle;
  15891. +typedef struct txn_mgr txn_mgr;
  15892. +typedef struct reiser4_dir_entry_desc reiser4_dir_entry_desc;
  15893. +typedef struct reiser4_context reiser4_context;
  15894. +typedef struct carry_level carry_level;
  15895. +typedef struct blocknr_set_entry blocknr_set_entry;
  15896. +typedef struct blocknr_list_entry blocknr_list_entry;
  15897. +/* super_block->s_fs_info points to this */
  15898. +typedef struct reiser4_super_info_data reiser4_super_info_data;
  15899. +/* next two objects are fields of reiser4_super_info_data */
  15900. +typedef struct reiser4_oid_allocator reiser4_oid_allocator;
  15901. +typedef struct reiser4_space_allocator reiser4_space_allocator;
  15902. +
  15903. +typedef struct flush_scan flush_scan;
  15904. +typedef struct flush_position flush_pos_t;
  15905. +
  15906. +typedef unsigned short pos_in_node_t;
  15907. +#define MAX_POS_IN_NODE 65535
  15908. +
  15909. +typedef struct jnode jnode;
  15910. +typedef struct reiser4_blocknr_hint reiser4_blocknr_hint;
  15911. +
  15912. +typedef struct uf_coord uf_coord_t;
  15913. +typedef struct hint hint_t;
  15914. +
  15915. +typedef struct ktxnmgrd_context ktxnmgrd_context;
  15916. +
  15917. +struct inode;
  15918. +struct page;
  15919. +struct file;
  15920. +struct dentry;
  15921. +struct super_block;
  15922. +
  15923. +/* return values of coord_by_key(). cbk == coord_by_key */
  15924. +typedef enum {
  15925. + CBK_COORD_FOUND = 0,
  15926. + CBK_COORD_NOTFOUND = -ENOENT,
  15927. +} lookup_result;
  15928. +
  15929. +/* results of lookup with directory file */
  15930. +typedef enum {
  15931. + FILE_NAME_FOUND = 0,
  15932. + FILE_NAME_NOTFOUND = -ENOENT,
  15933. + FILE_IO_ERROR = -EIO, /* FIXME: it seems silly to have special OOM,
  15934. + IO_ERROR return codes for each search. */
  15935. + FILE_OOM = -ENOMEM /* FIXME: it seems silly to have special OOM,
  15936. + IO_ERROR return codes for each search. */
  15937. +} file_lookup_result;
  15938. +
  15939. +/* behaviors of lookup. If coord we are looking for is actually in a tree,
  15940. + both coincide. */
  15941. +typedef enum {
  15942. + /* search exactly for the coord with key given */
  15943. + FIND_EXACT,
  15944. + /* search for coord with the maximal key not greater than one
  15945. + given */
  15946. + FIND_MAX_NOT_MORE_THAN /*LEFT_SLANT_BIAS */
  15947. +} lookup_bias;
  15948. +
  15949. +typedef enum {
  15950. + /* number of leaf level of the tree
  15951. + The fake root has (tree_level=0). */
  15952. + LEAF_LEVEL = 1,
  15953. +
  15954. + /* number of level one above leaf level of the tree.
  15955. +
  15956. + It is supposed that internal tree used by reiser4 to store file
  15957. + system data and meta data will have height 2 initially (when
  15958. + created by mkfs).
  15959. + */
  15960. + TWIG_LEVEL = 2,
  15961. +} tree_level;
  15962. +
  15963. +/* The "real" maximum ztree height is the 0-origin size of any per-level
  15964. + array, since the zero'th level is not used. */
  15965. +#define REAL_MAX_ZTREE_HEIGHT (REISER4_MAX_ZTREE_HEIGHT-LEAF_LEVEL)
  15966. +
  15967. +/* enumeration of possible mutual position of item and coord. This enum is
  15968. + return type of ->is_in_item() item plugin method which see. */
  15969. +typedef enum {
  15970. + /* coord is on the left of an item */
  15971. + IP_ON_THE_LEFT,
  15972. + /* coord is inside item */
  15973. + IP_INSIDE,
  15974. + /* coord is inside item, but to the right of the rightmost unit of
  15975. + this item */
  15976. + IP_RIGHT_EDGE,
  15977. + /* coord is on the right of an item */
  15978. + IP_ON_THE_RIGHT
  15979. +} interposition;
  15980. +
  15981. +/* type of lock to acquire on znode before returning it to caller */
  15982. +typedef enum {
  15983. + ZNODE_NO_LOCK = 0,
  15984. + ZNODE_READ_LOCK = 1,
  15985. + ZNODE_WRITE_LOCK = 2,
  15986. +} znode_lock_mode;
  15987. +
  15988. +/* type of lock request */
  15989. +typedef enum {
  15990. + ZNODE_LOCK_LOPRI = 0,
  15991. + ZNODE_LOCK_HIPRI = (1 << 0),
  15992. +
  15993. + /* By setting the ZNODE_LOCK_NONBLOCK flag in a lock request the call to
  15994. + longterm_lock_znode will not sleep waiting for the lock to become
  15995. + available. If the lock is unavailable, reiser4_znode_lock will
  15996. + immediately return the value -E_REPEAT. */
  15997. + ZNODE_LOCK_NONBLOCK = (1 << 1),
  15998. + /* An option for longterm_lock_znode which prevents atom fusion */
  15999. + ZNODE_LOCK_DONT_FUSE = (1 << 2)
  16000. +} znode_lock_request;
  16001. +
  16002. +typedef enum { READ_OP = 0, WRITE_OP = 1 } rw_op;
  16003. +
  16004. +/* used to specify direction of shift. These must be -1 and 1 */
  16005. +typedef enum {
  16006. + SHIFT_LEFT = 1,
  16007. + SHIFT_RIGHT = -1
  16008. +} shift_direction;
  16009. +
  16010. +typedef enum {
  16011. + LEFT_SIDE,
  16012. + RIGHT_SIDE
  16013. +} sideof;
  16014. +
  16015. +#define reiser4_round_up(value, order) \
  16016. + ((typeof(value))(((long) (value) + (order) - 1U) & \
  16017. + ~((order) - 1)))
  16018. +
  16019. +/* values returned by squalloc_right_neighbor and its auxiliary functions */
  16020. +typedef enum {
  16021. + /* unit of internal item is moved */
  16022. + SUBTREE_MOVED = 0,
  16023. + /* nothing else can be squeezed into left neighbor */
  16024. + SQUEEZE_TARGET_FULL = 1,
  16025. + /* all content of node is squeezed into its left neighbor */
  16026. + SQUEEZE_SOURCE_EMPTY = 2,
  16027. + /* one more item is copied (this is only returned by
  16028. + allocate_and_copy_extent to squalloc_twig)) */
  16029. + SQUEEZE_CONTINUE = 3
  16030. +} squeeze_result;
  16031. +
  16032. +/* Do not change items ids. If you do - there will be format change */
  16033. +typedef enum {
  16034. + STATIC_STAT_DATA_ID = 0x0,
  16035. + SIMPLE_DIR_ENTRY_ID = 0x1,
  16036. + COMPOUND_DIR_ID = 0x2,
  16037. + NODE_POINTER_ID = 0x3,
  16038. + EXTENT_POINTER_ID = 0x5,
  16039. + FORMATTING_ID = 0x6,
  16040. + CTAIL_ID = 0x7,
  16041. + BLACK_BOX_ID = 0x8,
  16042. + LAST_ITEM_ID = 0x9
  16043. +} item_id;
  16044. +
  16045. +/* Flags passed to jnode_flush() to allow it to distinguish default settings
  16046. + based on whether commit() was called or VM memory pressure was applied. */
  16047. +typedef enum {
  16048. + /* submit flush queue to disk at jnode_flush completion */
  16049. + JNODE_FLUSH_WRITE_BLOCKS = 1,
  16050. +
  16051. + /* flush is called for commit */
  16052. + JNODE_FLUSH_COMMIT = 2,
  16053. + /* not implemented */
  16054. + JNODE_FLUSH_MEMORY_FORMATTED = 4,
  16055. +
  16056. + /* not implemented */
  16057. + JNODE_FLUSH_MEMORY_UNFORMATTED = 8,
  16058. +} jnode_flush_flags;
  16059. +
  16060. +/* Flags to insert/paste carry operations. Currently they only used in
  16061. + flushing code, but in future, they can be used to optimize for repetitive
  16062. + accesses. */
  16063. +typedef enum {
  16064. + /* carry is not allowed to shift data to the left when trying to find
  16065. + free space */
  16066. + COPI_DONT_SHIFT_LEFT = (1 << 0),
  16067. + /* carry is not allowed to shift data to the right when trying to find
  16068. + free space */
  16069. + COPI_DONT_SHIFT_RIGHT = (1 << 1),
  16070. + /* carry is not allowed to allocate new node(s) when trying to find
  16071. + free space */
  16072. + COPI_DONT_ALLOCATE = (1 << 2),
  16073. + /* try to load left neighbor if its not in a cache */
  16074. + COPI_LOAD_LEFT = (1 << 3),
  16075. + /* try to load right neighbor if its not in a cache */
  16076. + COPI_LOAD_RIGHT = (1 << 4),
  16077. + /* shift insertion point to the left neighbor */
  16078. + COPI_GO_LEFT = (1 << 5),
  16079. + /* shift insertion point to the right neighbor */
  16080. + COPI_GO_RIGHT = (1 << 6),
  16081. + /* try to step back into original node if insertion into new node
  16082. + fails after shifting data there. */
  16083. + COPI_STEP_BACK = (1 << 7),
  16084. + /* use all possible space in the node */
  16085. + COPI_SWEEP = (1 << 8)
  16086. +} cop_insert_flag;
  16087. +
  16088. +typedef enum {
  16089. + SAFE_UNLINK, /* safe-link for unlink */
  16090. + SAFE_TRUNCATE /* safe-link for truncate */
  16091. +} reiser4_safe_link_t;
  16092. +
  16093. +/* this is to show on which list of atom jnode is */
  16094. +typedef enum {
  16095. + NOT_CAPTURED,
  16096. + DIRTY_LIST,
  16097. + CLEAN_LIST,
  16098. + FQ_LIST,
  16099. + WB_LIST,
  16100. + OVRWR_LIST
  16101. +} atom_list;
  16102. +
  16103. +/* __REISER4_FORWARD_H__ */
  16104. +#endif
  16105. +
  16106. +/* Make Linus happy.
  16107. + Local variables:
  16108. + c-indentation-style: "K&R"
  16109. + mode-name: "LC"
  16110. + c-basic-offset: 8
  16111. + tab-width: 8
  16112. + fill-column: 120
  16113. + End:
  16114. +*/
  16115. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/fsdata.c linux-5.16.14/fs/reiser4/fsdata.c
  16116. --- linux-5.16.14.orig/fs/reiser4/fsdata.c 1970-01-01 01:00:00.000000000 +0100
  16117. +++ linux-5.16.14/fs/reiser4/fsdata.c 2022-03-12 13:26:19.652892728 +0100
  16118. @@ -0,0 +1,801 @@
  16119. +/* Copyright 2001, 2002, 2003, 2004, 2005 by Hans Reiser, licensing governed by
  16120. + * reiser4/README */
  16121. +
  16122. +#include "fsdata.h"
  16123. +#include "inode.h"
  16124. +
  16125. +#include <linux/shrinker.h>
  16126. +
  16127. +/* cache or dir_cursors */
  16128. +static struct kmem_cache *d_cursor_cache;
  16129. +
  16130. +/* list of unused cursors */
  16131. +static LIST_HEAD(cursor_cache);
  16132. +
  16133. +/* number of cursors in list of unused cursors */
  16134. +static unsigned long d_cursor_unused = 0;
  16135. +
  16136. +/* spinlock protecting manipulations with dir_cursor's hash table and lists */
  16137. +DEFINE_SPINLOCK(d_c_lock);
  16138. +
  16139. +static reiser4_file_fsdata *create_fsdata(struct file *file);
  16140. +static int file_is_stateless(struct file *file);
  16141. +static void free_fsdata(reiser4_file_fsdata *fsdata);
  16142. +static void kill_cursor(dir_cursor *);
  16143. +
  16144. +static unsigned long d_cursor_shrink_scan(struct shrinker *shrink,
  16145. + struct shrink_control *sc)
  16146. +{
  16147. + dir_cursor *scan;
  16148. + unsigned long freed = 0;
  16149. +
  16150. + spin_lock(&d_c_lock);
  16151. + while (!list_empty(&cursor_cache) && sc->nr_to_scan) {
  16152. + scan = list_entry(cursor_cache.next, dir_cursor, alist);
  16153. + assert("nikita-3567", scan->ref == 0);
  16154. + kill_cursor(scan);
  16155. + freed++;
  16156. + sc->nr_to_scan--;
  16157. + }
  16158. + spin_unlock(&d_c_lock);
  16159. + return freed;
  16160. +}
  16161. +
  16162. +static unsigned long d_cursor_shrink_count (struct shrinker *shrink,
  16163. + struct shrink_control *sc)
  16164. +{
  16165. + return d_cursor_unused;
  16166. +}
  16167. +
  16168. +/*
  16169. + * actually, d_cursors are "priceless", because there is no way to
  16170. + * recover information stored in them. On the other hand, we don't
  16171. + * want to consume all kernel memory by them. As a compromise, just
  16172. + * assign higher "seeks" value to d_cursor cache, so that it will be
  16173. + * shrunk only if system is really tight on memory.
  16174. + */
  16175. +static struct shrinker d_cursor_shrinker = {
  16176. + .count_objects = d_cursor_shrink_count,
  16177. + .scan_objects = d_cursor_shrink_scan,
  16178. + .seeks = DEFAULT_SEEKS << 3
  16179. +};
  16180. +
  16181. +/**
  16182. + * reiser4_init_d_cursor - create d_cursor cache
  16183. + *
  16184. + * Initializes slab cache of d_cursors. It is part of reiser4 module
  16185. + * initialization.
  16186. + */
  16187. +int reiser4_init_d_cursor(void)
  16188. +{
  16189. + d_cursor_cache = kmem_cache_create("d_cursor", sizeof(dir_cursor), 0,
  16190. + SLAB_HWCACHE_ALIGN, NULL);
  16191. + if (d_cursor_cache == NULL)
  16192. + return RETERR(-ENOMEM);
  16193. +
  16194. + register_shrinker(&d_cursor_shrinker);
  16195. + return 0;
  16196. +}
  16197. +
  16198. +/**
  16199. + * reiser4_done_d_cursor - delete d_cursor cache and d_cursor shrinker
  16200. + *
  16201. + * This is called on reiser4 module unloading or system shutdown.
  16202. + */
  16203. +void reiser4_done_d_cursor(void)
  16204. +{
  16205. + unregister_shrinker(&d_cursor_shrinker);
  16206. +
  16207. + destroy_reiser4_cache(&d_cursor_cache);
  16208. +}
  16209. +
  16210. +#define D_CURSOR_TABLE_SIZE (256)
  16211. +
  16212. +static inline unsigned long
  16213. +d_cursor_hash(d_cursor_hash_table * table, const struct d_cursor_key *key)
  16214. +{
  16215. + assert("nikita-3555", IS_POW(D_CURSOR_TABLE_SIZE));
  16216. + return (key->oid + key->cid) & (D_CURSOR_TABLE_SIZE - 1);
  16217. +}
  16218. +
  16219. +static inline int d_cursor_eq(const struct d_cursor_key *k1,
  16220. + const struct d_cursor_key *k2)
  16221. +{
  16222. + return k1->cid == k2->cid && k1->oid == k2->oid;
  16223. +}
  16224. +
  16225. +/*
  16226. + * define functions to manipulate reiser4 super block's hash table of
  16227. + * dir_cursors
  16228. + */
  16229. +#define KMALLOC(size) kmalloc((size), reiser4_ctx_gfp_mask_get())
  16230. +#define KFREE(ptr, size) kfree(ptr)
  16231. +TYPE_SAFE_HASH_DEFINE(d_cursor,
  16232. + dir_cursor,
  16233. + struct d_cursor_key,
  16234. + key, hash, d_cursor_hash, d_cursor_eq);
  16235. +#undef KFREE
  16236. +#undef KMALLOC
  16237. +
  16238. +/**
  16239. + * reiser4_init_super_d_info - initialize per-super-block d_cursor resources
  16240. + * @super: super block to initialize
  16241. + *
  16242. + * Initializes per-super-block d_cursor's hash table and radix tree. It is part
  16243. + * of mount.
  16244. + */
  16245. +int reiser4_init_super_d_info(struct super_block *super)
  16246. +{
  16247. + struct d_cursor_info *p;
  16248. +
  16249. + p = &get_super_private(super)->d_info;
  16250. +
  16251. + INIT_RADIX_TREE(&p->tree, reiser4_ctx_gfp_mask_get());
  16252. + return d_cursor_hash_init(&p->table, D_CURSOR_TABLE_SIZE);
  16253. +}
  16254. +
  16255. +/**
  16256. + * reiser4_done_super_d_info - release per-super-block d_cursor resources
  16257. + * @super: super block being umounted
  16258. + *
  16259. + * It is called on umount. Kills all directory cursors attached to super block.
  16260. + */
  16261. +void reiser4_done_super_d_info(struct super_block *super)
  16262. +{
  16263. + struct d_cursor_info *d_info;
  16264. + dir_cursor *cursor, *next;
  16265. +
  16266. + d_info = &get_super_private(super)->d_info;
  16267. + for_all_in_htable(&d_info->table, d_cursor, cursor, next)
  16268. + kill_cursor(cursor);
  16269. +
  16270. + BUG_ON(!radix_tree_empty(&d_info->tree));
  16271. + d_cursor_hash_done(&d_info->table);
  16272. +}
  16273. +
  16274. +/**
  16275. + * kill_cursor - free dir_cursor and reiser4_file_fsdata attached to it
  16276. + * @cursor: cursor to free
  16277. + *
  16278. + * Removes reiser4_file_fsdata attached to @cursor from readdir list of
  16279. + * reiser4_inode, frees that reiser4_file_fsdata. Removes @cursor from from
  16280. + * indices, hash table, list of unused cursors and frees it.
  16281. + */
  16282. +static void kill_cursor(dir_cursor *cursor)
  16283. +{
  16284. + unsigned long index;
  16285. +
  16286. + assert("nikita-3566", cursor->ref == 0);
  16287. + assert("nikita-3572", cursor->fsdata != NULL);
  16288. +
  16289. + index = (unsigned long)cursor->key.oid;
  16290. + list_del_init(&cursor->fsdata->dir.linkage);
  16291. + free_fsdata(cursor->fsdata);
  16292. + cursor->fsdata = NULL;
  16293. +
  16294. + if (list_empty_careful(&cursor->list))
  16295. + /* this is last cursor for a file. Kill radix-tree entry */
  16296. + radix_tree_delete(&cursor->info->tree, index);
  16297. + else {
  16298. + void **slot;
  16299. +
  16300. + /*
  16301. + * there are other cursors for the same oid.
  16302. + */
  16303. +
  16304. + /*
  16305. + * if radix tree point to the cursor being removed, re-target
  16306. + * radix tree slot to the next cursor in the (non-empty as was
  16307. + * checked above) element of the circular list of all cursors
  16308. + * for this oid.
  16309. + */
  16310. + slot = radix_tree_lookup_slot(&cursor->info->tree, index);
  16311. + assert("nikita-3571", *slot != NULL);
  16312. + if (*slot == cursor)
  16313. + *slot = list_entry(cursor->list.next, dir_cursor, list);
  16314. + /* remove cursor from circular list */
  16315. + list_del_init(&cursor->list);
  16316. + }
  16317. + /* remove cursor from the list of unused cursors */
  16318. + list_del_init(&cursor->alist);
  16319. + /* remove cursor from the hash table */
  16320. + d_cursor_hash_remove(&cursor->info->table, cursor);
  16321. + /* and free it */
  16322. + kmem_cache_free(d_cursor_cache, cursor);
  16323. + --d_cursor_unused;
  16324. +}
  16325. +
  16326. +/* possible actions that can be performed on all cursors for the given file */
  16327. +enum cursor_action {
  16328. + /*
  16329. + * load all detached state: this is called when stat-data is loaded
  16330. + * from the disk to recover information about all pending readdirs
  16331. + */
  16332. + CURSOR_LOAD,
  16333. + /*
  16334. + * detach all state from inode, leaving it in the cache. This is called
  16335. + * when inode is removed from memory by memory pressure
  16336. + */
  16337. + CURSOR_DISPOSE,
  16338. + /*
  16339. + * detach cursors from the inode, and free them. This is called when
  16340. + * inode is destroyed
  16341. + */
  16342. + CURSOR_KILL
  16343. +};
  16344. +
  16345. +/*
  16346. + * return d_cursor data for the file system @inode is in.
  16347. + */
  16348. +static inline struct d_cursor_info *d_info(struct inode *inode)
  16349. +{
  16350. + return &get_super_private(inode->i_sb)->d_info;
  16351. +}
  16352. +
  16353. +/*
  16354. + * lookup d_cursor in the per-super-block radix tree.
  16355. + */
  16356. +static inline dir_cursor *lookup(struct d_cursor_info *info,
  16357. + unsigned long index)
  16358. +{
  16359. + return (dir_cursor *) radix_tree_lookup(&info->tree, index);
  16360. +}
  16361. +
  16362. +/*
  16363. + * attach @cursor to the radix tree. There may be multiple cursors for the
  16364. + * same oid, they are chained into circular list.
  16365. + */
  16366. +static void bind_cursor(dir_cursor * cursor, unsigned long index)
  16367. +{
  16368. + dir_cursor *head;
  16369. +
  16370. + head = lookup(cursor->info, index);
  16371. + if (head == NULL) {
  16372. + /* this is the first cursor for this index */
  16373. + INIT_LIST_HEAD(&cursor->list);
  16374. + radix_tree_insert(&cursor->info->tree, index, cursor);
  16375. + } else {
  16376. + /* some cursor already exists. Chain ours */
  16377. + list_add(&cursor->list, &head->list);
  16378. + }
  16379. +}
  16380. +
  16381. +/*
  16382. + * detach fsdata (if detachable) from file descriptor, and put cursor on the
  16383. + * "unused" list. Called when file descriptor is no longer in active use.
  16384. + */
  16385. +static void clean_fsdata(struct file *file)
  16386. +{
  16387. + dir_cursor *cursor;
  16388. + reiser4_file_fsdata *fsdata;
  16389. +
  16390. + assert("nikita-3570", file_is_stateless(file));
  16391. +
  16392. + fsdata = (reiser4_file_fsdata *) file->private_data;
  16393. + if (fsdata != NULL) {
  16394. + cursor = fsdata->cursor;
  16395. + if (cursor != NULL) {
  16396. + spin_lock(&d_c_lock);
  16397. + --cursor->ref;
  16398. + if (cursor->ref == 0) {
  16399. + list_add_tail(&cursor->alist, &cursor_cache);
  16400. + ++d_cursor_unused;
  16401. + }
  16402. + spin_unlock(&d_c_lock);
  16403. + file->private_data = NULL;
  16404. + }
  16405. + }
  16406. +}
  16407. +
  16408. +/*
  16409. + * global counter used to generate "client ids". These ids are encoded into
  16410. + * high bits of fpos.
  16411. + */
  16412. +static __u32 cid_counter = 0;
  16413. +#define CID_SHIFT (20)
  16414. +#define CID_MASK (0xfffffull)
  16415. +
  16416. +static void free_file_fsdata_nolock(struct file *);
  16417. +
  16418. +/**
  16419. + * insert_cursor - allocate file_fsdata, insert cursor to tree and hash table
  16420. + * @cursor: preallocated cursor to initialize and insert
  16421. + * @file: file to attach the new fsdata to
  16422. + * @inode: directory inode being read
  16423. + *
  16424. + * Allocates reiser4_file_fsdata, attaches it to @cursor, inserts cursor to
  16425. + * reiser4 super block's hash table and radix tree, and adds
  16426. + * detachable readdir
  16427. + * state to @file.
  16428. + */
  16429. +static int insert_cursor(dir_cursor *cursor, struct file *file, loff_t *fpos,
  16430. + struct inode *inode)
  16431. +{
  16432. + int result;
  16433. + reiser4_file_fsdata *fsdata;
  16434. +
  16435. + memset(cursor, 0, sizeof *cursor);
  16436. +
  16437. + /* this is either first call to readdir, or rewind. Anyway, create new
  16438. + * cursor. */
  16439. + fsdata = create_fsdata(NULL);
  16440. + if (fsdata != NULL) {
  16441. + result = radix_tree_preload(reiser4_ctx_gfp_mask_get());
  16442. + if (result == 0) {
  16443. + struct d_cursor_info *info;
  16444. + oid_t oid;
  16445. +
  16446. + info = d_info(inode);
  16447. + oid = get_inode_oid(inode);
  16448. + /* cid occupies the high bits of f->f_pos. Don't
  16449. + * allow it to become negative: this confuses
  16450. + * nfsd_readdir() */
  16451. + cursor->key.cid = (++cid_counter) & 0x7ff;
  16452. + cursor->key.oid = oid;
  16453. + cursor->fsdata = fsdata;
  16454. + cursor->info = info;
  16455. + cursor->ref = 1;
  16456. +
  16457. + spin_lock_inode(inode);
  16458. + /* install cursor as @f's private_data, discarding old
  16459. + * one if necessary */
  16460. +#if REISER4_DEBUG
  16461. + if (file->private_data)
  16462. + warning("", "file has fsdata already");
  16463. +#endif
  16464. + clean_fsdata(file);
  16465. + free_file_fsdata_nolock(file);
  16466. + file->private_data = fsdata;
  16467. + fsdata->cursor = cursor;
  16468. + spin_unlock_inode(inode);
  16469. + spin_lock(&d_c_lock);
  16470. + /* insert cursor into hash table */
  16471. + d_cursor_hash_insert(&info->table, cursor);
  16472. + /* and chain it into radix-tree */
  16473. + bind_cursor(cursor, (unsigned long)oid);
  16474. + spin_unlock(&d_c_lock);
  16475. + radix_tree_preload_end();
  16476. + *fpos = ((__u64) cursor->key.cid) << CID_SHIFT;
  16477. + }
  16478. + } else
  16479. + result = RETERR(-ENOMEM);
  16480. + return result;
  16481. +}
  16482. +
  16483. +/**
  16484. + * process_cursors - do action on each cursor attached to inode
  16485. + * @inode: inode whose cursors to process
  16486. + * @act: action to do
  16487. + *
  16488. + * Finds all cursors of @inode in reiser4's super block radix tree of cursors
  16489. + * and performs action specified by @act on each of cursors.
  16490. + */
  16491. +static void process_cursors(struct inode *inode, enum cursor_action act)
  16492. +{
  16493. + oid_t oid;
  16494. + dir_cursor *start;
  16495. + struct list_head *head;
  16496. + reiser4_context *ctx;
  16497. + struct d_cursor_info *info;
  16498. +
  16499. + /* this can be called by
  16500. + *
  16501. + * kswapd->...->prune_icache->..reiser4_destroy_inode
  16502. + *
  16503. + * without reiser4_context
  16504. + */
  16505. + ctx = reiser4_init_context(inode->i_sb);
  16506. + if (IS_ERR(ctx)) {
  16507. + warning("vs-23", "failed to init context");
  16508. + return;
  16509. + }
  16510. +
  16511. + assert("nikita-3558", inode != NULL);
  16512. +
  16513. + info = d_info(inode);
  16514. + oid = get_inode_oid(inode);
  16515. + spin_lock_inode(inode);
  16516. + head = get_readdir_list(inode);
  16517. + spin_lock(&d_c_lock);
  16518. + /* find any cursor for this oid: reference to it is hanging of radix
  16519. + * tree */
  16520. + start = lookup(info, (unsigned long)oid);
  16521. + if (start != NULL) {
  16522. + dir_cursor *scan;
  16523. + reiser4_file_fsdata *fsdata;
  16524. +
  16525. + /* process circular list of cursors for this oid */
  16526. + scan = start;
  16527. + do {
  16528. + dir_cursor *next;
  16529. +
  16530. + next = list_entry(scan->list.next, dir_cursor, list);
  16531. + fsdata = scan->fsdata;
  16532. + assert("nikita-3557", fsdata != NULL);
  16533. + if (scan->key.oid == oid) {
  16534. + switch (act) {
  16535. + case CURSOR_DISPOSE:
  16536. + list_del_init(&fsdata->dir.linkage);
  16537. + break;
  16538. + case CURSOR_LOAD:
  16539. + list_add(&fsdata->dir.linkage, head);
  16540. + break;
  16541. + case CURSOR_KILL:
  16542. + kill_cursor(scan);
  16543. + break;
  16544. + }
  16545. + }
  16546. + if (scan == next)
  16547. + /* last cursor was just killed */
  16548. + break;
  16549. + scan = next;
  16550. + } while (scan != start);
  16551. + }
  16552. + spin_unlock(&d_c_lock);
  16553. + /* check that we killed 'em all */
  16554. + assert("nikita-3568",
  16555. + ergo(act == CURSOR_KILL,
  16556. + list_empty_careful(get_readdir_list(inode))));
  16557. + assert("nikita-3569",
  16558. + ergo(act == CURSOR_KILL, lookup(info, oid) == NULL));
  16559. + spin_unlock_inode(inode);
  16560. + reiser4_exit_context(ctx);
  16561. +}
  16562. +
  16563. +/**
  16564. + * reiser4_dispose_cursors - removes cursors from inode's list
  16565. + * @inode: inode to dispose cursors of
  16566. + *
  16567. + * For each of cursors corresponding to @inode - removes reiser4_file_fsdata
  16568. + * attached to cursor from inode's readdir list. This is called when inode is
  16569. + * removed from the memory by memory pressure.
  16570. + */
  16571. +void reiser4_dispose_cursors(struct inode *inode)
  16572. +{
  16573. + process_cursors(inode, CURSOR_DISPOSE);
  16574. +}
  16575. +
  16576. +/**
  16577. + * reiser4_load_cursors - attach cursors to inode
  16578. + * @inode: inode to load cursors to
  16579. + *
  16580. + * For each of cursors corresponding to @inode - attaches reiser4_file_fsdata
  16581. + * attached to cursor to inode's readdir list. This is done when inode is
  16582. + * loaded into memory.
  16583. + */
  16584. +void reiser4_load_cursors(struct inode *inode)
  16585. +{
  16586. + process_cursors(inode, CURSOR_LOAD);
  16587. +}
  16588. +
  16589. +/**
  16590. + * reiser4_kill_cursors - kill all inode cursors
  16591. + * @inode: inode to kill cursors of
  16592. + *
  16593. + * Frees all cursors for this inode. This is called when inode is destroyed.
  16594. + */
  16595. +void reiser4_kill_cursors(struct inode *inode)
  16596. +{
  16597. + process_cursors(inode, CURSOR_KILL);
  16598. +}
  16599. +
  16600. +/**
  16601. + * file_is_stateless -
  16602. + * @file: file descriptor to check
  16603. + *
  16604. + * true, if file descriptor @file is created on demand by NFS server to serve
  16605. + * one file system operation. This means that there may be "detached state"
  16606. + * for underlying inode.
  16607. + */
  16608. +static int file_is_stateless(struct file *file)
  16609. +{
  16610. + return reiser4_get_dentry_fsdata(file->f_path.dentry)->stateless;
  16611. +}
  16612. +
  16613. +/**
  16614. + * reiser4_get_dir_fpos - calculate effective directory position
  16615. + * @dir: directory file descriptor
  16616. + * @fpos: effective value of dir->f_pos
  16617. + *
  16618. + * Calculates ->fpos from user-supplied cookie. Normally it is dir->f_pos, but
  16619. + * in the case of stateless directory operation (readdir-over-nfs), client id
  16620. + * was encoded in the high bits of cookie and should be masked off.
  16621. + */
  16622. +loff_t reiser4_get_dir_fpos(struct file *dir, loff_t fpos)
  16623. +{
  16624. + if (file_is_stateless(dir))
  16625. + return fpos & CID_MASK;
  16626. + else
  16627. + return fpos;
  16628. +}
  16629. +
  16630. +/**
  16631. + * reiser4_attach_fsdata - try to attach fsdata
  16632. + * @file: file to find or create a readdir cursor for
  16633. + * @fpos: effective value of @file->f_pos
  16634. + * @inode: directory inode being read
  16635. + *
  16636. + * Finds or creates cursor for readdir-over-nfs.
  16637. + */
  16638. +int reiser4_attach_fsdata(struct file *file, loff_t *fpos, struct inode *inode)
  16639. +{
  16640. + loff_t pos;
  16641. + int result;
  16642. + dir_cursor *cursor;
  16643. +
  16644. + /*
  16645. + * we are serialized by inode->i_mutex
  16646. + */
  16647. + if (!file_is_stateless(file))
  16648. + return 0;
  16649. +
  16650. + pos = *fpos;
  16651. + result = 0;
  16652. + if (pos == 0) {
  16653. + /*
  16654. + * first call to readdir (or rewind to the beginning of
  16655. + * directory)
  16656. + */
  16657. + cursor = kmem_cache_alloc(d_cursor_cache,
  16658. + reiser4_ctx_gfp_mask_get());
  16659. + if (cursor != NULL)
  16660. + result = insert_cursor(cursor, file, fpos, inode);
  16661. + else
  16662. + result = RETERR(-ENOMEM);
  16663. + } else {
  16664. + /* try to find existing cursor */
  16665. + struct d_cursor_key key;
  16666. +
  16667. + key.cid = pos >> CID_SHIFT;
  16668. + key.oid = get_inode_oid(inode);
  16669. + spin_lock(&d_c_lock);
  16670. + cursor = d_cursor_hash_find(&d_info(inode)->table, &key);
  16671. + if (cursor != NULL) {
  16672. + /* cursor was found */
  16673. + if (cursor->ref == 0) {
  16674. + /* move it from unused list */
  16675. + list_del_init(&cursor->alist);
  16676. + --d_cursor_unused;
  16677. + }
  16678. + ++cursor->ref;
  16679. + }
  16680. + spin_unlock(&d_c_lock);
  16681. + if (cursor != NULL) {
  16682. + spin_lock_inode(inode);
  16683. + assert("nikita-3556", cursor->fsdata->back == NULL);
  16684. + clean_fsdata(file);
  16685. + free_file_fsdata_nolock(file);
  16686. + file->private_data = cursor->fsdata;
  16687. + spin_unlock_inode(inode);
  16688. + }
  16689. + }
  16690. + return result;
  16691. +}
  16692. +
  16693. +/**
  16694. + * reiser4_detach_fsdata - detach readdir state from a stateless file
  16695. + * @file:
  16696. + *
  16697. + * detach fsdata, if necessary
  16698. + */
  16699. +void reiser4_detach_fsdata(struct file *file)
  16700. +{
  16701. + struct inode *inode;
  16702. +
  16703. + if (!file_is_stateless(file))
  16704. + return;
  16705. +
  16706. + inode = file_inode(file);
  16707. + spin_lock_inode(inode);
  16708. + clean_fsdata(file);
  16709. + spin_unlock_inode(inode);
  16710. +}
  16711. +
  16712. +/* slab for reiser4_dentry_fsdata */
  16713. +static struct kmem_cache *dentry_fsdata_cache;
  16714. +
  16715. +/**
  16716. + * reiser4_init_dentry_fsdata - create cache of dentry_fsdata
  16717. + *
  16718. + * Initializes slab cache of structures attached to dentry->d_fsdata. It is
  16719. + * part of reiser4 module initialization.
  16720. + */
  16721. +int reiser4_init_dentry_fsdata(void)
  16722. +{
  16723. + dentry_fsdata_cache = kmem_cache_create("dentry_fsdata",
  16724. + sizeof(struct reiser4_dentry_fsdata),
  16725. + 0,
  16726. + SLAB_HWCACHE_ALIGN |
  16727. + SLAB_RECLAIM_ACCOUNT,
  16728. + NULL);
  16729. + if (dentry_fsdata_cache == NULL)
  16730. + return RETERR(-ENOMEM);
  16731. + return 0;
  16732. +}
  16733. +
  16734. +/**
  16735. + * reiser4_done_dentry_fsdata - delete cache of dentry_fsdata
  16736. + *
  16737. + * This is called on reiser4 module unloading or system shutdown.
  16738. + */
  16739. +void reiser4_done_dentry_fsdata(void)
  16740. +{
  16741. + destroy_reiser4_cache(&dentry_fsdata_cache);
  16742. +}
  16743. +
  16744. +/**
  16745. + * reiser4_get_dentry_fsdata - get fs-specific dentry data
  16746. + * @dentry: queried dentry
  16747. + *
  16748. + * Allocates if necessary and returns per-dentry data that we attach to each
  16749. + * dentry.
  16750. + */
  16751. +struct reiser4_dentry_fsdata *reiser4_get_dentry_fsdata(struct dentry *dentry)
  16752. +{
  16753. + assert("nikita-1365", dentry != NULL);
  16754. +
  16755. + if (dentry->d_fsdata == NULL) {
  16756. + dentry->d_fsdata = kmem_cache_alloc(dentry_fsdata_cache,
  16757. + reiser4_ctx_gfp_mask_get());
  16758. + if (dentry->d_fsdata == NULL)
  16759. + return ERR_PTR(RETERR(-ENOMEM));
  16760. + memset(dentry->d_fsdata, 0,
  16761. + sizeof(struct reiser4_dentry_fsdata));
  16762. + }
  16763. + return dentry->d_fsdata;
  16764. +}
  16765. +
  16766. +/**
  16767. + * reiser4_free_dentry_fsdata - detach and free dentry_fsdata
  16768. + * @dentry: dentry to free fsdata of
  16769. + *
  16770. + * Detaches and frees fs-specific dentry data
  16771. + */
  16772. +void reiser4_free_dentry_fsdata(struct dentry *dentry)
  16773. +{
  16774. + if (dentry->d_fsdata != NULL) {
  16775. + kmem_cache_free(dentry_fsdata_cache, dentry->d_fsdata);
  16776. + dentry->d_fsdata = NULL;
  16777. + }
  16778. +}
  16779. +
  16780. +/* slab for reiser4_file_fsdata */
  16781. +static struct kmem_cache *file_fsdata_cache;
  16782. +
  16783. +/**
  16784. + * reiser4_init_file_fsdata - create cache of reiser4_file_fsdata
  16785. + *
  16786. + * Initializes slab cache of structures attached to file->private_data. It is
  16787. + * part of reiser4 module initialization.
  16788. + */
  16789. +int reiser4_init_file_fsdata(void)
  16790. +{
  16791. + file_fsdata_cache = kmem_cache_create("file_fsdata",
  16792. + sizeof(reiser4_file_fsdata),
  16793. + 0,
  16794. + SLAB_HWCACHE_ALIGN |
  16795. + SLAB_RECLAIM_ACCOUNT, NULL);
  16796. + if (file_fsdata_cache == NULL)
  16797. + return RETERR(-ENOMEM);
  16798. + return 0;
  16799. +}
  16800. +
  16801. +/**
  16802. + * reiser4_done_file_fsdata - delete cache of reiser4_file_fsdata
  16803. + *
  16804. + * This is called on reiser4 module unloading or system shutdown.
  16805. + */
  16806. +void reiser4_done_file_fsdata(void)
  16807. +{
  16808. + destroy_reiser4_cache(&file_fsdata_cache);
  16809. +}
  16810. +
  16811. +/**
  16812. + * create_fsdata - allocate and initialize reiser4_file_fsdata
  16813. + * @file: what to create file_fsdata for, may be NULL
  16814. + *
  16815. + * Allocates and initializes reiser4_file_fsdata structure.
  16816. + */
  16817. +static reiser4_file_fsdata *create_fsdata(struct file *file)
  16818. +{
  16819. + reiser4_file_fsdata *fsdata;
  16820. +
  16821. + fsdata = kmem_cache_alloc(file_fsdata_cache,
  16822. + reiser4_ctx_gfp_mask_get());
  16823. + if (fsdata != NULL) {
  16824. + memset(fsdata, 0, sizeof *fsdata);
  16825. + fsdata->back = file;
  16826. + INIT_LIST_HEAD(&fsdata->dir.linkage);
  16827. + }
  16828. + return fsdata;
  16829. +}
  16830. +
  16831. +/**
  16832. + * free_fsdata - free reiser4_file_fsdata
  16833. + * @fsdata: object to free
  16834. + *
  16835. + * Dual to create_fsdata(). Free reiser4_file_fsdata.
  16836. + */
  16837. +static void free_fsdata(reiser4_file_fsdata *fsdata)
  16838. +{
  16839. + BUG_ON(fsdata == NULL);
  16840. + kmem_cache_free(file_fsdata_cache, fsdata);
  16841. +}
  16842. +
  16843. +/**
  16844. + * reiser4_get_file_fsdata - get fs-specific file data
  16845. + * @file: queried file
  16846. + *
  16847. + * Returns fs-specific data of @file. If it is NULL, allocates it and attaches
  16848. + * to @file.
  16849. + */
  16850. +reiser4_file_fsdata *reiser4_get_file_fsdata(struct file *file)
  16851. +{
  16852. + assert("nikita-1603", file != NULL);
  16853. +
  16854. + if (file->private_data == NULL) {
  16855. + reiser4_file_fsdata *fsdata;
  16856. + struct inode *inode;
  16857. +
  16858. + fsdata = create_fsdata(file);
  16859. + if (fsdata == NULL)
  16860. + return ERR_PTR(RETERR(-ENOMEM));
  16861. +
  16862. + inode = file_inode(file);
  16863. + spin_lock_inode(inode);
  16864. + if (file->private_data == NULL) {
  16865. + file->private_data = fsdata;
  16866. + fsdata = NULL;
  16867. + }
  16868. + spin_unlock_inode(inode);
  16869. + if (fsdata != NULL)
  16870. + /* other thread initialized ->fsdata */
  16871. + kmem_cache_free(file_fsdata_cache, fsdata);
  16872. + }
  16873. + assert("nikita-2665", file->private_data != NULL);
  16874. + return file->private_data;
  16875. +}
  16876. +
  16877. +/**
  16878. + * free_file_fsdata_nolock - detach and free reiser4_file_fsdata
  16879. + * @file:
  16880. + *
  16881. + * Detaches reiser4_file_fsdata from @file, removes reiser4_file_fsdata from
  16882. + * readdir list, frees if it is not linked to d_cursor object.
  16883. + */
  16884. +static void free_file_fsdata_nolock(struct file *file)
  16885. +{
  16886. + reiser4_file_fsdata *fsdata;
  16887. +
  16888. + assert("", spin_inode_is_locked(file_inode(file)));
  16889. + fsdata = file->private_data;
  16890. + if (fsdata != NULL) {
  16891. + list_del_init(&fsdata->dir.linkage);
  16892. + if (fsdata->cursor == NULL)
  16893. + free_fsdata(fsdata);
  16894. + }
  16895. + file->private_data = NULL;
  16896. +}
  16897. +
  16898. +/**
  16899. + * reiser4_free_file_fsdata - detach from struct file and free reiser4_file_fsdata
  16900. + * @file:
  16901. + *
  16902. + * Spinlocks inode and calls free_file_fsdata_nolock to do the work.
  16903. + */
  16904. +void reiser4_free_file_fsdata(struct file *file)
  16905. +{
  16906. + spin_lock_inode(file_inode(file));
  16907. + free_file_fsdata_nolock(file);
  16908. + spin_unlock_inode(file_inode(file));
  16909. +}
  16910. +
  16911. +/*
  16912. + * Local variables:
  16913. + * c-indentation-style: "K&R"
  16914. + * mode-name: "LC"
  16915. + * c-basic-offset: 8
  16916. + * tab-width: 8
  16917. + * fill-column: 79
  16918. + * End:
  16919. + */
  16920. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/fsdata.h linux-5.16.14/fs/reiser4/fsdata.h
  16921. --- linux-5.16.14.orig/fs/reiser4/fsdata.h 1970-01-01 01:00:00.000000000 +0100
  16922. +++ linux-5.16.14/fs/reiser4/fsdata.h 2022-03-12 13:26:19.652892728 +0100
  16923. @@ -0,0 +1,203 @@
  16924. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  16925. + * reiser4/README */
  16926. +
  16927. +#if !defined(__REISER4_FSDATA_H__)
  16928. +#define __REISER4_FSDATA_H__
  16929. +
  16930. +#include "debug.h"
  16931. +#include "kassign.h"
  16932. +#include "seal.h"
  16933. +#include "type_safe_hash.h"
  16934. +#include "plugin/file/file.h"
  16935. +#include "readahead.h"
  16936. +
  16937. +/*
  16938. + * comment about reiser4_dentry_fsdata
  16939. + *
  16940. + *
  16941. + */
  16942. +
  16943. +/*
  16944. + * locking: fields of per file descriptor readdir_pos and ->f_pos are
  16945. + * protected by ->i_mutex on inode. Under this lock following invariant
  16946. + * holds:
  16947. + *
  16948. + * file descriptor is "looking" at the entry_no-th directory entry from
  16949. + * the beginning of directory. This entry has key dir_entry_key and is
  16950. + * pos-th entry with duplicate-key sequence.
  16951. + *
  16952. + */
  16953. +
  16954. +/* logical position within directory */
  16955. +struct dir_pos {
  16956. + /* key of directory entry (actually, part of a key sufficient to
  16957. + identify directory entry) */
  16958. + de_id dir_entry_key;
  16959. + /* ordinal number of directory entry among all entries with the same
  16960. + key. (Starting from 0.) */
  16961. + unsigned pos;
  16962. +};
  16963. +
  16964. +struct readdir_pos {
  16965. + /* f_pos corresponding to this readdir position */
  16966. + __u64 fpos;
  16967. + /* logical position within directory */
  16968. + struct dir_pos position;
  16969. + /* logical number of directory entry within
  16970. + directory */
  16971. + __u64 entry_no;
  16972. +};
  16973. +
  16974. +/*
  16975. + * this is used to speed up lookups for directory entry: on initial call to
  16976. + * ->lookup() seal and coord of directory entry (if found, that is) are stored
  16977. + * in struct dentry and reused later to avoid tree traversals.
  16978. + */
  16979. +struct de_location {
  16980. + /* seal covering directory entry */
  16981. + seal_t entry_seal;
  16982. + /* coord of directory entry */
  16983. + coord_t entry_coord;
  16984. + /* ordinal number of directory entry among all entries with the same
  16985. + key. (Starting from 0.) */
  16986. + int pos;
  16987. +};
  16988. +
  16989. +/**
  16990. + * reiser4_dentry_fsdata - reiser4-specific data attached to dentries
  16991. + *
  16992. + * This is allocated dynamically and released in d_op->d_release()
  16993. + *
  16994. + * Currently it only contains cached location (hint) of directory entry, but
  16995. + * it is expected that other information will be accumulated here.
  16996. + */
  16997. +struct reiser4_dentry_fsdata {
  16998. + /*
  16999. + * here will go fields filled by ->lookup() to speedup next
  17000. + * create/unlink, like blocknr of znode with stat-data, or key of
  17001. + * stat-data.
  17002. + */
  17003. + struct de_location dec;
  17004. + int stateless; /* created through reiser4_decode_fh, needs
  17005. + * special treatment in readdir. */
  17006. +};
  17007. +
  17008. +extern int reiser4_init_dentry_fsdata(void);
  17009. +extern void reiser4_done_dentry_fsdata(void);
  17010. +extern struct reiser4_dentry_fsdata *reiser4_get_dentry_fsdata(struct dentry *);
  17011. +extern void reiser4_free_dentry_fsdata(struct dentry *dentry);
  17012. +
  17013. +/**
  17014. + * reiser4_file_fsdata - reiser4-specific data attached to file->private_data
  17015. + *
  17016. + * This is allocated dynamically and released in inode->i_fop->release
  17017. + */
  17018. +typedef struct reiser4_file_fsdata {
  17019. + /*
  17020. + * pointer back to the struct file which this reiser4_file_fsdata is
  17021. + * part of
  17022. + */
  17023. + struct file *back;
  17024. + /* detached cursor for stateless readdir. */
  17025. + struct dir_cursor *cursor;
  17026. + /*
  17027. + * We need both directory and regular file parts here, because there
  17028. + * are file system objects that are files and directories.
  17029. + */
  17030. + struct {
  17031. + /*
  17032. + * position in directory. It is updated each time directory is
  17033. + * modified
  17034. + */
  17035. + struct readdir_pos readdir;
  17036. + /* head of this list is reiser4_inode->lists.readdir_list */
  17037. + struct list_head linkage;
  17038. + } dir;
  17039. + /* hints to speed up operations with regular files: read and write. */
  17040. + struct {
  17041. + hint_t hint;
  17042. + } reg;
  17043. +} reiser4_file_fsdata;
  17044. +
  17045. +extern int reiser4_init_file_fsdata(void);
  17046. +extern void reiser4_done_file_fsdata(void);
  17047. +extern reiser4_file_fsdata *reiser4_get_file_fsdata(struct file *);
  17048. +extern void reiser4_free_file_fsdata(struct file *);
  17049. +
  17050. +/*
  17051. + * d_cursor is reiser4_file_fsdata not attached to struct file. d_cursors are
  17052. + * used to address problem reiser4 has with readdir accesses via NFS. See
  17053. + * plugin/file_ops_readdir.c for more details.
  17054. + */
  17055. +struct d_cursor_key{
  17056. + __u16 cid;
  17057. + __u64 oid;
  17058. +};
  17059. +
  17060. +/*
  17061. + * define structures d_cursor_hash_table d_cursor_hash_link which are used to
  17062. + * maintain hash table of dir_cursor-s in reiser4's super block
  17063. + */
  17064. +typedef struct dir_cursor dir_cursor;
  17065. +TYPE_SAFE_HASH_DECLARE(d_cursor, dir_cursor);
  17066. +
  17067. +struct dir_cursor {
  17068. + int ref;
  17069. + reiser4_file_fsdata *fsdata;
  17070. +
  17071. + /* link to reiser4 super block hash table of cursors */
  17072. + d_cursor_hash_link hash;
  17073. +
  17074. + /*
  17075. + * this is to link cursors to reiser4 super block's radix tree of
  17076. + * cursors if there are more than one cursor of the same objectid
  17077. + */
  17078. + struct list_head list;
  17079. + struct d_cursor_key key;
  17080. + struct d_cursor_info *info;
  17081. + /* list of unused cursors */
  17082. + struct list_head alist;
  17083. +};
  17084. +
  17085. +extern int reiser4_init_d_cursor(void);
  17086. +extern void reiser4_done_d_cursor(void);
  17087. +
  17088. +extern int reiser4_init_super_d_info(struct super_block *);
  17089. +extern void reiser4_done_super_d_info(struct super_block *);
  17090. +
  17091. +extern loff_t reiser4_get_dir_fpos(struct file *, loff_t);
  17092. +extern int reiser4_attach_fsdata(struct file *, loff_t *, struct inode *);
  17093. +extern void reiser4_detach_fsdata(struct file *);
  17094. +
  17095. +/* these are needed for "stateless" readdir. See plugin/file_ops_readdir.c for
  17096. + more details */
  17097. +void reiser4_dispose_cursors(struct inode *inode);
  17098. +void reiser4_load_cursors(struct inode *inode);
  17099. +void reiser4_kill_cursors(struct inode *inode);
  17100. +void reiser4_adjust_dir_file(struct inode *dir, const struct dentry *de,
  17101. + int offset, int adj);
  17102. +
  17103. +/*
  17104. + * this structure is embedded into reiser4_super_info_data. It maintains d_cursors
  17105. + * (detached readdir state). See plugin/file_ops_readdir.c for more details.
  17106. + */
  17107. +struct d_cursor_info {
  17108. + d_cursor_hash_table table;
  17109. + struct radix_tree_root tree;
  17110. +};
  17111. +
  17112. +/* spinlock protecting readdir cursors */
  17113. +extern spinlock_t d_c_lock;
  17114. +
  17115. +/* __REISER4_FSDATA_H__ */
  17116. +#endif
  17117. +
  17118. +/*
  17119. + * Local variables:
  17120. + * c-indentation-style: "K&R"
  17121. + * mode-name: "LC"
  17122. + * c-basic-offset: 8
  17123. + * tab-width: 8
  17124. + * fill-column: 120
  17125. + * End:
  17126. + */
  17127. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/init_super.c linux-5.16.14/fs/reiser4/init_super.c
  17128. --- linux-5.16.14.orig/fs/reiser4/init_super.c 1970-01-01 01:00:00.000000000 +0100
  17129. +++ linux-5.16.14/fs/reiser4/init_super.c 2022-03-12 13:26:19.653892731 +0100
  17130. @@ -0,0 +1,806 @@
  17131. +/* Copyright by Hans Reiser, 2003 */
  17132. +
  17133. +#include "super.h"
  17134. +#include "inode.h"
  17135. +#include "plugin/plugin_set.h"
  17136. +
  17137. +#include <linux/swap.h>
  17138. +
  17139. +/**
  17140. + * init_fs_info - allocate reiser4 specific super block
  17141. + * @super: super block of filesystem
  17142. + *
  17143. + * Allocates and initializes reiser4_super_info_data, attaches it to
  17144. + * super->s_fs_info, initializes structures maintaining d_cursor-s.
  17145. + */
  17146. +int reiser4_init_fs_info(struct super_block *super)
  17147. +{
  17148. + reiser4_super_info_data *sbinfo;
  17149. +
  17150. + sbinfo = kzalloc(sizeof(reiser4_super_info_data),
  17151. + reiser4_ctx_gfp_mask_get());
  17152. + if (!sbinfo)
  17153. + return RETERR(-ENOMEM);
  17154. +
  17155. + super->s_fs_info = sbinfo;
  17156. + super->s_op = NULL;
  17157. +
  17158. + ON_DEBUG(INIT_LIST_HEAD(&sbinfo->all_jnodes));
  17159. + ON_DEBUG(spin_lock_init(&sbinfo->all_guard));
  17160. +
  17161. + mutex_init(&sbinfo->delete_mutex);
  17162. + spin_lock_init(&(sbinfo->guard));
  17163. +
  17164. + /* initialize per-super-block d_cursor resources */
  17165. + reiser4_init_super_d_info(super);
  17166. +
  17167. + return 0;
  17168. +}
  17169. +
  17170. +/**
  17171. + * Release reiser4 specific super block
  17172. + *
  17173. + * release per-super-block d_cursor resources
  17174. + * free reiser4_super_info_data.
  17175. + */
  17176. +void reiser4_done_fs_info(struct super_block *super)
  17177. +{
  17178. + assert("zam-990", super->s_fs_info != NULL);
  17179. +
  17180. + reiser4_done_super_d_info(super);
  17181. + kfree(super->s_fs_info);
  17182. + super->s_fs_info = NULL;
  17183. +}
  17184. +
/* type of option parseable by parse_option() */
typedef enum {
	/* value of option is an arbitrary string, stored verbatim */
	OPT_STRING,

	/*
	 * option specifies bit in a bitmask. When option is set - bit in
	 * sbinfo->fs_flags is set. Examples are bsdgroups, 32bittimes, mtflush,
	 * dont_load_bitmap, atomic_write.
	 */
	OPT_BIT,

	/*
	 * value of option should conform to sprintf() format. Examples are
	 * tmgr.atom_max_size=N, tmgr.atom_max_age=N
	 */
	OPT_FORMAT,

	/*
	 * option can take one of predefined values. Example is onerror=panic
	 * or onerror=remount-ro
	 */
	OPT_ONEOF,

	/*
	 * option takes one of the txmod plugin labels.
	 * Example is "txmod=journal" or "txmod=wa"
	 */
	OPT_TXMOD,
} opt_type_t;
  17215. +
/*
 * NOTE(review): dead code kept for reference — describes a single named
 * bit of a prospective bitmask option type; matches the unused "bitmask"
 * member of struct opt_desc.
 */
#if 0
struct opt_bitmask_bit {
	const char *bit_name;
	int bit_nr;
};
#endif

/* capacity (including the NULL terminator) of an OPT_ONEOF value list */
#define MAX_ONEOF_LIST 10
  17224. +
/* description of option parseable by parse_option() */
struct opt_desc {
	/* option name.

	   parsed portion of string has a form "name=value".
	 */
	const char *name;
	/* type of option */
	opt_type_t type;
	union {
		/* where to store value of string option (type == OPT_STRING) */
		char **string;
		/* description of bits for bit option (type == OPT_BIT) */
		struct {
			int nr;
			void *addr;
		} bit;
		/* description of format and targets for format option (type
		   == OPT_FORMAT) */
		struct {
			const char *format;
			int nr_args;
			void *arg1;
			void *arg2;
			void *arg3;
			void *arg4;
		} f;
		/* NULL-terminated list of accepted values; index of the
		   match is stored through @result (type == OPT_ONEOF) */
		struct {
			int *result;
			const char *list[MAX_ONEOF_LIST];
		} oneof;
		/* where to store the matched txmod plugin id
		   (type == OPT_TXMOD) */
		struct {
			reiser4_txmod_id *result;
		} txmod;
		/* NOTE(review): unused; companion of the #if 0'ed
		   opt_bitmask_bit above */
		struct {
			void *addr;
			int nr_bits;
			/* struct opt_bitmask_bit *bits; */
		} bitmask;
	} u;
};
  17266. +
  17267. +/**
  17268. + * parse_option - parse one option
  17269. + * @opt_strin: starting point of parsing
  17270. + * @opt: option description
  17271. + *
  17272. + * foo=bar,
  17273. + * ^ ^ ^
  17274. + * | | +-- replaced to '\0'
  17275. + * | +-- val_start
  17276. + * +-- opt_string
  17277. + * Figures out option type and handles option correspondingly.
  17278. + */
  17279. +static int parse_option(char *opt_string, struct opt_desc *opt)
  17280. +{
  17281. + char *val_start;
  17282. + int result;
  17283. + const char *err_msg;
  17284. +
  17285. + /* NOTE-NIKITA think about using lib/cmdline.c functions here. */
  17286. +
  17287. + val_start = strchr(opt_string, '=');
  17288. + if (val_start != NULL) {
  17289. + *val_start = '\0';
  17290. + ++val_start;
  17291. + }
  17292. +
  17293. + err_msg = NULL;
  17294. + result = 0;
  17295. + switch (opt->type) {
  17296. + case OPT_STRING:
  17297. + if (val_start == NULL) {
  17298. + err_msg = "String arg missing";
  17299. + result = RETERR(-EINVAL);
  17300. + } else
  17301. + *opt->u.string = val_start;
  17302. + break;
  17303. + case OPT_BIT:
  17304. + if (val_start != NULL)
  17305. + err_msg = "Value ignored";
  17306. + else
  17307. + set_bit(opt->u.bit.nr, opt->u.bit.addr);
  17308. + break;
  17309. + case OPT_FORMAT:
  17310. + if (val_start == NULL) {
  17311. + err_msg = "Formatted arg missing";
  17312. + result = RETERR(-EINVAL);
  17313. + break;
  17314. + }
  17315. + if (sscanf(val_start, opt->u.f.format,
  17316. + opt->u.f.arg1, opt->u.f.arg2, opt->u.f.arg3,
  17317. + opt->u.f.arg4) != opt->u.f.nr_args) {
  17318. + err_msg = "Wrong conversion";
  17319. + result = RETERR(-EINVAL);
  17320. + }
  17321. + break;
  17322. + case OPT_ONEOF:
  17323. + {
  17324. + int i = 0;
  17325. +
  17326. + if (val_start == NULL) {
  17327. + err_msg = "Value is missing";
  17328. + result = RETERR(-EINVAL);
  17329. + break;
  17330. + }
  17331. + err_msg = "Wrong option value";
  17332. + result = RETERR(-EINVAL);
  17333. + while (opt->u.oneof.list[i]) {
  17334. + if (!strcmp(opt->u.oneof.list[i], val_start)) {
  17335. + result = 0;
  17336. + err_msg = NULL;
  17337. + *opt->u.oneof.result = i;
  17338. + break;
  17339. + }
  17340. + i++;
  17341. + }
  17342. + break;
  17343. + }
  17344. + break;
  17345. + case OPT_TXMOD:
  17346. + {
  17347. + reiser4_txmod_id i = 0;
  17348. +
  17349. + if (val_start == NULL) {
  17350. + err_msg = "Value is missing";
  17351. + result = RETERR(-EINVAL);
  17352. + break;
  17353. + }
  17354. + err_msg = "Wrong option value";
  17355. + result = RETERR(-EINVAL);
  17356. + while (i < LAST_TXMOD_ID) {
  17357. + if (!strcmp(txmod_plugins[i].h.label,
  17358. + val_start)) {
  17359. + result = 0;
  17360. + err_msg = NULL;
  17361. + *opt->u.txmod.result = i;
  17362. + break;
  17363. + }
  17364. + i++;
  17365. + }
  17366. + break;
  17367. + }
  17368. + default:
  17369. + wrong_return_value("nikita-2100", "opt -> type");
  17370. + break;
  17371. + }
  17372. + if (err_msg != NULL) {
  17373. + warning("nikita-2496", "%s when parsing option \"%s%s%s\"",
  17374. + err_msg, opt->name, val_start ? "=" : "",
  17375. + val_start ? : "");
  17376. + }
  17377. + return result;
  17378. +}
  17379. +
  17380. +/**
  17381. + * parse_options - parse reiser4 mount options
  17382. + * @opt_string: starting point
  17383. + * @opts: array of option description
  17384. + * @nr_opts: number of elements in @opts
  17385. + *
  17386. + * Parses comma separated list of reiser4 mount options.
  17387. + */
  17388. +static int parse_options(char *opt_string, struct opt_desc *opts, int nr_opts)
  17389. +{
  17390. + int result;
  17391. +
  17392. + result = 0;
  17393. + while ((result == 0) && opt_string && *opt_string) {
  17394. + int j;
  17395. + char *next;
  17396. +
  17397. + next = strchr(opt_string, ',');
  17398. + if (next != NULL) {
  17399. + *next = '\0';
  17400. + ++next;
  17401. + }
  17402. + for (j = 0; j < nr_opts; ++j) {
  17403. + if (!strncmp(opt_string, opts[j].name,
  17404. + strlen(opts[j].name))) {
  17405. + result = parse_option(opt_string, &opts[j]);
  17406. + break;
  17407. + }
  17408. + }
  17409. + if (j == nr_opts) {
  17410. + warning("nikita-2307", "Unrecognized option: \"%s\"",
  17411. + opt_string);
  17412. + /* traditionally, -EINVAL is returned on wrong mount
  17413. + option */
  17414. + result = RETERR(-EINVAL);
  17415. + }
  17416. + opt_string = next;
  17417. + }
  17418. + return result;
  17419. +}
  17420. +
/*
 * NUM_OPT - build an OPT_FORMAT opt_desc initializer that scans one
 * numeric value into @addr using sscanf format @fmt.
 */
#define NUM_OPT(label, fmt, addr)				\
	{							\
		.name = (label),				\
		.type = OPT_FORMAT,				\
		.u = {						\
			.f = {					\
				.format	= (fmt),		\
				.nr_args = 1,			\
				.arg1 = (addr),			\
				.arg2 = NULL,			\
				.arg3 = NULL,			\
				.arg4 = NULL			\
			}					\
		}						\
	}

/* NUM_OPT for a field of the sbinfo variable in scope at the expansion
   site; the option name is the field name itself */
#define SB_FIELD_OPT(field, fmt) NUM_OPT(#field, fmt, &sbinfo->field)

/*
 * BIT_OPT - build an OPT_BIT opt_desc initializer that sets bit @bitnr
 * in sbinfo->fs_flags when option @label is present.
 */
#define BIT_OPT(label, bitnr)					\
	{							\
		.name = label,					\
		.type = OPT_BIT,				\
		.u = {						\
			.bit = {				\
				.nr = bitnr,			\
				.addr = &sbinfo->fs_flags	\
			}					\
		}						\
	}

/* capacity of the opt_desc array built in reiser4_init_super_data() */
#define MAX_NR_OPTIONS (30)

#if REISER4_DEBUG
/* debug-only overflow guard; the "break" exits PUSH_OPT's do/while(0),
   skipping the store when the array is full */
# define OPT_ARRAY_CHECK(opt, array)					\
	if ((opt) > (array) + MAX_NR_OPTIONS) {				\
		warning("zam-1046", "opt array is overloaded"); break;	\
	}
#else
# define OPT_ARRAY_CHECK(opt, array) noop
#endif

/* append the opt_desc initializer given in __VA_ARGS__ at *(opt),
   advancing the cursor; bounds-checked under REISER4_DEBUG */
#define PUSH_OPT(opt, array, ...)		\
do {						\
	struct opt_desc o = __VA_ARGS__;	\
	OPT_ARRAY_CHECK(opt, array);		\
	*(opt) ++ = o;				\
} while (0)
  17468. +
/**
 * push_sb_field_opts - append numeric per-field mount options to the table
 * @p: in/out cursor into @opts, advanced past each pushed entry
 * @opts: start of the option array (for overflow checking)
 * @sbinfo: super block private data whose fields the options write to
 */
static noinline void push_sb_field_opts(struct opt_desc **p,
					struct opt_desc *opts,
					reiser4_super_info_data *sbinfo)
{
#define PUSH_SB_FIELD_OPT(field, format)		\
	PUSH_OPT(*p, opts, SB_FIELD_OPT(field, format))
	/*
	 * tmgr.atom_max_size=N
	 * Atoms containing more than N blocks will be forced to commit. N is
	 * decimal.
	 */
	PUSH_SB_FIELD_OPT(tmgr.atom_max_size, "%u");
	/*
	 * tmgr.atom_max_age=N
	 * Atoms older than N seconds will be forced to commit. N is decimal.
	 */
	PUSH_SB_FIELD_OPT(tmgr.atom_max_age, "%u");
	/*
	 * tmgr.atom_min_size=N
	 * In committing an atom to free dirty pages, force the atom less than
	 * N in size to fuse with another one.
	 */
	PUSH_SB_FIELD_OPT(tmgr.atom_min_size, "%u");
	/*
	 * tmgr.atom_max_flushers=N
	 * limit of concurrent flushers for one atom. 0 means no limit.
	 */
	PUSH_SB_FIELD_OPT(tmgr.atom_max_flushers, "%u");
	/*
	 * tree.cbk_cache_slots=N
	 * Number of slots in the cbk cache.
	 */
	PUSH_SB_FIELD_OPT(tree.cbk_cache.nr_slots, "%u");
	/*
	 * If flush finds more than FLUSH_RELOCATE_THRESHOLD adjacent dirty
	 * leaf-level blocks it will force them to be relocated.
	 */
	PUSH_SB_FIELD_OPT(flush.relocate_threshold, "%u");
	/*
	 * If flush can find a block allocation closer than at most
	 * FLUSH_RELOCATE_DISTANCE from the preceder it will relocate to that
	 * position.
	 */
	PUSH_SB_FIELD_OPT(flush.relocate_distance, "%u");
	/*
	 * If we have written this much or more blocks before encountering busy
	 * jnode in flush list - abort flushing hoping that next time we get
	 * called this jnode will be clean already, and we will save some
	 * seeks.
	 */
	PUSH_SB_FIELD_OPT(flush.written_threshold, "%u");
	/* The maximum number of nodes to scan left on a level during flush. */
	PUSH_SB_FIELD_OPT(flush.scan_maxnodes, "%u");
	/* preferred IO size */
	PUSH_SB_FIELD_OPT(optimal_io_size, "%u");
	/* carry flags used for insertion of new nodes */
	PUSH_SB_FIELD_OPT(tree.carry.new_node_flags, "%u");
	/* carry flags used for insertion of new extents */
	PUSH_SB_FIELD_OPT(tree.carry.new_extent_flags, "%u");
	/* carry flags used for paste operations */
	PUSH_SB_FIELD_OPT(tree.carry.paste_flags, "%u");
	/* carry flags used for insert operations */
	PUSH_SB_FIELD_OPT(tree.carry.insert_flags, "%u");

#ifdef CONFIG_REISER4_BADBLOCKS
	/*
	 * Alternative master superblock location in case if it's original
	 * location is not writeable/accessible. This is offset in BYTES.
	 */
	PUSH_SB_FIELD_OPT(altsuper, "%lu");
#endif
}
  17541. +
  17542. +/**
  17543. + * reiser4_init_super_data - initialize reiser4 private super block
  17544. + * @super: super block to initialize
  17545. + * @opt_string: list of reiser4 mount options
  17546. + *
  17547. + * Sets various reiser4 parameters to default values. Parses mount options and
  17548. + * overwrites default settings.
  17549. + */
  17550. +int reiser4_init_super_data(struct super_block *super, char *opt_string)
  17551. +{
  17552. + int result;
  17553. + struct opt_desc *opts, *p;
  17554. + reiser4_super_info_data *sbinfo = get_super_private(super);
  17555. +
  17556. + /* initialize super, export, dentry operations */
  17557. + sbinfo->ops.super = reiser4_super_operations;
  17558. + sbinfo->ops.export = reiser4_export_operations;
  17559. + sbinfo->ops.dentry = reiser4_dentry_operations;
  17560. + super->s_op = &sbinfo->ops.super;
  17561. + super->s_export_op = &sbinfo->ops.export;
  17562. +
  17563. + /* initialize transaction manager parameters to default values */
  17564. + sbinfo->tmgr.atom_max_size = totalram_pages() / 4;
  17565. + sbinfo->tmgr.atom_max_age = REISER4_ATOM_MAX_AGE / HZ;
  17566. + sbinfo->tmgr.atom_min_size = 256;
  17567. + sbinfo->tmgr.atom_max_flushers = ATOM_MAX_FLUSHERS;
  17568. +
  17569. + /* initialize cbk cache parameter */
  17570. + sbinfo->tree.cbk_cache.nr_slots = CBK_CACHE_SLOTS;
  17571. +
  17572. + /* initialize flush parameters */
  17573. + sbinfo->flush.relocate_threshold = FLUSH_RELOCATE_THRESHOLD;
  17574. + sbinfo->flush.relocate_distance = FLUSH_RELOCATE_DISTANCE;
  17575. + sbinfo->flush.written_threshold = FLUSH_WRITTEN_THRESHOLD;
  17576. + sbinfo->flush.scan_maxnodes = FLUSH_SCAN_MAXNODES;
  17577. +
  17578. + sbinfo->optimal_io_size = REISER4_OPTIMAL_IO_SIZE;
  17579. +
  17580. + /* preliminary tree initializations */
  17581. + sbinfo->tree.super = super;
  17582. + sbinfo->tree.carry.new_node_flags = REISER4_NEW_NODE_FLAGS;
  17583. + sbinfo->tree.carry.new_extent_flags = REISER4_NEW_EXTENT_FLAGS;
  17584. + sbinfo->tree.carry.paste_flags = REISER4_PASTE_FLAGS;
  17585. + sbinfo->tree.carry.insert_flags = REISER4_INSERT_FLAGS;
  17586. + rwlock_init(&(sbinfo->tree.tree_lock));
  17587. + spin_lock_init(&(sbinfo->tree.epoch_lock));
  17588. +
  17589. + /* initialize default readahead params */
  17590. + sbinfo->ra_params.max = totalram_pages() / 4;
  17591. + sbinfo->ra_params.flags = 0;
  17592. +
  17593. + /* allocate memory for structure describing reiser4 mount options */
  17594. + opts = kmalloc(sizeof(struct opt_desc) * MAX_NR_OPTIONS,
  17595. + reiser4_ctx_gfp_mask_get());
  17596. + if (opts == NULL)
  17597. + return RETERR(-ENOMEM);
  17598. +
  17599. + /* initialize structure describing reiser4 mount options */
  17600. + p = opts;
  17601. +
  17602. + push_sb_field_opts(&p, opts, sbinfo);
  17603. + /* turn on BSD-style gid assignment */
  17604. +
  17605. +#define PUSH_BIT_OPT(name, bit) \
  17606. + PUSH_OPT(p, opts, BIT_OPT(name, bit))
  17607. +
  17608. + PUSH_BIT_OPT("bsdgroups", REISER4_BSD_GID);
  17609. + /* turn on 32 bit times */
  17610. + PUSH_BIT_OPT("32bittimes", REISER4_32_BIT_TIMES);
  17611. + /*
  17612. + * Don't load all bitmap blocks at mount time, it is useful for
  17613. + * machines with tiny RAM and large disks.
  17614. + */
  17615. + PUSH_BIT_OPT("dont_load_bitmap", REISER4_DONT_LOAD_BITMAP);
  17616. + /* disable transaction commits during write() */
  17617. + PUSH_BIT_OPT("atomic_write", REISER4_ATOMIC_WRITE);
  17618. + /* enable issuing of discard requests */
  17619. + PUSH_BIT_OPT("discard", REISER4_DISCARD);
  17620. + /* disable hole punching at flush time */
  17621. + PUSH_BIT_OPT("dont_punch_holes", REISER4_DONT_PUNCH_HOLES);
  17622. +
  17623. + PUSH_OPT(p, opts,
  17624. + {
  17625. + /*
  17626. + * tree traversal readahead parameters:
  17627. + * -o readahead:MAXNUM:FLAGS
  17628. + * MAXNUM - max number fo nodes to request readahead for: -1UL
  17629. + * will set it to max_sane_readahead()
  17630. + * FLAGS - combination of bits: RA_ADJCENT_ONLY, RA_ALL_LEVELS,
  17631. + * CONTINUE_ON_PRESENT
  17632. + */
  17633. + .name = "readahead",
  17634. + .type = OPT_FORMAT,
  17635. + .u = {
  17636. + .f = {
  17637. + .format = "%u:%u",
  17638. + .nr_args = 2,
  17639. + .arg1 = &sbinfo->ra_params.max,
  17640. + .arg2 = &sbinfo->ra_params.flags,
  17641. + .arg3 = NULL,
  17642. + .arg4 = NULL
  17643. + }
  17644. + }
  17645. + }
  17646. + );
  17647. +
  17648. + /* What to do in case of fs error */
  17649. + PUSH_OPT(p, opts,
  17650. + {
  17651. + .name = "onerror",
  17652. + .type = OPT_ONEOF,
  17653. + .u = {
  17654. + .oneof = {
  17655. + .result = &sbinfo->onerror,
  17656. + .list = {
  17657. + "remount-ro", "panic", NULL
  17658. + },
  17659. + }
  17660. + }
  17661. + }
  17662. + );
  17663. +
  17664. + /*
  17665. + * What trancaction model (journal, cow, etc)
  17666. + * is used to commit transactions
  17667. + */
  17668. + PUSH_OPT(p, opts,
  17669. + {
  17670. + .name = "txmod",
  17671. + .type = OPT_TXMOD,
  17672. + .u = {
  17673. + .txmod = {
  17674. + .result = &sbinfo->txmod
  17675. + }
  17676. + }
  17677. + }
  17678. + );
  17679. +
  17680. + /* modify default settings to values set by mount options */
  17681. + result = parse_options(opt_string, opts, p - opts);
  17682. + kfree(opts);
  17683. + if (result != 0)
  17684. + return result;
  17685. +
  17686. + /* correct settings to sanity values */
  17687. + sbinfo->tmgr.atom_max_age *= HZ;
  17688. + if (sbinfo->tmgr.atom_max_age <= 0)
  17689. + /* overflow */
  17690. + sbinfo->tmgr.atom_max_age = REISER4_ATOM_MAX_AGE;
  17691. +
  17692. + /* round optimal io size up to 512 bytes */
  17693. + sbinfo->optimal_io_size >>= VFS_BLKSIZE_BITS;
  17694. + sbinfo->optimal_io_size <<= VFS_BLKSIZE_BITS;
  17695. + if (sbinfo->optimal_io_size == 0) {
  17696. + warning("nikita-2497", "optimal_io_size is too small");
  17697. + return RETERR(-EINVAL);
  17698. + }
  17699. + return result;
  17700. +}
  17701. +
  17702. +/**
  17703. + * reiser4_init_read_super - read reiser4 master super block
  17704. + * @super: super block to fill
  17705. + * @silent: if 0 - print warnings
  17706. + *
  17707. + * Reads reiser4 master super block either from predefined location or from
  17708. + * location specified by altsuper mount option, initializes disk format plugin.
  17709. + */
  17710. +int reiser4_init_read_super(struct super_block *super, int silent)
  17711. +{
  17712. + struct buffer_head *super_bh;
  17713. + struct reiser4_master_sb *master_sb;
  17714. + reiser4_super_info_data *sbinfo = get_super_private(super);
  17715. + unsigned long blocksize;
  17716. +
  17717. + read_super_block:
  17718. +#ifdef CONFIG_REISER4_BADBLOCKS
  17719. + if (sbinfo->altsuper)
  17720. + /*
  17721. + * read reiser4 master super block at position specified by
  17722. + * mount option
  17723. + */
  17724. + super_bh = sb_bread(super,
  17725. + (sector_t)(sbinfo->altsuper / super->s_blocksize));
  17726. + else
  17727. +#endif
  17728. + /* read reiser4 master super block at 16-th 4096 block */
  17729. + super_bh = sb_bread(super,
  17730. + (sector_t)(REISER4_MAGIC_OFFSET / super->s_blocksize));
  17731. + if (!super_bh)
  17732. + return RETERR(-EIO);
  17733. +
  17734. + master_sb = (struct reiser4_master_sb *)super_bh->b_data;
  17735. + /* check reiser4 magic string */
  17736. + if (!strncmp(master_sb->magic, REISER4_SUPER_MAGIC_STRING,
  17737. + sizeof(REISER4_SUPER_MAGIC_STRING))) {
  17738. + /* reiser4 master super block contains filesystem blocksize */
  17739. + blocksize = le16_to_cpu(get_unaligned(&master_sb->blocksize));
  17740. +
  17741. + if (blocksize != PAGE_SIZE) {
  17742. + /*
  17743. + * currenly reiser4's blocksize must be equal to
  17744. + * pagesize
  17745. + */
  17746. + if (!silent)
  17747. + warning("nikita-2609",
  17748. + "%s: wrong block size %ld\n", super->s_id,
  17749. + blocksize);
  17750. + brelse(super_bh);
  17751. + return RETERR(-EINVAL);
  17752. + }
  17753. + if (blocksize != super->s_blocksize) {
  17754. + /*
  17755. + * filesystem uses different blocksize. Reread master
  17756. + * super block with correct blocksize
  17757. + */
  17758. + brelse(super_bh);
  17759. + if (!sb_set_blocksize(super, (int)blocksize))
  17760. + return RETERR(-EINVAL);
  17761. + goto read_super_block;
  17762. + }
  17763. +
  17764. + sbinfo->df_plug =
  17765. + disk_format_plugin_by_unsafe_id(
  17766. + le16_to_cpu(get_unaligned(&master_sb->disk_plugin_id)));
  17767. + if (sbinfo->df_plug == NULL) {
  17768. + if (!silent)
  17769. + warning("nikita-26091",
  17770. + "%s: unknown disk format plugin %d\n",
  17771. + super->s_id,
  17772. + le16_to_cpu(get_unaligned(&master_sb->disk_plugin_id)));
  17773. + brelse(super_bh);
  17774. + return RETERR(-EINVAL);
  17775. + }
  17776. + sbinfo->diskmap_block = le64_to_cpu(get_unaligned(&master_sb->diskmap));
  17777. + brelse(super_bh);
  17778. + return 0;
  17779. + }
  17780. +
  17781. + /* there is no reiser4 on the device */
  17782. + if (!silent)
  17783. + warning("nikita-2608",
  17784. + "%s: wrong master super block magic", super->s_id);
  17785. + brelse(super_bh);
  17786. + return RETERR(-EINVAL);
  17787. +}
  17788. +
/*
 * Table of default plugins: for each plugin-set member, the plugin type
 * and id used when nothing else is specified.  Indexed by pset_member;
 * consulted by get_default_plugin().
 */
static struct {
	reiser4_plugin_type type;
	reiser4_plugin_id id;
} default_plugins[PSET_LAST] = {
	[PSET_FILE] = {
		.type = REISER4_FILE_PLUGIN_TYPE,
		.id = UNIX_FILE_PLUGIN_ID
	},
	[PSET_DIR] = {
		.type = REISER4_DIR_PLUGIN_TYPE,
		.id = HASHED_DIR_PLUGIN_ID
	},
	[PSET_HASH] = {
		.type = REISER4_HASH_PLUGIN_TYPE,
		.id = R5_HASH_ID
	},
	[PSET_FIBRATION] = {
		.type = REISER4_FIBRATION_PLUGIN_TYPE,
		.id = FIBRATION_DOT_O
	},
	[PSET_PERM] = {
		.type = REISER4_PERM_PLUGIN_TYPE,
		.id = NULL_PERM_ID
	},
	[PSET_FORMATTING] = {
		.type = REISER4_FORMATTING_PLUGIN_TYPE,
		.id = SMALL_FILE_FORMATTING_ID
	},
	[PSET_SD] = {
		.type = REISER4_ITEM_PLUGIN_TYPE,
		.id = STATIC_STAT_DATA_ID
	},
	[PSET_DIR_ITEM] = {
		.type = REISER4_ITEM_PLUGIN_TYPE,
		.id = COMPOUND_DIR_ID
	},
	[PSET_CIPHER] = {
		.type = REISER4_CIPHER_PLUGIN_TYPE,
		.id = NONE_CIPHER_ID
	},
	[PSET_DIGEST] = {
		.type = REISER4_DIGEST_PLUGIN_TYPE,
		.id = SHA256_32_DIGEST_ID
	},
	[PSET_COMPRESSION] = {
		.type = REISER4_COMPRESSION_PLUGIN_TYPE,
		.id = LZO1_COMPRESSION_ID
	},
	[PSET_COMPRESSION_MODE] = {
		.type = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
		.id = CONVX_COMPRESSION_MODE_ID
	},
	[PSET_CLUSTER] = {
		.type = REISER4_CLUSTER_PLUGIN_TYPE,
		.id = CLUSTER_64K_ID
	},
	[PSET_CREATE] = {
		.type = REISER4_FILE_PLUGIN_TYPE,
		.id = UNIX_FILE_PLUGIN_ID
	}
};
  17850. +
  17851. +/* access to default plugin table */
  17852. +reiser4_plugin *get_default_plugin(pset_member memb)
  17853. +{
  17854. + return plugin_by_id(default_plugins[memb].type,
  17855. + default_plugins[memb].id);
  17856. +}
  17857. +
/**
 * reiser4_init_root_inode - obtain inode of root directory
 * @super: super block of filesystem
 *
 * Obtains inode of root directory (reading it from disk), initializes
 * plugin set if it was not initialized.
 */
int reiser4_init_root_inode(struct super_block *super)
{
	reiser4_super_info_data *sbinfo = get_super_private(super);
	struct inode *inode;
	int result = 0;

	inode = reiser4_iget(super, sbinfo->df_plug->root_dir_key(super), 0);
	if (IS_ERR(inode))
		return RETERR(PTR_ERR(inode));

	super->s_root = d_make_root(inode);
	if (!super->s_root) {
		/* NOTE(review): assumes d_make_root() releases @inode on
		   failure, so no iput() here — confirm against dcache API */
		return RETERR(-ENOMEM);
	}

	super->s_root->d_op = &sbinfo->ops.dentry;

	if (!is_inode_loaded(inode)) {
		pset_member memb;
		plugin_set *pset;

		/* fill plugin-set members that were absent on disk with
		   defaults */
		pset = reiser4_inode_data(inode)->pset;
		for (memb = 0; memb < PSET_LAST; ++memb) {

			if (aset_get(pset, memb) != NULL)
				continue;

			result = grab_plugin_pset(inode, NULL, memb);
			if (result != 0)
				break;

			/* stat-data length may have changed; force the
			   update below */
			reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
		}

		if (result == 0) {
			/* debug build: verify every member is now set */
			if (REISER4_DEBUG) {
				for (memb = 0; memb < PSET_LAST; ++memb)
					assert("nikita-3500",
					       aset_get(pset, memb) != NULL);
			}
		} else
			warning("nikita-3448", "Cannot set plugins of root: %i",
				result);
		reiser4_iget_complete(inode);

		/* As the default pset kept in the root dir may have been
		   changed (length is unknown), call update_sd. */
		if (!reiser4_inode_get_flag(inode, REISER4_SDLEN_KNOWN)) {
			result = reiser4_grab_space(
				inode_file_plugin(inode)->estimate.update(inode),
				BA_CAN_COMMIT);

			if (result == 0)
				result = reiser4_update_sd(inode);

			all_grabbed2free();
		}
	}

	super->s_maxbytes = MAX_LFS_FILESIZE;
	return result;
}
  17927. +
  17928. +/*
  17929. + * Local variables:
  17930. + * c-indentation-style: "K&R"
  17931. + * mode-name: "LC"
  17932. + * c-basic-offset: 8
  17933. + * tab-width: 8
  17934. + * fill-column: 79
  17935. + * End:
  17936. + */
  17937. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/inode.c linux-5.16.14/fs/reiser4/inode.c
  17938. --- linux-5.16.14.orig/fs/reiser4/inode.c 1970-01-01 01:00:00.000000000 +0100
  17939. +++ linux-5.16.14/fs/reiser4/inode.c 2022-03-12 13:26:19.653892731 +0100
  17940. @@ -0,0 +1,711 @@
  17941. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  17942. + reiser4/README */
  17943. +
  17944. +/* Inode specific operations. */
  17945. +
  17946. +#include "forward.h"
  17947. +#include "debug.h"
  17948. +#include "key.h"
  17949. +#include "kassign.h"
  17950. +#include "coord.h"
  17951. +#include "seal.h"
  17952. +#include "dscale.h"
  17953. +#include "plugin/item/item.h"
  17954. +#include "plugin/security/perm.h"
  17955. +#include "plugin/plugin.h"
  17956. +#include "plugin/object.h"
  17957. +#include "znode.h"
  17958. +#include "vfs_ops.h"
  17959. +#include "inode.h"
  17960. +#include "super.h"
  17961. +#include "reiser4.h"
  17962. +
  17963. +#include <linux/fs.h> /* for struct super_block, address_space */
  17964. +
  17965. +/* return reiser4 internal tree which inode belongs to */
  17966. +/* Audited by: green(2002.06.17) */
  17967. +reiser4_tree *reiser4_tree_by_inode(const struct inode *inode/* inode queried*/)
  17968. +{
  17969. + assert("nikita-256", inode != NULL);
  17970. + assert("nikita-257", inode->i_sb != NULL);
  17971. + return reiser4_get_tree(inode->i_sb);
  17972. +}
  17973. +
  17974. +/* return reiser4-specific inode flags */
  17975. +static inline unsigned long *inode_flags(const struct inode *const inode)
  17976. +{
  17977. + assert("nikita-2842", inode != NULL);
  17978. + return &reiser4_inode_data(inode)->flags;
  17979. +}
  17980. +
  17981. +/* set reiser4-specific flag @f in @inode */
  17982. +void reiser4_inode_set_flag(struct inode *inode, reiser4_file_plugin_flags f)
  17983. +{
  17984. + assert("nikita-2248", inode != NULL);
  17985. + set_bit((int)f, inode_flags(inode));
  17986. +}
  17987. +
  17988. +/* clear reiser4-specific flag @f in @inode */
  17989. +void reiser4_inode_clr_flag(struct inode *inode, reiser4_file_plugin_flags f)
  17990. +{
  17991. + assert("nikita-2250", inode != NULL);
  17992. + clear_bit((int)f, inode_flags(inode));
  17993. +}
  17994. +
  17995. +/* true if reiser4-specific flag @f is set in @inode */
  17996. +int reiser4_inode_get_flag(const struct inode *inode,
  17997. + reiser4_file_plugin_flags f)
  17998. +{
  17999. + assert("nikita-2251", inode != NULL);
  18000. + return test_bit((int)f, inode_flags(inode));
  18001. +}
  18002. +
  18003. +/* convert oid to inode number */
  18004. +ino_t oid_to_ino(oid_t oid)
  18005. +{
  18006. + return (ino_t) oid;
  18007. +}
  18008. +
/* convert oid to user visible inode number */
ino_t oid_to_uino(oid_t oid)
{
	/* reiser4 object is uniquely identified by oid which is 64 bit
	   quantity. Kernel in-memory inode is indexed (in the hash table) by
	   32 bit i_ino field, but this is not a problem, because there is a
	   way to further distinguish inodes with identical inode numbers
	   (find_actor supplied to iget()).

	   But user space expects unique 32 bit inode number. Obviously this
	   is impossible. Work-around is to somehow hash oid into user visible
	   inode number.
	 */
	/* NOTE(review): max_ino is an oid_t initialized from (ino_t)~0 —
	   presumably the maximal representable inode number; confirm the
	   intended widths of oid_t vs ino_t here */
	oid_t max_ino = (ino_t) ~0;

	if (REISER4_INO_IS_OID || (oid <= max_ino))
		return oid;
	else
		/* this is remotely similar to algorithm used to find next pid
		   to use for process: after wrap-around start from some
		   offset rather than from 0. Idea is that there are some long
		   living objects with which we don't want to collide.
		 */
		return REISER4_UINO_SHIFT + ((oid - max_ino) & (max_ino >> 1));
}
  18034. +
  18035. +/* check that "inode" is on reiser4 file-system */
  18036. +int is_reiser4_inode(const struct inode *inode/* inode queried */)
  18037. +{
  18038. + return inode != NULL && is_reiser4_super(inode->i_sb);
  18039. +}
  18040. +
  18041. +/* Maximal length of a name that can be stored in directory @inode.
  18042. +
  18043. + This is used in check during file creation and lookup. */
  18044. +int reiser4_max_filename_len(const struct inode *inode/* inode queried */)
  18045. +{
  18046. + assert("nikita-287", is_reiser4_inode(inode));
  18047. + assert("nikita-1710", inode_dir_item_plugin(inode));
  18048. + if (inode_dir_item_plugin(inode)->s.dir.max_name_len)
  18049. + return inode_dir_item_plugin(inode)->s.dir.max_name_len(inode);
  18050. + else
  18051. + return 255;
  18052. +}
  18053. +
#if REISER4_USE_COLLISION_LIMIT
/* Maximal number of hash collisions allowed for directory @dir;
   read from the per-inode plugin data. */
int max_hash_collisions(const struct inode *dir/* inode queried */)
{
	assert("nikita-1711", dir != NULL);
	return reiser4_inode_data(dir)->plugin.max_collisions;
}
#endif /* REISER4_USE_COLLISION_LIMIT */
  18062. +
/* Install file, inode, and address_space operation on @inode, depending on
   its mode.  @data carries creation parameters (may be NULL when called
   for an inode read from disk).  Returns 0 on success, -EINVAL (after
   marking the inode bad) on an unrecognized file mode. */
int setup_inode_ops(struct inode *inode /* inode to intialize */ ,
		    reiser4_object_create_data * data /* parameters to create
						       * object */ )
{
	reiser4_super_info_data *sinfo;
	file_plugin *fplug;
	dir_plugin *dplug;

	fplug = inode_file_plugin(inode);
	dplug = inode_dir_plugin(inode);

	sinfo = get_super_private(inode->i_sb);

	switch (inode->i_mode & S_IFMT) {
	case S_IFSOCK:
	case S_IFBLK:
	case S_IFCHR:
	case S_IFIFO:
		{
			dev_t rdev;	/* to keep gcc happy */

			assert("vs-46", fplug != NULL);
			/* ugly hack with rdev: with no creation data, take
			   the device number the inode already carries and
			   clear it before init_special_inode() re-sets it */
			if (data == NULL) {
				rdev = inode->i_rdev;
				inode->i_rdev = 0;
			} else
				rdev = data->rdev;
			inode->i_blocks = 0;
			assert("vs-42", fplug->h.id == SPECIAL_FILE_PLUGIN_ID);
			inode->i_op = file_plugins[fplug->h.id].inode_ops;
			/* initialize inode->i_fop and inode->i_rdev for block
			   and char devices */
			init_special_inode(inode, inode->i_mode, rdev);
			/* all address space operations are null */
			inode->i_mapping->a_ops =
			    file_plugins[fplug->h.id].as_ops;
			break;
		}
	case S_IFLNK:
		assert("vs-46", fplug != NULL);
		assert("vs-42", fplug->h.id == SYMLINK_FILE_PLUGIN_ID);
		inode->i_op = file_plugins[fplug->h.id].inode_ops;
		inode->i_fop = NULL;
		/* all address space operations are null */
		inode->i_mapping->a_ops = file_plugins[fplug->h.id].as_ops;
		break;
	case S_IFDIR:
		assert("vs-46", dplug != NULL);
		assert("vs-43", (dplug->h.id == HASHED_DIR_PLUGIN_ID ||
				 dplug->h.id == SEEKABLE_HASHED_DIR_PLUGIN_ID));
		inode->i_op = dir_plugins[dplug->h.id].inode_ops;
		inode->i_fop = dir_plugins[dplug->h.id].file_ops;
		inode->i_mapping->a_ops = dir_plugins[dplug->h.id].as_ops;
		break;
	case S_IFREG:
		assert("vs-46", fplug != NULL);
		assert("vs-43", (fplug->h.id == UNIX_FILE_PLUGIN_ID ||
				 fplug->h.id == CRYPTCOMPRESS_FILE_PLUGIN_ID));
		inode->i_op = file_plugins[fplug->h.id].inode_ops;
		inode->i_fop = file_plugins[fplug->h.id].file_ops;
		inode->i_mapping->a_ops = file_plugins[fplug->h.id].as_ops;
		break;
	default:
		warning("nikita-291", "wrong file mode: %o for %llu",
			inode->i_mode,
			(unsigned long long)get_inode_oid(inode));
		reiser4_make_bad_inode(inode);
		return RETERR(-EINVAL);
	}
	return 0;
}
  18137. +
  18138. +/* Initialize inode from disk data. Called with inode locked.
  18139. + Return inode locked. */
  18140. +static int init_inode(struct inode *inode /* inode to initialise */ ,
  18141. + coord_t *coord/* coord of stat data */)
  18142. +{
  18143. + int result;
  18144. + item_plugin *iplug;
  18145. + void *body;
  18146. + int length;
  18147. + reiser4_inode *state;
  18148. +
  18149. + assert("nikita-292", coord != NULL);
  18150. + assert("nikita-293", inode != NULL);
  18151. +
  18152. + coord_clear_iplug(coord);
  18153. + result = zload(coord->node);
  18154. + if (result)
  18155. + return result;
  18156. + iplug = item_plugin_by_coord(coord);
  18157. + body = item_body_by_coord(coord);
  18158. + length = item_length_by_coord(coord);
  18159. +
  18160. + assert("nikita-295", iplug != NULL);
  18161. + assert("nikita-296", body != NULL);
  18162. + assert("nikita-297", length > 0);
  18163. +
  18164. + /* inode is under I_LOCK now */
  18165. +
  18166. + state = reiser4_inode_data(inode);
  18167. + /* call stat-data plugin method to load sd content into inode */
  18168. + result = iplug->s.sd.init_inode(inode, body, length);
  18169. + set_plugin(&state->pset, PSET_SD, item_plugin_to_plugin(iplug));
  18170. + if (result == 0) {
  18171. + result = setup_inode_ops(inode, NULL);
  18172. + if (result == 0 && inode->i_sb->s_root &&
  18173. + inode->i_sb->s_root->d_inode)
  18174. + result = finish_pset(inode);
  18175. + }
  18176. + zrelse(coord->node);
  18177. + return result;
  18178. +}
  18179. +
  18180. +/* read `inode' from the disk. This is what was previously in
  18181. + reiserfs_read_inode2().
  18182. +
  18183. + Must be called with inode locked. Return inode still locked.
  18184. +*/
  18185. +static int read_inode(struct inode *inode /* inode to read from disk */ ,
  18186. + const reiser4_key * key /* key of stat data */ ,
  18187. + int silent)
  18188. +{
  18189. + int result;
  18190. + lock_handle lh;
  18191. + reiser4_inode *info;
  18192. + coord_t coord;
  18193. +
  18194. + assert("nikita-298", inode != NULL);
  18195. + assert("nikita-1945", !is_inode_loaded(inode));
  18196. +
  18197. + info = reiser4_inode_data(inode);
  18198. + assert("nikita-300", info->locality_id != 0);
  18199. +
  18200. + coord_init_zero(&coord);
  18201. + init_lh(&lh);
  18202. + /* locate stat-data in a tree and return znode locked */
  18203. + result = lookup_sd(inode, ZNODE_READ_LOCK, &coord, &lh, key, silent);
  18204. + assert("nikita-301", !is_inode_loaded(inode));
  18205. + if (result == 0) {
  18206. + /* use stat-data plugin to load sd into inode. */
  18207. + result = init_inode(inode, &coord);
  18208. + if (result == 0) {
  18209. + /* initialize stat-data seal */
  18210. + spin_lock_inode(inode);
  18211. + reiser4_seal_init(&info->sd_seal, &coord, key);
  18212. + info->sd_coord = coord;
  18213. + spin_unlock_inode(inode);
  18214. +
  18215. + /* call file plugin's method to initialize plugin
  18216. + * specific part of inode */
  18217. + if (inode_file_plugin(inode)->init_inode_data)
  18218. + inode_file_plugin(inode)->init_inode_data(inode,
  18219. + NULL,
  18220. + 0);
  18221. + /* load detached directory cursors for stateless
  18222. + * directory readers (NFS). */
  18223. + reiser4_load_cursors(inode);
  18224. +
  18225. + /* Check the opened inode for consistency. */
  18226. + result =
  18227. + get_super_private(inode->i_sb)->df_plug->
  18228. + check_open(inode);
  18229. + }
  18230. + }
  18231. + /* lookup_sd() doesn't release coord because we want znode
  18232. + stay read-locked while stat-data fields are accessed in
  18233. + init_inode() */
  18234. + done_lh(&lh);
  18235. +
  18236. + if (result != 0)
  18237. + reiser4_make_bad_inode(inode);
  18238. + return result;
  18239. +}
  18240. +
  18241. +/* initialise new reiser4 inode being inserted into hash table. */
  18242. +static int init_locked_inode(struct inode *inode /* new inode */ ,
  18243. + void *opaque /* key of stat data passed to
  18244. + * the iget5_locked as cookie */)
  18245. +{
  18246. + reiser4_key *key;
  18247. +
  18248. + assert("nikita-1995", inode != NULL);
  18249. + assert("nikita-1996", opaque != NULL);
  18250. + key = opaque;
  18251. + set_inode_oid(inode, get_key_objectid(key));
  18252. + reiser4_inode_data(inode)->locality_id = get_key_locality(key);
  18253. + return 0;
  18254. +}
  18255. +
  18256. +/* reiser4_inode_find_actor() - "find actor" supplied by reiser4 to
  18257. + iget5_locked().
  18258. +
  18259. + This function is called by iget5_locked() to distinguish reiser4 inodes
  18260. + having the same inode numbers. Such inodes can only exist due to some error
  18261. + condition. One of them should be bad. Inodes with identical inode numbers
  18262. + (objectids) are distinguished by their packing locality.
  18263. +
  18264. +*/
  18265. +static int reiser4_inode_find_actor(struct inode *inode /* inode from hash table
  18266. + * to check */ ,
  18267. + void *opaque /* "cookie" passed to
  18268. + * iget5_locked(). This
  18269. + * is stat-data key */)
  18270. +{
  18271. + reiser4_key *key;
  18272. +
  18273. + key = opaque;
  18274. + return
  18275. + /* oid is unique, so first term is enough, actually. */
  18276. + get_inode_oid(inode) == get_key_objectid(key) &&
  18277. + /*
  18278. + * also, locality should be checked, but locality is stored in
  18279. + * the reiser4-specific part of the inode, and actor can be
  18280. + * called against arbitrary inode that happened to be in this
  18281. + * hash chain. Hence we first have to check that this is
  18282. + * reiser4 inode at least. is_reiser4_inode() is probably too
  18283. + * early to call, as inode may have ->i_op not yet
  18284. + * initialised.
  18285. + */
  18286. + is_reiser4_super(inode->i_sb) &&
  18287. + /*
  18288. + * usually objectid is unique, but pseudo files use counter to
  18289. + * generate objectid. All pseudo files are placed into special
  18290. + * (otherwise unused) locality.
  18291. + */
  18292. + reiser4_inode_data(inode)->locality_id == get_key_locality(key);
  18293. +}
  18294. +
  18295. +/* hook for kmem_cache_create */
  18296. +void loading_init_once(reiser4_inode * info)
  18297. +{
  18298. + mutex_init(&info->loading);
  18299. +}
  18300. +
  18301. +/* for reiser4_alloc_inode */
  18302. +void loading_alloc(reiser4_inode * info)
  18303. +{
  18304. + assert("vs-1717", !mutex_is_locked(&info->loading));
  18305. +}
  18306. +
  18307. +/* for reiser4_destroy */
  18308. +void loading_destroy(reiser4_inode * info)
  18309. +{
  18310. + assert("vs-1717a", !mutex_is_locked(&info->loading));
  18311. +}
  18312. +
  18313. +static void loading_begin(reiser4_inode * info)
  18314. +{
  18315. + mutex_lock(&info->loading);
  18316. +}
  18317. +
  18318. +static void loading_end(reiser4_inode * info)
  18319. +{
  18320. + mutex_unlock(&info->loading);
  18321. +}
  18322. +
  18323. +/**
  18324. + * reiser4_iget - obtain inode via iget5_locked, read from disk if necessary
  18325. + * @super: super block of filesystem
  18326. + * @key: key of inode's stat-data
  18327. + * @silent:
  18328. + *
  18329. + * This is our helper function a la iget(). This is called by
  18330. + * lookup_common() and reiser4_read_super(). Return inode locked or error
  18331. + * encountered.
  18332. + */
  18333. +struct inode *reiser4_iget(struct super_block *super, const reiser4_key *key,
  18334. + int silent)
  18335. +{
  18336. + struct inode *inode;
  18337. + int result;
  18338. + reiser4_inode *info;
  18339. +
  18340. + assert("nikita-302", super != NULL);
  18341. + assert("nikita-303", key != NULL);
  18342. +
  18343. + result = 0;
  18344. +
  18345. + /* call iget(). Our ->read_inode() is dummy, so this will either
  18346. + find inode in cache or return uninitialised inode */
  18347. + inode = iget5_locked(super,
  18348. + (unsigned long)get_key_objectid(key),
  18349. + reiser4_inode_find_actor,
  18350. + init_locked_inode, (reiser4_key *) key);
  18351. + if (inode == NULL)
  18352. + return ERR_PTR(RETERR(-ENOMEM));
  18353. + if (is_bad_inode(inode)) {
  18354. + warning("nikita-304", "Bad inode found");
  18355. + reiser4_print_key("key", key);
  18356. + iput(inode);
  18357. + return ERR_PTR(RETERR(-EIO));
  18358. + }
  18359. +
  18360. + info = reiser4_inode_data(inode);
  18361. +
  18362. + /* Reiser4 inode state bit REISER4_LOADED is used to distinguish fully
  18363. + loaded and initialized inode from just allocated inode. If
  18364. + REISER4_LOADED bit is not set, reiser4_iget() completes loading under
  18365. + info->loading. The place in reiser4 which uses not initialized inode
  18366. + is the reiser4 repacker, see repacker-related functions in
  18367. + plugin/item/extent.c */
  18368. + if (!is_inode_loaded(inode)) {
  18369. + loading_begin(info);
  18370. + if (!is_inode_loaded(inode)) {
  18371. + /* locking: iget5_locked returns locked inode */
  18372. + assert("nikita-1941", !is_inode_loaded(inode));
  18373. + assert("nikita-1949",
  18374. + reiser4_inode_find_actor(inode,
  18375. + (reiser4_key *) key));
  18376. + /* now, inode has objectid as ->i_ino and locality in
  18377. + reiser4-specific part. This is enough for
  18378. + read_inode() to read stat data from the disk */
  18379. + result = read_inode(inode, key, silent);
  18380. + } else
  18381. + loading_end(info);
  18382. + }
  18383. +
  18384. + if (inode->i_state & I_NEW)
  18385. + unlock_new_inode(inode);
  18386. +
  18387. + if (is_bad_inode(inode)) {
  18388. + assert("vs-1717", result != 0);
  18389. + loading_end(info);
  18390. + iput(inode);
  18391. + inode = ERR_PTR(result);
  18392. + } else if (REISER4_DEBUG) {
  18393. + reiser4_key found_key;
  18394. +
  18395. + assert("vs-1717", result == 0);
  18396. + build_sd_key(inode, &found_key);
  18397. + if (!keyeq(&found_key, key)) {
  18398. + warning("nikita-305", "Wrong key in sd");
  18399. + reiser4_print_key("sought for", key);
  18400. + reiser4_print_key("found", &found_key);
  18401. + }
  18402. + if (inode->i_nlink == 0) {
  18403. + warning("nikita-3559", "Unlinked inode found: %llu\n",
  18404. + (unsigned long long)get_inode_oid(inode));
  18405. + }
  18406. + }
  18407. + return inode;
  18408. +}
  18409. +
  18410. +/* reiser4_iget() may return not fully initialized inode, this function should
  18411. + * be called after one completes reiser4 inode initializing. */
  18412. +void reiser4_iget_complete(struct inode *inode)
  18413. +{
  18414. + assert("zam-988", is_reiser4_inode(inode));
  18415. +
  18416. + if (!is_inode_loaded(inode)) {
  18417. + reiser4_inode_set_flag(inode, REISER4_LOADED);
  18418. + loading_end(reiser4_inode_data(inode));
  18419. + }
  18420. +}
  18421. +
  18422. +void reiser4_make_bad_inode(struct inode *inode)
  18423. +{
  18424. + assert("nikita-1934", inode != NULL);
  18425. +
  18426. + /* clear LOADED bit */
  18427. + reiser4_inode_clr_flag(inode, REISER4_LOADED);
  18428. + make_bad_inode(inode);
  18429. + return;
  18430. +}
  18431. +
  18432. +file_plugin *inode_file_plugin(const struct inode *inode)
  18433. +{
  18434. + assert("nikita-1997", inode != NULL);
  18435. + return reiser4_inode_data(inode)->pset->file;
  18436. +}
  18437. +
  18438. +dir_plugin *inode_dir_plugin(const struct inode *inode)
  18439. +{
  18440. + assert("nikita-1998", inode != NULL);
  18441. + return reiser4_inode_data(inode)->pset->dir;
  18442. +}
  18443. +
  18444. +formatting_plugin *inode_formatting_plugin(const struct inode *inode)
  18445. +{
  18446. + assert("nikita-2000", inode != NULL);
  18447. + return reiser4_inode_data(inode)->pset->formatting;
  18448. +}
  18449. +
  18450. +hash_plugin *inode_hash_plugin(const struct inode *inode)
  18451. +{
  18452. + assert("nikita-2001", inode != NULL);
  18453. + return reiser4_inode_data(inode)->pset->hash;
  18454. +}
  18455. +
  18456. +fibration_plugin *inode_fibration_plugin(const struct inode *inode)
  18457. +{
  18458. + assert("nikita-2001", inode != NULL);
  18459. + return reiser4_inode_data(inode)->pset->fibration;
  18460. +}
  18461. +
  18462. +cipher_plugin *inode_cipher_plugin(const struct inode *inode)
  18463. +{
  18464. + assert("edward-36", inode != NULL);
  18465. + return reiser4_inode_data(inode)->pset->cipher;
  18466. +}
  18467. +
  18468. +compression_plugin *inode_compression_plugin(const struct inode *inode)
  18469. +{
  18470. + assert("edward-37", inode != NULL);
  18471. + return reiser4_inode_data(inode)->pset->compression;
  18472. +}
  18473. +
  18474. +compression_mode_plugin *inode_compression_mode_plugin(const struct inode *
  18475. + inode)
  18476. +{
  18477. + assert("edward-1330", inode != NULL);
  18478. + return reiser4_inode_data(inode)->pset->compression_mode;
  18479. +}
  18480. +
  18481. +cluster_plugin *inode_cluster_plugin(const struct inode *inode)
  18482. +{
  18483. + assert("edward-1328", inode != NULL);
  18484. + return reiser4_inode_data(inode)->pset->cluster;
  18485. +}
  18486. +
  18487. +file_plugin *inode_create_plugin(const struct inode *inode)
  18488. +{
  18489. + assert("edward-1329", inode != NULL);
  18490. + return reiser4_inode_data(inode)->pset->create;
  18491. +}
  18492. +
  18493. +digest_plugin *inode_digest_plugin(const struct inode *inode)
  18494. +{
  18495. + assert("edward-86", inode != NULL);
  18496. + return reiser4_inode_data(inode)->pset->digest;
  18497. +}
  18498. +
  18499. +item_plugin *inode_sd_plugin(const struct inode *inode)
  18500. +{
  18501. + assert("vs-534", inode != NULL);
  18502. + return reiser4_inode_data(inode)->pset->sd;
  18503. +}
  18504. +
  18505. +item_plugin *inode_dir_item_plugin(const struct inode *inode)
  18506. +{
  18507. + assert("vs-534", inode != NULL);
  18508. + return reiser4_inode_data(inode)->pset->dir_item;
  18509. +}
  18510. +
  18511. +file_plugin *child_create_plugin(const struct inode *inode)
  18512. +{
  18513. + assert("edward-1329", inode != NULL);
  18514. + return reiser4_inode_data(inode)->hset->create;
  18515. +}
  18516. +
  18517. +void inode_set_extension(struct inode *inode, sd_ext_bits ext)
  18518. +{
  18519. + reiser4_inode *state;
  18520. +
  18521. + assert("nikita-2716", inode != NULL);
  18522. + assert("nikita-2717", ext < LAST_SD_EXTENSION);
  18523. + assert("nikita-3491", spin_inode_is_locked(inode));
  18524. +
  18525. + state = reiser4_inode_data(inode);
  18526. + state->extmask |= 1 << ext;
  18527. + /* force re-calculation of stat-data length on next call to
  18528. + update_sd(). */
  18529. + reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
  18530. +}
  18531. +
  18532. +void inode_clr_extension(struct inode *inode, sd_ext_bits ext)
  18533. +{
  18534. + reiser4_inode *state;
  18535. +
  18536. + assert("vpf-1926", inode != NULL);
  18537. + assert("vpf-1927", ext < LAST_SD_EXTENSION);
  18538. + assert("vpf-1928", spin_inode_is_locked(inode));
  18539. +
  18540. + state = reiser4_inode_data(inode);
  18541. + state->extmask &= ~(1 << ext);
  18542. + /* force re-calculation of stat-data length on next call to
  18543. + update_sd(). */
  18544. + reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
  18545. +}
  18546. +
  18547. +void inode_check_scale_nolock(struct inode *inode, __u64 old, __u64 new)
  18548. +{
  18549. + assert("edward-1287", inode != NULL);
  18550. + if (!dscale_fit(old, new))
  18551. + reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
  18552. + return;
  18553. +}
  18554. +
  18555. +void inode_check_scale(struct inode *inode, __u64 old, __u64 new)
  18556. +{
  18557. + assert("nikita-2875", inode != NULL);
  18558. + spin_lock_inode(inode);
  18559. + inode_check_scale_nolock(inode, old, new);
  18560. + spin_unlock_inode(inode);
  18561. +}
  18562. +
  18563. +/*
  18564. + * initialize ->ordering field of inode. This field defines how file stat-data
  18565. + * and body is ordered within a tree with respect to other objects within the
  18566. + * same parent directory.
  18567. + */
  18568. +void
  18569. +init_inode_ordering(struct inode *inode,
  18570. + reiser4_object_create_data * crd, int create)
  18571. +{
  18572. + reiser4_key key;
  18573. +
  18574. + if (create) {
  18575. + struct inode *parent;
  18576. +
  18577. + parent = crd->parent;
  18578. + assert("nikita-3224", inode_dir_plugin(parent) != NULL);
  18579. + inode_dir_plugin(parent)->build_entry_key(parent,
  18580. + &crd->dentry->d_name,
  18581. + &key);
  18582. + } else {
  18583. + coord_t *coord;
  18584. +
  18585. + coord = &reiser4_inode_data(inode)->sd_coord;
  18586. + coord_clear_iplug(coord);
  18587. + /* safe to use ->sd_coord, because node is under long term
  18588. + * lock */
  18589. + WITH_DATA(coord->node, item_key_by_coord(coord, &key));
  18590. + }
  18591. +
  18592. + set_inode_ordering(inode, get_key_ordering(&key));
  18593. +}
  18594. +
  18595. +znode *inode_get_vroot(struct inode *inode)
  18596. +{
  18597. + reiser4_block_nr blk;
  18598. + znode *result;
  18599. +
  18600. + spin_lock_inode(inode);
  18601. + blk = reiser4_inode_data(inode)->vroot;
  18602. + spin_unlock_inode(inode);
  18603. + if (!disk_addr_eq(&UBER_TREE_ADDR, &blk))
  18604. + result = zlook(reiser4_tree_by_inode(inode), &blk);
  18605. + else
  18606. + result = NULL;
  18607. + return result;
  18608. +}
  18609. +
  18610. +void inode_set_vroot(struct inode *inode, znode *vroot)
  18611. +{
  18612. + spin_lock_inode(inode);
  18613. + reiser4_inode_data(inode)->vroot = *znode_get_block(vroot);
  18614. + spin_unlock_inode(inode);
  18615. +}
  18616. +
  18617. +#if REISER4_DEBUG
  18618. +
  18619. +void reiser4_inode_invariant(const struct inode *inode)
  18620. +{
  18621. + assert("nikita-3077", spin_inode_is_locked(inode));
  18622. +}
  18623. +
  18624. +int inode_has_no_jnodes(reiser4_inode * r4_inode)
  18625. +{
  18626. + return radix_tree_empty(jnode_tree_by_reiser4_inode(r4_inode)) &&
  18627. + r4_inode->nr_jnodes == 0;
  18628. +}
  18629. +
  18630. +#endif
  18631. +
  18632. +/* true if directory is empty (only contains dot and dotdot) */
  18633. +/* FIXME: shouldn't it be dir plugin method? */
  18634. +int is_dir_empty(const struct inode *dir)
  18635. +{
  18636. + assert("nikita-1976", dir != NULL);
  18637. +
  18638. + /* rely on our method to maintain directory i_size being equal to the
  18639. + number of entries. */
  18640. + return dir->i_size <= 2 ? 0 : RETERR(-ENOTEMPTY);
  18641. +}
  18642. +
  18643. +/* Make Linus happy.
  18644. + Local variables:
  18645. + c-indentation-style: "K&R"
  18646. + mode-name: "LC"
  18647. + c-basic-offset: 8
  18648. + tab-width: 8
  18649. + fill-column: 120
  18650. + End:
  18651. +*/
  18652. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/inode.h linux-5.16.14/fs/reiser4/inode.h
  18653. --- linux-5.16.14.orig/fs/reiser4/inode.h 1970-01-01 01:00:00.000000000 +0100
  18654. +++ linux-5.16.14/fs/reiser4/inode.h 2022-03-12 13:26:19.653892731 +0100
  18655. @@ -0,0 +1,506 @@
  18656. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  18657. + reiser4/README */
  18658. +
  18659. +/* Inode functions. */
  18660. +
  18661. +#if !defined(__REISER4_INODE_H__)
  18662. +#define __REISER4_INODE_H__
  18663. +
  18664. +#include "forward.h"
  18665. +#include "debug.h"
  18666. +#include "key.h"
  18667. +#include "seal.h"
  18668. +#include "plugin/plugin.h"
  18669. +#include "plugin/file/cryptcompress.h"
  18670. +#include "plugin/file/file.h"
  18671. +#include "plugin/dir/dir.h"
  18672. +#include "plugin/plugin_set.h"
  18673. +#include "plugin/security/perm.h"
  18674. +#include "vfs_ops.h"
  18675. +#include "jnode.h"
  18676. +#include "fsdata.h"
  18677. +
  18678. +#include <linux/types.h> /* for __u?? , ino_t */
  18679. +#include <linux/fs.h> /* for struct super_block, struct
  18680. + * rw_semaphore, etc */
  18681. +#include <linux/spinlock.h>
  18682. +#include <asm/types.h>
  18683. +
  18684. +/* reiser4-specific inode flags. They are "transient" and are not
  18685. + supposed to be stored on disk. Used to trace "state" of
  18686. + inode
  18687. +*/
  18688. +typedef enum {
  18689. + /* this is light-weight inode, inheriting some state from its
  18690. + parent */
  18691. + REISER4_LIGHT_WEIGHT = 0,
  18692. + /* stat data wasn't yet created */
  18693. + REISER4_NO_SD = 1,
  18694. + /* internal immutable flag. Currently is only used
  18695. + to avoid race condition during file creation.
  18696. + See comment in create_object(). */
  18697. + REISER4_IMMUTABLE = 2,
  18698. + /* inode was read from storage */
  18699. + REISER4_LOADED = 3,
  18700. + /* this bit is set for symlinks. inode->i_private points to target
  18701. + name of symlink. */
  18702. + REISER4_GENERIC_PTR_USED = 4,
  18703. + /* set if size of stat-data item for this inode is known. If this is
  18704. + * set we can avoid recalculating size of stat-data on each update. */
  18705. + REISER4_SDLEN_KNOWN = 5,
  18706. + /* reiser4_inode->crypt points to the crypto stat */
  18707. + REISER4_CRYPTO_STAT_LOADED = 6,
  18708. + /* cryptcompress_inode_data points to the secret key */
  18709. + REISER4_SECRET_KEY_INSTALLED = 7,
  18710. + /* File (possibly) has pages corresponding to the tail items, that
  18711. + * were created by ->readpage. It is set by mmap_unix_file() and
  18712. + * sendfile_unix_file(). This bit is inspected by write_unix_file and
  18713. + * kill-hook of tail items. It is never cleared once set. This bit is
  18714. + * modified and inspected under i_mutex. */
  18715. + REISER4_HAS_MMAP = 8,
  18716. + REISER4_PART_MIXED = 9,
  18717. + REISER4_PART_IN_CONV = 10,
  18718. + /* This flag indicates that file plugin conversion is in progress */
  18719. + REISER4_FILE_CONV_IN_PROGRESS = 11
  18720. +} reiser4_file_plugin_flags;
  18721. +
  18722. +/* state associated with each inode.
  18723. + reiser4 inode.
  18724. +
  18725. + NOTE-NIKITA In 2.5 kernels it is not necessary that all file-system inodes
  18726. + be of the same size. File-system allocates inodes by itself through
  18727. + s_op->allocate_inode() method. So, it is possible to adjust size of inode
  18728. + at the time of its creation.
  18729. +
  18730. + Invariants involving parts of this data-type:
  18731. +
  18732. + [inode->eflushed]
  18733. +
  18734. +*/
  18735. +
  18736. +typedef struct reiser4_inode reiser4_inode;
  18737. +/* return pointer to reiser4-specific part of inode */
  18738. +static inline reiser4_inode *reiser4_inode_data(const struct inode *inode
  18739. + /* inode queried */ );
  18740. +
  18741. +#if BITS_PER_LONG == 64
  18742. +
  18743. +#define REISER4_INO_IS_OID (1)
  18744. +typedef struct {;
  18745. +} oid_hi_t;
  18746. +
  18747. +/* BITS_PER_LONG == 64 */
  18748. +#else
  18749. +
  18750. +#define REISER4_INO_IS_OID (0)
  18751. +typedef __u32 oid_hi_t;
  18752. +
  18753. +/* BITS_PER_LONG == 64 */
  18754. +#endif
  18755. +
  18756. +struct reiser4_inode {
  18757. + /* spin lock protecting fields of this structure. */
  18758. + spinlock_t guard;
  18759. + /* main plugin set that control the file
  18760. + (see comments in plugin/plugin_set.c) */
  18761. + plugin_set *pset;
  18762. + /* plugin set for inheritance
  18763. + (see comments in plugin/plugin_set.c) */
  18764. + plugin_set *hset;
  18765. + /* high 32 bits of object id */
  18766. + oid_hi_t oid_hi;
  18767. + /* seal for stat-data */
  18768. + seal_t sd_seal;
  18769. + /* locality id for this file */
  18770. + oid_t locality_id;
  18771. +#if REISER4_LARGE_KEY
  18772. + __u64 ordering;
  18773. +#endif
  18774. + /* coord of stat-data in sealed node */
  18775. + coord_t sd_coord;
  18776. + /* bit-mask of stat-data extensions used by this file */
  18777. + __u64 extmask;
  18778. + /* bitmask of non-default plugins for this inode */
  18779. + __u16 plugin_mask;
  18780. + /* bitmask of set heir plugins for this inode. */
  18781. + __u16 heir_mask;
  18782. + union {
  18783. + struct list_head readdir_list;
  18784. + struct list_head not_used;
  18785. + } lists;
  18786. + /* per-inode flags. Filled by values of reiser4_file_plugin_flags */
  18787. + unsigned long flags;
  18788. + union {
  18789. + /* fields specific to unix_file plugin */
  18790. + struct unix_file_info unix_file_info;
  18791. + /* fields specific to cryptcompress file plugin */
  18792. + struct cryptcompress_info cryptcompress_info;
  18793. + } file_plugin_data;
  18794. +
  18795. + /* this semaphore is to serialize readers and writers of @pset->file
  18796. + * when file plugin conversion is enabled
  18797. + */
  18798. + struct rw_semaphore conv_sem;
  18799. +
  18800. + /* tree of jnodes. Phantom jnodes (ones not attached to any atom) are
  18801. + tagged in that tree by EFLUSH_TAG_ANONYMOUS */
  18802. + struct radix_tree_root jnodes_tree;
  18803. +#if REISER4_DEBUG
  18804. + /* number of unformatted node jnodes of this file in jnode hash table */
  18805. + unsigned long nr_jnodes;
  18806. +#endif
  18807. +
  18808. + /* block number of virtual root for this object. See comment above
  18809. + * fs/reiser4/search.c:handle_vroot() */
  18810. + reiser4_block_nr vroot;
  18811. + struct mutex loading;
  18812. +};
  18813. +
  18814. +void loading_init_once(reiser4_inode *);
  18815. +void loading_alloc(reiser4_inode *);
  18816. +void loading_destroy(reiser4_inode *);
  18817. +
  18818. +struct reiser4_inode_object {
  18819. + /* private part */
  18820. + reiser4_inode p;
  18821. + /* generic fields not specific to reiser4, but used by VFS */
  18822. + struct inode vfs_inode;
  18823. +};
  18824. +
  18825. +/* return pointer to the reiser4 specific portion of @inode */
  18826. +static inline reiser4_inode *reiser4_inode_data(const struct inode *inode
  18827. + /* inode queried */ )
  18828. +{
  18829. + assert("nikita-254", inode != NULL);
  18830. + return &container_of(inode, struct reiser4_inode_object, vfs_inode)->p;
  18831. +}
  18832. +
  18833. +static inline struct inode *inode_by_reiser4_inode(const reiser4_inode *
  18834. + r4_inode /* inode queried */
  18835. + )
  18836. +{
  18837. + return &container_of(r4_inode, struct reiser4_inode_object,
  18838. + p)->vfs_inode;
  18839. +}
  18840. +
  18841. +/*
  18842. + * reiser4 inodes are identified by 64bit object-id (oid_t), but in struct
  18843. + * inode ->i_ino field is of type ino_t (long) that can be either 32 or 64
  18844. + * bits.
  18845. + *
  18846. + * If ->i_ino is 32 bits we store remaining 32 bits in reiser4 specific part
  18847. + * of inode, otherwise whole oid is stored in i_ino.
  18848. + *
  18849. + * Wrappers below ([sg]et_inode_oid()) are used to hide this difference.
  18850. + */
  18851. +
  18852. +#define OID_HI_SHIFT (sizeof(ino_t) * 8)
  18853. +
  18854. +#if REISER4_INO_IS_OID
  18855. +
  18856. +static inline oid_t get_inode_oid(const struct inode *inode)
  18857. +{
  18858. + return inode->i_ino;
  18859. +}
  18860. +
  18861. +static inline void set_inode_oid(struct inode *inode, oid_t oid)
  18862. +{
  18863. + inode->i_ino = oid;
  18864. +}
  18865. +
  18866. +/* REISER4_INO_IS_OID */
  18867. +#else
  18868. +
  18869. +static inline oid_t get_inode_oid(const struct inode *inode)
  18870. +{
  18871. + return
  18872. + ((__u64) reiser4_inode_data(inode)->oid_hi << OID_HI_SHIFT) |
  18873. + inode->i_ino;
  18874. +}
  18875. +
  18876. +static inline void set_inode_oid(struct inode *inode, oid_t oid)
  18877. +{
  18878. + assert("nikita-2519", inode != NULL);
  18879. + inode->i_ino = (ino_t) (oid);
  18880. + reiser4_inode_data(inode)->oid_hi = (oid) >> OID_HI_SHIFT;
  18881. + assert("nikita-2521", get_inode_oid(inode) == (oid));
  18882. +}
  18883. +
  18884. +/* REISER4_INO_IS_OID */
  18885. +#endif
  18886. +
  18887. +static inline oid_t get_inode_locality(const struct inode *inode)
  18888. +{
  18889. + return reiser4_inode_data(inode)->locality_id;
  18890. +}
  18891. +
  18892. +#if REISER4_LARGE_KEY
  18893. +static inline __u64 get_inode_ordering(const struct inode *inode)
  18894. +{
  18895. + return reiser4_inode_data(inode)->ordering;
  18896. +}
  18897. +
  18898. +static inline void set_inode_ordering(const struct inode *inode, __u64 ordering)
  18899. +{
  18900. + reiser4_inode_data(inode)->ordering = ordering;
  18901. +}
  18902. +
  18903. +#else
  18904. +
  18905. +#define get_inode_ordering(inode) (0)
  18906. +#define set_inode_ordering(inode, val) noop
  18907. +
  18908. +#endif
  18909. +
  18910. +/* return inode in which @uf_info is embedded */
  18911. +static inline struct inode *
  18912. +unix_file_info_to_inode(const struct unix_file_info *uf_info)
  18913. +{
  18914. + return &container_of(uf_info, struct reiser4_inode_object,
  18915. + p.file_plugin_data.unix_file_info)->vfs_inode;
  18916. +}
  18917. +
  18918. +extern ino_t oid_to_ino(oid_t oid) __attribute__ ((const));
  18919. +extern ino_t oid_to_uino(oid_t oid) __attribute__ ((const));
  18920. +
  18921. +extern reiser4_tree *reiser4_tree_by_inode(const struct inode *inode);
  18922. +
  18923. +#if REISER4_DEBUG
  18924. +extern void reiser4_inode_invariant(const struct inode *inode);
  18925. +extern int inode_has_no_jnodes(reiser4_inode *);
  18926. +#else
  18927. +#define reiser4_inode_invariant(inode) noop
  18928. +#endif
  18929. +
  18930. +static inline int spin_inode_is_locked(const struct inode *inode)
  18931. +{
  18932. + assert_spin_locked(&reiser4_inode_data(inode)->guard);
  18933. + return 1;
  18934. +}
  18935. +
  18936. +/**
  18937. + * spin_lock_inode - lock reiser4_inode' embedded spinlock
  18938. + * @inode: inode to lock
  18939. + *
  18940. + * In debug mode it checks that lower priority locks are not held and
  18941. + * increments reiser4_context's lock counters on which lock ordering checking
  18942. + * is based.
  18943. + */
  18944. +static inline void spin_lock_inode(struct inode *inode)
  18945. +{
  18946. + assert("", LOCK_CNT_NIL(spin_locked));
  18947. + /* check lock ordering */
  18948. + assert_spin_not_locked(&d_c_lock);
  18949. +
  18950. + spin_lock(&reiser4_inode_data(inode)->guard);
  18951. +
  18952. + LOCK_CNT_INC(spin_locked_inode);
  18953. + LOCK_CNT_INC(spin_locked);
  18954. +
  18955. + reiser4_inode_invariant(inode);
  18956. +}
  18957. +
  18958. +/**
  18959. + * spin_unlock_inode - unlock reiser4_inode' embedded spinlock
  18960. + * @inode: inode to unlock
  18961. + *
  18962. + * In debug mode it checks that spinlock is held and decrements
  18963. + * reiser4_context's lock counters on which lock ordering checking is based.
  18964. + */
  18965. +static inline void spin_unlock_inode(struct inode *inode)
  18966. +{
  18967. + assert_spin_locked(&reiser4_inode_data(inode)->guard);
  18968. + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_inode));
  18969. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  18970. +
  18971. + reiser4_inode_invariant(inode);
  18972. +
  18973. + LOCK_CNT_DEC(spin_locked_inode);
  18974. + LOCK_CNT_DEC(spin_locked);
  18975. +
  18976. + spin_unlock(&reiser4_inode_data(inode)->guard);
  18977. +}
  18978. +
  18979. +extern znode *inode_get_vroot(struct inode *inode);
  18980. +extern void inode_set_vroot(struct inode *inode, znode * vroot);
  18981. +
  18982. +extern int reiser4_max_filename_len(const struct inode *inode);
  18983. +extern int max_hash_collisions(const struct inode *dir);
  18984. +extern void reiser4_unlock_inode(struct inode *inode);
  18985. +extern int is_reiser4_inode(const struct inode *inode);
  18986. +extern int setup_inode_ops(struct inode *inode, reiser4_object_create_data *);
  18987. +extern struct inode *reiser4_iget(struct super_block *super,
  18988. + const reiser4_key * key, int silent);
  18989. +extern void reiser4_iget_complete(struct inode *inode);
  18990. +extern void reiser4_inode_set_flag(struct inode *inode,
  18991. + reiser4_file_plugin_flags f);
  18992. +extern void reiser4_inode_clr_flag(struct inode *inode,
  18993. + reiser4_file_plugin_flags f);
  18994. +extern int reiser4_inode_get_flag(const struct inode *inode,
  18995. + reiser4_file_plugin_flags f);
  18996. +
  18997. +/* has inode been initialized? */
  18998. +static inline int
  18999. +is_inode_loaded(const struct inode *inode/* inode queried */)
  19000. +{
  19001. + assert("nikita-1120", inode != NULL);
  19002. + return reiser4_inode_get_flag(inode, REISER4_LOADED);
  19003. +}
  19004. +
  19005. +extern file_plugin *inode_file_plugin(const struct inode *inode);
  19006. +extern dir_plugin *inode_dir_plugin(const struct inode *inode);
  19007. +extern formatting_plugin *inode_formatting_plugin(const struct inode *inode);
  19008. +extern hash_plugin *inode_hash_plugin(const struct inode *inode);
  19009. +extern fibration_plugin *inode_fibration_plugin(const struct inode *inode);
  19010. +extern cipher_plugin *inode_cipher_plugin(const struct inode *inode);
  19011. +extern digest_plugin *inode_digest_plugin(const struct inode *inode);
  19012. +extern compression_plugin *inode_compression_plugin(const struct inode *inode);
  19013. +extern compression_mode_plugin *inode_compression_mode_plugin(const struct inode
  19014. + *inode);
  19015. +extern cluster_plugin *inode_cluster_plugin(const struct inode *inode);
  19016. +extern file_plugin *inode_create_plugin(const struct inode *inode);
  19017. +extern item_plugin *inode_sd_plugin(const struct inode *inode);
  19018. +extern item_plugin *inode_dir_item_plugin(const struct inode *inode);
  19019. +extern file_plugin *child_create_plugin(const struct inode *inode);
  19020. +
  19021. +extern void reiser4_make_bad_inode(struct inode *inode);
  19022. +
  19023. +extern void inode_set_extension(struct inode *inode, sd_ext_bits ext);
  19024. +extern void inode_clr_extension(struct inode *inode, sd_ext_bits ext);
  19025. +extern void inode_check_scale(struct inode *inode, __u64 old, __u64 new);
  19026. +extern void inode_check_scale_nolock(struct inode *inode, __u64 old, __u64 new);
  19027. +
  19028. +#define INODE_SET_SIZE(i, value) \
  19029. +({ \
  19030. + struct inode *__i; \
  19031. + typeof(value) __v; \
  19032. + \
  19033. + __i = (i); \
  19034. + __v = (value); \
  19035. + inode_check_scale(__i, __i->i_size, __v); \
  19036. + i_size_write(__i, __v); \
  19037. +})
  19038. +
  19039. +/*
  19040. + * update field @field in inode @i to contain value @value.
  19041. + */
  19042. +#define INODE_SET_FIELD(i, field, value) \
  19043. +({ \
  19044. + struct inode *__i; \
  19045. + typeof(value) __v; \
  19046. + \
  19047. + __i = (i); \
  19048. + __v = (value); \
  19049. + inode_check_scale(__i, __i->field, __v); \
  19050. + __i->field = __v; \
  19051. +})
  19052. +
  19053. +#define INODE_INC_FIELD(i, field) \
  19054. +({ \
  19055. + struct inode *__i; \
  19056. + \
  19057. + __i = (i); \
  19058. + inode_check_scale(__i, __i->field, __i->field + 1); \
  19059. + ++ __i->field; \
  19060. +})
  19061. +
  19062. +#define INODE_DEC_FIELD(i, field) \
  19063. +({ \
  19064. + struct inode *__i; \
  19065. + \
  19066. + __i = (i); \
  19067. + inode_check_scale(__i, __i->field, __i->field - 1); \
  19068. + -- __i->field; \
  19069. +})
  19070. +
  19071. +/*
  19072. + * Update field i_nlink in inode @i using library function @op.
  19073. + */
  19074. +#define INODE_SET_NLINK(i, value) \
  19075. +({ \
  19076. + struct inode *__i; \
  19077. + typeof(value) __v; \
  19078. + \
  19079. + __i = (i); \
  19080. + __v = (value); \
  19081. + inode_check_scale(__i, __i->i_nlink, __v); \
  19082. + set_nlink(__i, __v); \
  19083. +})
  19084. +
  19085. +#define INODE_INC_NLINK(i) \
  19086. + ({ \
  19087. + struct inode *__i; \
  19088. + \
  19089. + __i = (i); \
  19090. + inode_check_scale(__i, __i->i_nlink, __i->i_nlink + 1); \
  19091. + inc_nlink(__i); \
  19092. +})
  19093. +
  19094. +#define INODE_DROP_NLINK(i) \
  19095. + ({ \
  19096. + struct inode *__i; \
  19097. + \
  19098. + __i = (i); \
  19099. + inode_check_scale(__i, __i->i_nlink, __i->i_nlink - 1); \
  19100. + drop_nlink(__i); \
  19101. +})
  19102. +
  19103. +#define INODE_CLEAR_NLINK(i) \
  19104. + ({ \
  19105. + struct inode *__i; \
  19106. + \
  19107. + __i = (i); \
  19108. + inode_check_scale(__i, __i->i_nlink, 0); \
  19109. + clear_nlink(__i); \
  19110. +})
  19111. +
  19112. +
  19113. +static inline void inode_add_blocks(struct inode *inode, __u64 blocks)
  19114. +{
  19115. + inode_add_bytes(inode, blocks << inode->i_blkbits);
  19116. +}
  19117. +
  19118. +static inline void inode_sub_blocks(struct inode *inode, __u64 blocks)
  19119. +{
  19120. + inode_sub_bytes(inode, blocks << inode->i_blkbits);
  19121. +}
  19122. +
  19123. +
  19124. +/* See comment before reiser4_readdir_common() for description. */
  19125. +static inline struct list_head *get_readdir_list(const struct inode *inode)
  19126. +{
  19127. + return &reiser4_inode_data(inode)->lists.readdir_list;
  19128. +}
  19129. +
  19130. +extern void init_inode_ordering(struct inode *inode,
  19131. + reiser4_object_create_data * crd, int create);
  19132. +
  19133. +static inline struct radix_tree_root *jnode_tree_by_inode(struct inode *inode)
  19134. +{
  19135. + return &reiser4_inode_data(inode)->jnodes_tree;
  19136. +}
  19137. +
  19138. +static inline struct radix_tree_root *jnode_tree_by_reiser4_inode(reiser4_inode
  19139. + *r4_inode)
  19140. +{
  19141. + return &r4_inode->jnodes_tree;
  19142. +}
  19143. +
  19144. +#if REISER4_DEBUG
  19145. +extern void print_inode(const char *prefix, const struct inode *i);
  19146. +#endif
  19147. +
  19148. +int is_dir_empty(const struct inode *);
  19149. +
  19150. +/* __REISER4_INODE_H__ */
  19151. +#endif
  19152. +
  19153. +/* Make Linus happy.
  19154. + Local variables:
  19155. + c-indentation-style: "K&R"
  19156. + mode-name: "LC"
  19157. + c-basic-offset: 8
  19158. + tab-width: 8
  19159. + fill-column: 120
  19160. + End:
  19161. +*/
  19162. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/ioctl.h linux-5.16.14/fs/reiser4/ioctl.h
  19163. --- linux-5.16.14.orig/fs/reiser4/ioctl.h 1970-01-01 01:00:00.000000000 +0100
  19164. +++ linux-5.16.14/fs/reiser4/ioctl.h 2022-03-12 13:26:19.654892733 +0100
  19165. @@ -0,0 +1,41 @@
  19166. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  19167. + * reiser4/README */
  19168. +
  19169. +#if !defined(__REISER4_IOCTL_H__)
  19170. +#define __REISER4_IOCTL_H__
  19171. +
  19172. +#include <linux/fs.h>
  19173. +
  19174. +/*
  19175. + * ioctl(2) command used to "unpack" reiser4 file, that is, convert it into
  19176. + * extents and fix in this state. This is used by applications that rely on
  19177. + *
  19178. + * . files being block aligned, and
  19179. + *
  19180. + * . files never migrating on disk
  19181. + *
  19182. + * for example, boot loaders (LILO) need this.
  19183. + *
  19184. + * This ioctl should be used as
  19185. + *
  19186. + * result = ioctl(fd, REISER4_IOC_UNPACK);
  19187. + *
  19188. + * File behind fd descriptor will be converted to the extents (if necessary),
  19189. + * and its stat-data will be updated so that it will never be converted back
  19190. + * into tails again.
  19191. + */
  19192. +#define REISER4_IOC_UNPACK _IOW(0xCD, 1, long)
  19193. +
  19194. +/* __REISER4_IOCTL_H__ */
  19195. +#endif
  19196. +
  19197. +/* Make Linus happy.
  19198. + Local variables:
  19199. + c-indentation-style: "K&R"
  19200. + mode-name: "LC"
  19201. + c-basic-offset: 8
  19202. + tab-width: 8
  19203. + fill-column: 120
  19204. + scroll-step: 1
  19205. + End:
  19206. +*/
  19207. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/jnode.c linux-5.16.14/fs/reiser4/jnode.c
  19208. --- linux-5.16.14.orig/fs/reiser4/jnode.c 1970-01-01 01:00:00.000000000 +0100
  19209. +++ linux-5.16.14/fs/reiser4/jnode.c 2022-03-12 13:26:19.655892736 +0100
  19210. @@ -0,0 +1,1906 @@
  19211. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  19212. + * reiser4/README */
  19213. +/* Jnode manipulation functions. */
  19214. +/* Jnode is entity used to track blocks with data and meta-data in reiser4.
  19215. +
  19216. + In particular, jnodes are used to track transactional information
  19217. + associated with each block. Each znode contains jnode as ->zjnode field.
  19218. +
  19219. + Jnode stands for either Josh or Journal node.
  19220. +*/
  19221. +
  19222. +/*
  19223. + * Taxonomy.
  19224. + *
  19225. + * Jnode represents block containing data or meta-data. There are jnodes
  19226. + * for:
  19227. + *
  19228. + * unformatted blocks (jnodes proper). There are plans, however to
  19229. + * have a handle per extent unit rather than per each unformatted
  19230. + * block, because there are so many of them.
  19231. + *
  19232. + * For bitmaps. Each bitmap is actually represented by two jnodes--one
  19233. + * for working and another for "commit" data, together forming bnode.
  19234. + *
  19235. + * For io-heads. These are used by log writer.
  19236. + *
  19237. + * For formatted nodes (znode). See comment at the top of znode.c for
  19238. + * details specific to the formatted nodes (znodes).
  19239. + *
  19240. + * Node data.
  19241. + *
  19242. + * Jnode provides access to the data of node it represents. Data are
  19243. + * stored in a page. Page is kept in a page cache. This means, that jnodes
  19244. + * are highly interconnected with page cache and VM internals.
  19245. + *
  19246. + * jnode has a pointer to page (->pg) containing its data. Pointer to data
  19247. + * themselves is cached in ->data field to avoid frequent calls to
  19248. + * page_address().
  19249. + *
  19250. + * jnode and page are attached to each other by jnode_attach_page(). This
  19251. + * function places pointer to jnode in set_page_private(), sets PG_private
  19252. + * flag and increments page counter.
  19253. + *
  19254. + * Opposite operation is performed by page_clear_jnode().
  19255. + *
  19256. + * jnode->pg is protected by jnode spin lock, and page->private is
  19257. + * protected by page lock. See comment at the top of page_cache.c for
  19258. + * more.
  19259. + *
  19260. + * page can be detached from jnode for two reasons:
  19261. + *
  19262. + * . jnode is removed from a tree (file is truncated, or formatted
  19263. + * node is removed by balancing).
  19264. + *
  19265. + * . during memory pressure, VM calls ->releasepage() method
  19266. + * (reiser4_releasepage()) to evict page from memory.
  19267. + *
  19268. + * (there, of course, is also umount, but this is special case we are not
  19269. + * concerned with here).
  19270. + *
  19271. + * To protect jnode page from eviction, one calls jload() function that
  19272. + * "pins" page in memory (loading it if necessary), increments
  19273. + * jnode->d_count, and kmap()s page. Page is unpinned through call to
  19274. + * jrelse().
  19275. + *
  19276. + * Jnode life cycle.
  19277. + *
  19278. + * jnode is created, placed in hash table, and, optionally, in per-inode
  19279. + * radix tree. Page can be attached to jnode, pinned, released, etc.
  19280. + *
  19281. + * When jnode is captured into atom its reference counter is
  19282. + * increased. While being part of an atom, jnode can be "early
  19283. + * flushed". This means that as part of flush procedure, jnode is placed
  19284. + * into "relocate set", and its page is submitted to the disk. After io
  19285. + * completes, page can be detached, then loaded again, re-dirtied, etc.
  19286. + *
  19287. + * A thread acquires a reference to a jnode by calling jref() and releases it by
  19288. + * jput(). When last reference is removed, jnode is still retained in
  19289. + * memory (cached) if it has page attached, _unless_ it is scheduled for
  19290. + * destruction (has JNODE_HEARD_BANSHEE bit set).
  19291. + *
  19292. + * Tree read-write lock was used as "existential" lock for jnodes. That is,
  19293. + * jnode->x_count could be changed from 0 to 1 only under tree write lock,
  19294. + * that is, tree lock protected unreferenced jnodes stored in the hash
  19295. + * table, from recycling.
  19296. + *
  19297. + * This resulted in high contention on tree lock, because jref()/jput() is
  19298. + * frequent operation. To ameliorate this problem, RCU is used: when jput()
  19299. + * is just about to release last reference on jnode it sets JNODE_RIP bit
  19300. + * on it, and then proceed with jnode destruction (removing jnode from hash
  19301. + * table, cbk_cache, detaching page, etc.). All places that change jnode
  19302. + * reference counter from 0 to 1 (jlookup(), zlook(), zget(), and
  19303. + * cbk_cache_scan_slots()) check for JNODE_RIP bit (this is done by
  19304. + * jnode_rip_check() function), and pretend that nothing was found in hash
  19305. + * table if bit is set.
  19306. + *
  19307. + * jput defers actual return of jnode into slab cache to some later time
  19308. + * (by call_rcu()), this guarantees that other threads can safely continue
  19309. + * working with JNODE_RIP-ped jnode.
  19310. + *
  19311. + */
  19312. +
  19313. +#include "reiser4.h"
  19314. +#include "debug.h"
  19315. +#include "dformat.h"
  19316. +#include "jnode.h"
  19317. +#include "plugin/plugin_header.h"
  19318. +#include "plugin/plugin.h"
  19319. +#include "txnmgr.h"
  19320. +/*#include "jnode.h"*/
  19321. +#include "znode.h"
  19322. +#include "tree.h"
  19323. +#include "tree_walk.h"
  19324. +#include "super.h"
  19325. +#include "inode.h"
  19326. +#include "page_cache.h"
  19327. +
  19328. +#include <asm/uaccess.h> /* UML needs this for PAGE_OFFSET */
  19329. +#include <linux/types.h>
  19330. +#include <linux/slab.h>
  19331. +#include <linux/pagemap.h>
  19332. +#include <linux/swap.h>
  19333. +#include <linux/fs.h> /* for struct address_space */
  19334. +#include <linux/writeback.h> /* for inode_wb_list_lock */
  19335. +
  19336. +static struct kmem_cache *_jnode_slab = NULL;
  19337. +
  19338. +static void jnode_set_type(jnode * node, jnode_type type);
  19339. +static int jdelete(jnode * node);
  19340. +static int jnode_try_drop(jnode * node);
  19341. +
  19342. +#if REISER4_DEBUG
  19343. +static int jnode_invariant(jnode * node, int tlocked, int jlocked);
  19344. +#endif
  19345. +
  19346. +/* true if valid page is attached to jnode */
  19347. +static inline int jnode_is_parsed(jnode * node)
  19348. +{
  19349. + return JF_ISSET(node, JNODE_PARSED);
  19350. +}
  19351. +
  19352. +/* hash table support */
  19353. +
  19354. +/* compare two jnode keys for equality. Used by hash-table macros */
  19355. +static inline int jnode_key_eq(const struct jnode_key *k1,
  19356. + const struct jnode_key *k2)
  19357. +{
  19358. + assert("nikita-2350", k1 != NULL);
  19359. + assert("nikita-2351", k2 != NULL);
  19360. +
  19361. + return (k1->index == k2->index && k1->objectid == k2->objectid);
  19362. +}
  19363. +
  19364. +/* Hash jnode by its key (inode plus offset). Used by hash-table macros */
  19365. +static inline __u32 jnode_key_hashfn(j_hash_table * table,
  19366. + const struct jnode_key *key)
  19367. +{
  19368. + assert("nikita-2352", key != NULL);
  19369. + assert("nikita-3346", IS_POW(table->_buckets));
  19370. +
  19371. + /* yes, this is a remarkably simple (if not stupid) hash function. */
  19372. + return (key->objectid + key->index) & (table->_buckets - 1);
  19373. +}
  19374. +
  19375. +/* The hash table definition */
  19376. +#define KMALLOC(size) reiser4_vmalloc(size)
  19377. +#define KFREE(ptr, size) vfree(ptr)
  19378. +TYPE_SAFE_HASH_DEFINE(j, jnode, struct jnode_key, key.j, link.j,
  19379. + jnode_key_hashfn, jnode_key_eq);
  19380. +#undef KFREE
  19381. +#undef KMALLOC
  19382. +
  19383. +/* call this to initialise jnode hash table */
  19384. +int jnodes_tree_init(reiser4_tree * tree/* tree to initialise jnodes for */)
  19385. +{
  19386. + assert("nikita-2359", tree != NULL);
  19387. + return j_hash_init(&tree->jhash_table, 16384);
  19388. +}
  19389. +
  19390. +/* call this to destroy jnode hash table. This is called during umount. */
  19391. +int jnodes_tree_done(reiser4_tree * tree/* tree to destroy jnodes for */)
  19392. +{
  19393. + j_hash_table *jtable;
  19394. + jnode *node;
  19395. + jnode *next;
  19396. +
  19397. + assert("nikita-2360", tree != NULL);
  19398. +
  19399. + /*
  19400. + * Scan hash table and free all jnodes.
  19401. + */
  19402. + jtable = &tree->jhash_table;
  19403. + if (jtable->_table) {
  19404. + for_all_in_htable(jtable, j, node, next) {
  19405. + assert("nikita-2361", !atomic_read(&node->x_count));
  19406. + jdrop(node);
  19407. + }
  19408. +
  19409. + j_hash_done(&tree->jhash_table);
  19410. + }
  19411. + return 0;
  19412. +}
  19413. +
  19414. +/**
  19415. + * init_jnodes - create jnode cache
  19416. + *
  19417. + * Initializes the jnode slab cache. It is part of reiser4 module initialization.
  19418. + */
  19419. +int init_jnodes(void)
  19420. +{
  19421. + assert("umka-168", _jnode_slab == NULL);
  19422. +
  19423. + _jnode_slab = kmem_cache_create("jnode", sizeof(jnode), 0,
  19424. + SLAB_HWCACHE_ALIGN |
  19425. + SLAB_RECLAIM_ACCOUNT, NULL);
  19426. + if (_jnode_slab == NULL)
  19427. + return RETERR(-ENOMEM);
  19428. +
  19429. + return 0;
  19430. +}
  19431. +
  19432. +/**
  19433. + * done_jnodes - delete jnode cache
  19434. + *
  19435. + * This is called on reiser4 module unloading or system shutdown.
  19436. + */
  19437. +void done_jnodes(void)
  19438. +{
  19439. + destroy_reiser4_cache(&_jnode_slab);
  19440. +}
  19441. +
  19442. +/* Initialize a jnode. */
  19443. +void jnode_init(jnode * node, reiser4_tree * tree, jnode_type type)
  19444. +{
  19445. + memset(node, 0, sizeof(jnode));
  19446. + ON_DEBUG(node->magic = JMAGIC);
  19447. + jnode_set_type(node, type);
  19448. + atomic_set(&node->d_count, 0);
  19449. + atomic_set(&node->x_count, 0);
  19450. + spin_lock_init(&node->guard);
  19451. + spin_lock_init(&node->load);
  19452. + node->atom = NULL;
  19453. + node->tree = tree;
  19454. + INIT_LIST_HEAD(&node->capture_link);
  19455. +
  19456. + ASSIGN_NODE_LIST(node, NOT_CAPTURED);
  19457. +
  19458. +#if REISER4_DEBUG
  19459. + {
  19460. + reiser4_super_info_data *sbinfo;
  19461. +
  19462. + sbinfo = get_super_private(tree->super);
  19463. + spin_lock_irq(&sbinfo->all_guard);
  19464. + list_add(&node->jnodes, &sbinfo->all_jnodes);
  19465. + spin_unlock_irq(&sbinfo->all_guard);
  19466. + }
  19467. +#endif
  19468. +}
  19469. +
  19470. +#if REISER4_DEBUG
  19471. +/*
  19472. + * Remove jnode from ->all_jnodes list.
  19473. + */
  19474. +static void jnode_done(jnode * node, reiser4_tree * tree)
  19475. +{
  19476. + reiser4_super_info_data *sbinfo;
  19477. +
  19478. + sbinfo = get_super_private(tree->super);
  19479. +
  19480. + spin_lock_irq(&sbinfo->all_guard);
  19481. + assert("nikita-2422", !list_empty(&node->jnodes));
  19482. + list_del_init(&node->jnodes);
  19483. + spin_unlock_irq(&sbinfo->all_guard);
  19484. +}
  19485. +#endif
  19486. +
  19487. +/* return already existing jnode of page */
  19488. +jnode *jnode_by_page(struct page *pg)
  19489. +{
  19490. + assert("nikita-2400", PageLocked(pg));
  19491. + assert("nikita-2068", PagePrivate(pg));
  19492. + assert("nikita-2067", jprivate(pg) != NULL);
  19493. + return jprivate(pg);
  19494. +}
  19495. +
  19496. +/* exported functions to allocate/free jnode objects outside this file */
  19497. +jnode *jalloc(void)
  19498. +{
  19499. + jnode *jal = kmem_cache_alloc(_jnode_slab, reiser4_ctx_gfp_mask_get());
  19500. + return jal;
  19501. +}
  19502. +
  19503. +/* return jnode back to the slab allocator */
  19504. +inline void jfree(jnode * node)
  19505. +{
  19506. + assert("nikita-2663", (list_empty_careful(&node->capture_link) &&
  19507. + NODE_LIST(node) == NOT_CAPTURED));
  19508. + assert("nikita-3222", list_empty(&node->jnodes));
  19509. + assert("nikita-3221", jnode_page(node) == NULL);
  19510. +
  19511. + /* not yet phash_jnode_destroy(node); */
  19512. +
  19513. + kmem_cache_free(_jnode_slab, node);
  19514. +}
  19515. +
  19516. +/*
  19517. + * This function is supplied as RCU callback. It actually frees jnode when
  19518. + * last reference to it is gone.
  19519. + */
  19520. +static void jnode_free_actor(struct rcu_head *head)
  19521. +{
  19522. + jnode *node;
  19523. + jnode_type jtype;
  19524. +
  19525. + node = container_of(head, jnode, rcu);
  19526. + jtype = jnode_get_type(node);
  19527. +
  19528. + ON_DEBUG(jnode_done(node, jnode_get_tree(node)));
  19529. +
  19530. + switch (jtype) {
  19531. + case JNODE_IO_HEAD:
  19532. + case JNODE_BITMAP:
  19533. + case JNODE_UNFORMATTED_BLOCK:
  19534. + jfree(node);
  19535. + break;
  19536. + case JNODE_FORMATTED_BLOCK:
  19537. + zfree(JZNODE(node));
  19538. + break;
  19539. + case JNODE_INODE:
  19540. + default:
  19541. + wrong_return_value("nikita-3197", "Wrong jnode type");
  19542. + }
  19543. +}
  19544. +
  19545. +/*
  19546. + * Free a jnode. Post a callback to be executed later through RCU when all
  19547. + * references to @node are released.
  19548. + */
  19549. +static inline void jnode_free(jnode * node, jnode_type jtype)
  19550. +{
  19551. + if (jtype != JNODE_INODE) {
  19552. + /*assert("nikita-3219", list_empty(&node->rcu.list)); */
  19553. + call_rcu(&node->rcu, jnode_free_actor);
  19554. + } else
  19555. + jnode_list_remove(node);
  19556. +}
  19557. +
  19558. +/* allocate new unformatted jnode */
  19559. +static jnode *jnew_unformatted(void)
  19560. +{
  19561. + jnode *jal;
  19562. +
  19563. + jal = jalloc();
  19564. + if (jal == NULL)
  19565. + return NULL;
  19566. +
  19567. + jnode_init(jal, current_tree, JNODE_UNFORMATTED_BLOCK);
  19568. + jal->key.j.mapping = NULL;
  19569. + jal->key.j.index = (unsigned long)-1;
  19570. + jal->key.j.objectid = 0;
  19571. + return jal;
  19572. +}
  19573. +
  19574. +/* look for jnode with given mapping and offset within hash table */
  19575. +jnode *jlookup(reiser4_tree * tree, oid_t objectid, unsigned long index)
  19576. +{
  19577. + struct jnode_key jkey;
  19578. + jnode *node;
  19579. +
  19580. + jkey.objectid = objectid;
  19581. + jkey.index = index;
  19582. +
  19583. + /*
  19584. + * hash table is _not_ protected by any lock during lookups. All we
  19585. + * have to do is to disable preemption to keep RCU happy.
  19586. + */
  19587. +
  19588. + rcu_read_lock();
  19589. + node = j_hash_find(&tree->jhash_table, &jkey);
  19590. + if (node != NULL) {
  19591. + /* protect @node from recycling */
  19592. + jref(node);
  19593. + assert("nikita-2955", jnode_invariant(node, 0, 0));
  19594. + node = jnode_rip_check(tree, node);
  19595. + }
  19596. + rcu_read_unlock();
  19597. + return node;
  19598. +}
  19599. +
  19600. +/* per inode radix tree of jnodes is protected by tree's read write spin lock */
  19601. +static jnode *jfind_nolock(struct address_space *mapping, unsigned long index)
  19602. +{
  19603. + assert("vs-1694", mapping->host != NULL);
  19604. +
  19605. + return radix_tree_lookup(jnode_tree_by_inode(mapping->host), index);
  19606. +}
  19607. +
  19608. +jnode *jfind(struct address_space *mapping, unsigned long index)
  19609. +{
  19610. + reiser4_tree *tree;
  19611. + jnode *node;
  19612. +
  19613. + assert("vs-1694", mapping->host != NULL);
  19614. + tree = reiser4_tree_by_inode(mapping->host);
  19615. +
  19616. + read_lock_tree(tree);
  19617. + node = jfind_nolock(mapping, index);
  19618. + if (node != NULL)
  19619. + jref(node);
  19620. + read_unlock_tree(tree);
  19621. + return node;
  19622. +}
  19623. +
  19624. +static void inode_attach_jnode(jnode * node)
  19625. +{
  19626. + struct inode *inode;
  19627. + reiser4_inode *info;
  19628. + struct radix_tree_root *rtree;
  19629. +
  19630. + assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
  19631. + assert("zam-1043", node->key.j.mapping != NULL);
  19632. + inode = node->key.j.mapping->host;
  19633. + info = reiser4_inode_data(inode);
  19634. + rtree = jnode_tree_by_reiser4_inode(info);
  19635. + if (radix_tree_empty(rtree)) {
  19636. + /* prevent inode from being pruned when it has jnodes attached
  19637. + to it */
  19638. + xa_lock_irq(&inode->i_data.i_pages);
  19639. + inode->i_data.nrpages++;
  19640. + xa_unlock_irq(&inode->i_data.i_pages);
  19641. + }
  19642. + assert("zam-1049",
  19643. + equi(!radix_tree_empty(rtree), info->nr_jnodes != 0));
  19644. + check_me("zam-1045",
  19645. + !radix_tree_insert(rtree, node->key.j.index, node));
  19646. + ON_DEBUG(info->nr_jnodes++);
  19647. +}
  19648. +
  19649. +static void inode_detach_jnode(jnode * node)
  19650. +{
  19651. + struct inode *inode;
  19652. + reiser4_inode *info;
  19653. + struct radix_tree_root *rtree;
  19654. +
  19655. + assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
  19656. + assert("zam-1044", node->key.j.mapping != NULL);
  19657. + inode = node->key.j.mapping->host;
  19658. + info = reiser4_inode_data(inode);
  19659. + rtree = jnode_tree_by_reiser4_inode(info);
  19660. +
  19661. + assert("zam-1051", info->nr_jnodes != 0);
  19662. + assert("zam-1052", !radix_tree_empty(rtree));
  19663. + ON_DEBUG(info->nr_jnodes--);
  19664. +
  19665. + /* delete jnode from inode's radix tree of jnodes */
  19666. + check_me("zam-1046", radix_tree_delete(rtree, node->key.j.index));
  19667. + if (radix_tree_empty(rtree)) {
  19668. + /* inode can be pruned now */
  19669. + xa_lock_irq(&inode->i_data.i_pages);
  19670. + inode->i_data.nrpages--;
  19671. + xa_unlock_irq(&inode->i_data.i_pages);
  19672. + }
  19673. +}
  19674. +
  19675. +/* put jnode into hash table (where they can be found by flush who does not know
  19676. + mapping) and to inode's tree of jnodes (where they can be found (hopefully
  19677. + faster) in places where mapping is known). Currently it is used by
  19678. + fs/reiser4/plugin/item/extent_file_ops.c:index_extent_jnode when new jnode is
  19679. + created */
  19680. +static void
  19681. +hash_unformatted_jnode(jnode * node, struct address_space *mapping,
  19682. + unsigned long index)
  19683. +{
  19684. + j_hash_table *jtable;
  19685. +
  19686. + assert("vs-1446", jnode_is_unformatted(node));
  19687. + assert("vs-1442", node->key.j.mapping == 0);
  19688. + assert("vs-1443", node->key.j.objectid == 0);
  19689. + assert("vs-1444", node->key.j.index == (unsigned long)-1);
  19690. + assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
  19691. +
  19692. + node->key.j.mapping = mapping;
  19693. + node->key.j.objectid = get_inode_oid(mapping->host);
  19694. + node->key.j.index = index;
  19695. +
  19696. + jtable = &jnode_get_tree(node)->jhash_table;
  19697. +
  19698. + /* race with some other thread inserting jnode into the hash table is
  19699. + * impossible, because we keep the page lock. */
  19700. + /*
  19701. + * following assertion no longer holds because of RCU: it is possible
  19702. + * jnode is in the hash table, but with JNODE_RIP bit set.
  19703. + */
  19704. + /* assert("nikita-3211", j_hash_find(jtable, &node->key.j) == NULL); */
  19705. + j_hash_insert_rcu(jtable, node);
  19706. + inode_attach_jnode(node);
  19707. +}
  19708. +
  19709. +static void unhash_unformatted_node_nolock(jnode * node)
  19710. +{
  19711. + assert("vs-1683", node->key.j.mapping != NULL);
  19712. + assert("vs-1684",
  19713. + node->key.j.objectid ==
  19714. + get_inode_oid(node->key.j.mapping->host));
  19715. +
  19716. + /* remove jnode from hash-table */
  19717. + j_hash_remove_rcu(&node->tree->jhash_table, node);
  19718. + inode_detach_jnode(node);
  19719. + node->key.j.mapping = NULL;
  19720. + node->key.j.index = (unsigned long)-1;
  19721. + node->key.j.objectid = 0;
  19722. +
  19723. +}
  19724. +
  19725. +/* remove jnode from hash table and from inode's tree of jnodes. This is used in
  19726. + reiser4_invalidatepage and in kill_hook_extent -> truncate_inode_jnodes ->
  19727. + reiser4_uncapture_jnode */
  19728. +void unhash_unformatted_jnode(jnode * node)
  19729. +{
  19730. + assert("vs-1445", jnode_is_unformatted(node));
  19731. +
  19732. + write_lock_tree(node->tree);
  19733. + unhash_unformatted_node_nolock(node);
  19734. + write_unlock_tree(node->tree);
  19735. +}
  19736. +
  19737. +/*
  19738. + * search hash table for a jnode with given oid and index. If not found,
  19739. + * allocate new jnode, insert it, and also insert into radix tree for the
  19740. + * given inode/mapping.
  19741. + */
  19742. +static jnode *find_get_jnode(reiser4_tree * tree,
  19743. + struct address_space *mapping,
  19744. + oid_t oid, unsigned long index)
  19745. +{
  19746. + jnode *result;
  19747. + jnode *shadow;
  19748. + int preload;
  19749. +
  19750. + result = jnew_unformatted();
  19751. +
  19752. + if (unlikely(result == NULL))
  19753. + return ERR_PTR(RETERR(-ENOMEM));
  19754. +
  19755. + preload = radix_tree_preload(reiser4_ctx_gfp_mask_get());
  19756. + if (preload != 0)
  19757. + return ERR_PTR(preload);
  19758. +
  19759. + write_lock_tree(tree);
  19760. + shadow = jfind_nolock(mapping, index);
  19761. + if (likely(shadow == NULL)) {
  19762. + /* add new jnode to hash table and inode's radix tree of
  19763. + * jnodes */
  19764. + jref(result);
  19765. + hash_unformatted_jnode(result, mapping, index);
  19766. + } else {
  19767. + /* jnode is found in inode's radix tree of jnodes */
  19768. + jref(shadow);
  19769. + jnode_free(result, JNODE_UNFORMATTED_BLOCK);
  19770. + assert("vs-1498", shadow->key.j.mapping == mapping);
  19771. + result = shadow;
  19772. + }
  19773. + write_unlock_tree(tree);
  19774. +
  19775. + assert("nikita-2955",
  19776. + ergo(result != NULL, jnode_invariant(result, 0, 0)));
  19777. + radix_tree_preload_end();
  19778. + return result;
  19779. +}
  19780. +
  19781. +/* jget() (a la zget() but for unformatted nodes). Returns (and possibly
  19782. + creates) jnode corresponding to page @pg. jnode is attached to page and
  19783. + inserted into jnode hash-table. */
  19784. +static jnode *do_jget(reiser4_tree * tree, struct page *pg)
  19785. +{
  19786. + /*
  19787. + * There are two ways to create jnode: starting with pre-existing page
  19788. + * and without page.
  19789. + *
  19790. + * When page already exists, jnode is created
  19791. + * (jnode_of_page()->do_jget()) under page lock. This is done in
  19792. + * ->writepage(), or when capturing anonymous page dirtied through
  19793. + * mmap.
  19794. + *
  19795. + * Jnode without page is created by index_extent_jnode().
  19796. + *
  19797. + */
  19798. +
  19799. + jnode *result;
  19800. + oid_t oid = get_inode_oid(pg->mapping->host);
  19801. +
  19802. + assert("umka-176", pg != NULL);
  19803. + assert("nikita-2394", PageLocked(pg));
  19804. +
  19805. + result = jprivate(pg);
  19806. + if (likely(result != NULL))
  19807. + return jref(result);
  19808. +
  19809. + tree = reiser4_tree_by_page(pg);
  19810. +
  19811. + /* check hash-table first */
  19812. + result = jfind(pg->mapping, pg->index);
  19813. + if (unlikely(result != NULL)) {
  19814. + spin_lock_jnode(result);
  19815. + jnode_attach_page(result, pg);
  19816. + spin_unlock_jnode(result);
  19817. + result->key.j.mapping = pg->mapping;
  19818. + return result;
  19819. + }
  19820. +
  19821. + /* since page is locked, jnode should be allocated with GFP_NOFS flag */
  19822. + reiser4_ctx_gfp_mask_force(GFP_NOFS);
  19823. + result = find_get_jnode(tree, pg->mapping, oid, pg->index);
  19824. + if (unlikely(IS_ERR(result)))
  19825. + return result;
  19826. + /* attach jnode to page */
  19827. + spin_lock_jnode(result);
  19828. + jnode_attach_page(result, pg);
  19829. + spin_unlock_jnode(result);
  19830. + return result;
  19831. +}
  19832. +
/*
 * return jnode for @pg, creating it if necessary.
 */
jnode *jnode_of_page(struct page *pg)
{
	jnode *result;

	assert("nikita-2394", PageLocked(pg));

	result = do_jget(reiser4_tree_by_page(pg), pg);

	/* debug build: verify that the returned jnode is consistently
	 * linked with @pg and carries the matching key fields */
	if (REISER4_DEBUG && !IS_ERR(result)) {
		assert("nikita-3210", result == jprivate(pg));
		assert("nikita-2046", jnode_page(jprivate(pg)) == pg);
		if (jnode_is_unformatted(jprivate(pg))) {
			assert("nikita-2364",
			       jprivate(pg)->key.j.index == pg->index);
			assert("nikita-2367",
			       jprivate(pg)->key.j.mapping == pg->mapping);
			assert("nikita-2365",
			       jprivate(pg)->key.j.objectid ==
			       get_inode_oid(pg->mapping->host));
			assert("vs-1200",
			       jprivate(pg)->key.j.objectid ==
			       pg->mapping->host->i_ino);
			assert("nikita-2356",
			       jnode_is_unformatted(jnode_by_page(pg)));
		}
		assert("nikita-2956", jnode_invariant(jprivate(pg), 0, 0));
	}
	return result;
}
  19865. +
/* attach page to jnode: set ->pg pointer in jnode, and ->private one in the
 * page. Caller must hold the page lock and the jnode spin lock. */
void jnode_attach_page(jnode * node, struct page *pg)
{
	assert("nikita-2060", node != NULL);
	assert("nikita-2061", pg != NULL);

	/* neither side may already be attached */
	assert("nikita-2050", jprivate(pg) == 0ul);
	assert("nikita-2393", !PagePrivate(pg));
	assert("vs-1741", node->pg == NULL);

	assert("nikita-2396", PageLocked(pg));
	assert_spin_locked(&(node->guard));

	/* jnode holds a reference on the page for as long as they are
	 * bound; dropped in page_clear_jnode() */
	get_page(pg);
	set_page_private(pg, (unsigned long)node);
	node->pg = pg;
	SetPagePrivate(pg);
}
  19885. +
/* Dual to jnode_attach_page: break a binding between page and jnode.
 * Caller must hold the page lock and the jnode spin lock. */
void page_clear_jnode(struct page *page, jnode * node)
{
	assert("nikita-2425", PageLocked(page));
	assert_spin_locked(&(node->guard));
	assert("nikita-2428", PagePrivate(page));

	assert("nikita-3551", !PageWriteback(page));

	/* data are gone with the page, so the jnode will have to be
	 * re-parsed if a new page is ever attached */
	JF_CLR(node, JNODE_PARSED);
	set_page_private(page, 0ul);
	ClearPagePrivate(page);
	node->pg = NULL;
	/* drop the reference taken in jnode_attach_page() */
	put_page(page);
}
  19901. +
#if 0
/* it is only used in one place to handle error */
void
page_detach_jnode(struct page *page, struct address_space *mapping,
		  unsigned long index)
{
	assert("nikita-2395", page != NULL);

	lock_page(page);
	/* only detach if the page is still the one the caller saw: it may
	 * have been truncated/re-assigned before we got the lock */
	if ((page->mapping == mapping) && (page->index == index)
	    && PagePrivate(page)) {
		jnode *node;

		node = jprivate(page);
		spin_lock_jnode(node);
		page_clear_jnode(page, node);
		spin_unlock_jnode(node);
	}
	unlock_page(page);
}
#endif /* 0 */
  19923. +
/* return @node page locked.

   Locking ordering requires that one first takes page lock and afterwards
   spin lock on node attached to this page. Sometimes it is necessary to go in
   the opposite direction. This is done through standard trylock-and-release
   loop.

   On exit the jnode spin lock is held; the returned page (if non-NULL) is
   locked.
*/
static struct page *jnode_lock_page(jnode * node)
{
	struct page *page;

	assert("nikita-2052", node != NULL);
	assert("nikita-2401", LOCK_CNT_NIL(spin_locked_jnode));

	while (1) {

		spin_lock_jnode(node);
		page = jnode_page(node);
		if (page == NULL)
			break;

		/* no need to get_page( page ) here, because page cannot
		   be evicted from memory without detaching it from jnode and
		   this requires spin lock on jnode that we already hold.
		 */
		if (trylock_page(page)) {
			/* We won a lock on jnode page, proceed. */
			break;
		}

		/* Page is locked by someone else. */
		get_page(page);
		spin_unlock_jnode(node);
		wait_on_page_locked(page);
		/* it is possible that page was detached from jnode and
		   returned to the free pool, or re-assigned while we were
		   waiting on locked bit. This will be rechecked on the next
		   loop iteration.
		 */
		put_page(page);

		/* try again */
	}
	return page;
}
  19969. +
/*
 * if JNODE_PARSED bit is not set, call ->parse() method of jnode, to verify
 * validity of jnode content.
 */
static inline int jparse(jnode * node)
{
	int result;

	assert("nikita-2466", node != NULL);

	spin_lock_jnode(node);
	if (likely(!jnode_is_parsed(node))) {
		result = jnode_ops(node)->parse(node);
		if (likely(result == 0))
			JF_SET(node, JNODE_PARSED);
	} else
		/* already parsed: nothing to do */
		result = 0;
	spin_unlock_jnode(node);
	return result;
}
  19990. +
/* Lock a page attached to jnode, create and attach page to jnode if it had no
 * one. Returns the locked page (referenced via node->pg) or ERR_PTR. */
static struct page *jnode_get_page_locked(jnode * node, gfp_t gfp_flags)
{
	struct page *page;

	spin_lock_jnode(node);
	page = jnode_page(node);

	if (page == NULL) {
		/* no page yet: allocate/find one outside of the spin lock */
		spin_unlock_jnode(node);
		page = find_or_create_page(jnode_get_mapping(node),
					   jnode_get_index(node), gfp_flags);
		if (page == NULL)
			return ERR_PTR(RETERR(-ENOMEM));
	} else {
		if (trylock_page(page)) {
			spin_unlock_jnode(node);
			return page;
		}
		/* pin the page so it survives while we sleep on its lock */
		get_page(page);
		spin_unlock_jnode(node);
		lock_page(page);
		assert("nikita-3134", page->mapping == jnode_get_mapping(node));
	}

	spin_lock_jnode(node);
	if (!jnode_page(node))
		jnode_attach_page(node, page);
	spin_unlock_jnode(node);

	/* drop the extra reference taken above (or by
	 * find_or_create_page()); the jnode's own reference remains */
	put_page(page);
	assert("zam-894", jnode_page(node) == page);
	return page;
}
  20026. +
  20027. +/* Start read operation for jnode's page if page is not up-to-date. */
  20028. +static int jnode_start_read(jnode * node, struct page *page)
  20029. +{
  20030. + assert("zam-893", PageLocked(page));
  20031. +
  20032. + if (PageUptodate(page)) {
  20033. + unlock_page(page);
  20034. + return 0;
  20035. + }
  20036. + return reiser4_page_io(page, node, READ, reiser4_ctx_gfp_mask_get());
  20037. +}
  20038. +
#if REISER4_DEBUG
/* debug-only sanity check performed after a successful jload: for a locked
 * znode, verify the cached item count against the node plugin. */
static void check_jload(jnode * node, struct page *page)
{
	if (jnode_is_znode(node)) {
		znode *z = JZNODE(node);

		if (znode_is_any_locked(z)) {
			assert("nikita-3253",
			       z->nr_items ==
			       node_plugin_by_node(z)->num_of_items(z));
			/* NOTE(review): this kunmap() appears to balance a
			 * kmap taken elsewhere for the check — confirm the
			 * pairing before touching this path */
			kunmap(page);
		}
		assert("nikita-3565", znode_invariant(z));
	}
}
#else
#define check_jload(node, page) noop
#endif
  20057. +
/* prefetch jnode to speed up next call to jload. Call this when you are going
 * to call jload() shortly. This will bring appropriate portion of jnode into
 * CPU cache. */
void jload_prefetch(jnode * node)
{
	/* jload() begins by touching ->x_count (via jref()) */
	prefetchw(&node->x_count);
}
  20065. +
/* load jnode's data into memory. Takes an x-reference and a d-reference on
 * success; both are dropped via jrelse_tail() on failure. */
int jload_gfp(jnode * node /* node to load */ ,
	      gfp_t gfp_flags /* allocation flags */ ,
	      int do_kmap/* true if page should be kmapped */)
{
	struct page *page;
	int result = 0;
	int parsed;

	assert("nikita-3010", reiser4_schedulable());

	prefetchw(&node->pg);

	/* taking d-reference implies taking x-reference. */
	jref(node);

	/*
	 * acquiring d-reference to @jnode and check for JNODE_PARSED bit
	 * should be atomic, otherwise there is a race against
	 * reiser4_releasepage().
	 */
	spin_lock(&(node->load));
	add_d_ref(node);
	parsed = jnode_is_parsed(node);
	spin_unlock(&(node->load));

	if (unlikely(!parsed)) {
		/* slow path: bring page in, read it, and parse contents */
		page = jnode_get_page_locked(node, gfp_flags);
		if (unlikely(IS_ERR(page))) {
			result = PTR_ERR(page);
			goto failed;
		}

		result = jnode_start_read(node, page);
		if (unlikely(result != 0))
			goto failed;

		wait_on_page_locked(page);
		if (unlikely(!PageUptodate(page))) {
			result = RETERR(-EIO);
			goto failed;
		}

		if (do_kmap)
			node->data = kmap(page);

		result = jparse(node);
		if (unlikely(result != 0)) {
			if (do_kmap)
				kunmap(page);
			goto failed;
		}
		check_jload(node, page);
	} else {
		/* fast path: data already parsed and resident */
		page = jnode_page(node);
		check_jload(node, page);
		if (do_kmap)
			node->data = kmap(page);
	}

	if (!is_writeout_mode())
		/* We do not mark pages active if jload is called as a part of
		 * jnode_flush() or reiser4_write_logs(). Both jnode_flush()
		 * and write_logs() add no value to cached data, there is no
		 * sense to mark pages as active when they go to disk, it just
		 * confuses vm scanning routines because clean page could be
		 * moved out from inactive list as a result of this
		 * mark_page_accessed() call. */
		mark_page_accessed(page);

	return 0;

failed:
	jrelse_tail(node);
	return result;

}
  20143. +
  20144. +/* start asynchronous reading for given jnode's page. */
  20145. +int jstartio(jnode * node)
  20146. +{
  20147. + struct page *page;
  20148. +
  20149. + page = jnode_get_page_locked(node, reiser4_ctx_gfp_mask_get());
  20150. + if (IS_ERR(page))
  20151. + return PTR_ERR(page);
  20152. +
  20153. + return jnode_start_read(node, page);
  20154. +}
  20155. +
/* Initialize a node by calling appropriate plugin instead of reading
 * node from disk as in jload(). Takes x- and d-references like jload();
 * they are dropped through jrelse() on failure. */
int jinit_new(jnode * node, gfp_t gfp_flags)
{
	struct page *page;
	int result;

	jref(node);
	add_d_ref(node);

	page = jnode_get_page_locked(node, gfp_flags);
	if (IS_ERR(page)) {
		result = PTR_ERR(page);
		goto failed;
	}

	/* brand new node: no disk read needed, contents built in place */
	SetPageUptodate(page);
	unlock_page(page);

	node->data = kmap(page);

	if (!jnode_is_parsed(node)) {
		jnode_plugin *jplug = jnode_ops(node);
		spin_lock_jnode(node);
		result = jplug->init(node);
		spin_unlock_jnode(node);
		if (result) {
			kunmap(page);
			goto failed;
		}
		JF_SET(node, JNODE_PARSED);
	}

	return 0;

failed:
	jrelse(node);
	return result;
}
  20195. +
/* release a reference to jnode acquired by jload(), decrement ->d_count */
void jrelse_tail(jnode * node/* jnode to release references to */)
{
	assert("nikita-489", atomic_read(&node->d_count) > 0);
	atomic_dec(&node->d_count);
	/* release reference acquired in jload_gfp() or jinit_new() */
	if (jnode_is_unformatted(node) || jnode_is_znode(node))
		LOCK_CNT_DEC(d_refs);
	jput(node);
}
  20206. +
/* drop reference to node data. When last reference is dropped, data are
   unloaded. Undoes the kmap taken by jload_gfp()/jinit_new(). */
void jrelse(jnode * node/* jnode to release references to */)
{
	struct page *page;

	assert("nikita-487", node != NULL);
	assert_spin_not_locked(&(node->guard));

	page = jnode_page(node);
	if (likely(page != NULL)) {
		/*
		 * it is safe not to lock jnode here, because at this point
		 * @node->d_count is greater than zero (if jrelse() is used
		 * correctly, that is). JNODE_PARSED may be not set yet, if,
		 * for example, we got here as a result of error handling path
		 * in jload(). Anyway, page cannot be detached by
		 * reiser4_releasepage(). truncate will invalidate page
		 * regardless, but this should not be a problem.
		 */
		kunmap(page);
	}
	jrelse_tail(node);
}
  20231. +
/* called from jput() to wait for io completion on the jnode's page */
static void jnode_finish_io(jnode * node)
{
	struct page *page;

	assert("nikita-2922", node != NULL);

	spin_lock_jnode(node);
	page = jnode_page(node);
	if (page != NULL) {
		/* pin the page before dropping the spin lock so it cannot
		 * vanish while we sleep in wait_on_page_writeback() */
		get_page(page);
		spin_unlock_jnode(node);
		wait_on_page_writeback(page);
		put_page(page);
	} else
		spin_unlock_jnode(node);
}
  20249. +
/*
 * This is called by jput() when last reference to jnode is released. This is
 * separate function, because we want fast path of jput() to be inline and,
 * therefore, small.
 *
 * Called under rcu_read_lock(); this function is responsible for releasing
 * it on every path.
 */
void jput_final(jnode * node)
{
	int r_i_p;

	/* A fast check for keeping node in cache. We always keep node in cache
	 * if its page is present and node was not marked for deletion */
	if (jnode_page(node) != NULL && !JF_ISSET(node, JNODE_HEARD_BANSHEE)) {
		rcu_read_unlock();
		return;
	}
	r_i_p = !JF_TEST_AND_SET(node, JNODE_RIP);
	/*
	 * if r_i_p is true, we were first to set JNODE_RIP on this node. In
	 * this case it is safe to access node after unlock.
	 */
	rcu_read_unlock();
	if (r_i_p) {
		jnode_finish_io(node);
		if (JF_ISSET(node, JNODE_HEARD_BANSHEE))
			/* node is removed from the tree. */
			jdelete(node);
		else
			jnode_try_drop(node);
	}
	/* if !r_i_p some other thread is already killing it */
}
  20281. +
  20282. +int jwait_io(jnode * node, int rw)
  20283. +{
  20284. + struct page *page;
  20285. + int result;
  20286. +
  20287. + assert("zam-448", jnode_page(node) != NULL);
  20288. +
  20289. + page = jnode_page(node);
  20290. +
  20291. + result = 0;
  20292. + if (rw == READ) {
  20293. + wait_on_page_locked(page);
  20294. + } else {
  20295. + assert("nikita-2227", rw == WRITE);
  20296. + wait_on_page_writeback(page);
  20297. + }
  20298. + if (PageError(page))
  20299. + result = RETERR(-EIO);
  20300. +
  20301. + return result;
  20302. +}
  20303. +
  20304. +/*
  20305. + * jnode types and plugins.
  20306. + *
  20307. + * jnode by itself is a "base type". There are several different jnode
  20308. + * flavors, called "jnode types" (see jnode_type for a list). Sometimes code
  20309. + * has to do different things based on jnode type. In the standard reiser4 way
  20310. + * this is done by having jnode plugin (see fs/reiser4/plugin.h:jnode_plugin).
  20311. + *
  20312. + * Functions below deal with jnode types and define methods of jnode plugin.
  20313. + *
  20314. + */
  20315. +
/* set jnode type. This is done during jnode initialization. */
static void jnode_set_type(jnode * node, jnode_type type)
{
	/* per-type bit pattern stored in the JNODE_TYPE_1.. bits of
	 * node->state; values are the encodings the rest of the code
	 * decodes the type from */
	static unsigned long type_to_mask[] = {
		[JNODE_UNFORMATTED_BLOCK] = 1,
		[JNODE_FORMATTED_BLOCK] = 0,
		[JNODE_BITMAP] = 2,
		[JNODE_IO_HEAD] = 6,
		[JNODE_INODE] = 4
	};

	assert("zam-647", type < LAST_JNODE_TYPE);
	assert("nikita-2815", !jnode_is_loaded(node));
	/* type may only be set on a pristine node */
	assert("nikita-3386", node->state == 0);

	node->state |= (type_to_mask[type] << JNODE_TYPE_1);
}
  20333. +
/* ->init() method of jnode plugin for jnodes that don't require plugin
 * specific initialization. Always succeeds. */
static int init_noinit(jnode * node UNUSED_ARG)
{
	return 0;
}
  20340. +
/* ->parse() method of jnode plugin for jnodes that don't require plugin
 * specific parsing. Always succeeds. */
static int parse_noparse(jnode * node UNUSED_ARG)
{
	return 0;
}
  20347. +
/* ->mapping() method for unformatted jnode */
struct address_space *mapping_jnode(const jnode * node)
{
	struct address_space *map;

	assert("nikita-2713", node != NULL);

	/* mapping is stored in jnode */

	map = node->key.j.mapping;
	assert("nikita-2714", map != NULL);
	assert("nikita-2897", is_reiser4_inode(map->host));
	/* mapping must belong to the inode this jnode is keyed by */
	assert("nikita-2715", get_inode_oid(map->host) == node->key.j.objectid);
	return map;
}
  20363. +
/* ->index() method for unformatted jnodes */
unsigned long index_jnode(const jnode * node)
{
	/* index is stored in jnode */
	return node->key.j.index;
}
  20370. +
/* ->remove() method for unformatted jnodes; called with tree lock held */
static inline void remove_jnode(jnode * node, reiser4_tree * tree)
{
	/* remove jnode from hash table and radix tree; a NULL mapping
	 * means the node was never hashed */
	if (node->key.j.mapping)
		unhash_unformatted_node_nolock(node);
}
  20378. +
/* ->mapping() method for znodes */
static struct address_space *mapping_znode(const jnode * node)
{
	/* all znodes belong to fake inode */
	return reiser4_get_super_fake(jnode_get_tree(node)->super)->i_mapping;
}
  20385. +
  20386. +/* ->index() method for znodes */
  20387. +static unsigned long index_znode(const jnode * node)
  20388. +{
  20389. + unsigned long addr;
  20390. + assert("nikita-3317", (1 << znode_shift_order) < sizeof(znode));
  20391. +
  20392. + /* index of znode is just its address (shifted) */
  20393. + addr = (unsigned long)node;
  20394. + return (addr - PAGE_OFFSET) >> znode_shift_order;
  20395. +}
  20396. +
/* ->mapping() method for bitmap jnode */
static struct address_space *mapping_bitmap(const jnode * node)
{
	/* all bitmap blocks belong to special bitmap inode */
	return get_super_private(jnode_get_tree(node)->super)->bitmap->
	    i_mapping;
}
  20404. +
  20405. +/* ->index() method for jnodes that are indexed by address */
  20406. +static unsigned long index_is_address(const jnode * node)
  20407. +{
  20408. + unsigned long ind;
  20409. +
  20410. + ind = (unsigned long)node;
  20411. + return ind - PAGE_OFFSET;
  20412. +}
  20413. +
/* resolve race with jput */
jnode *jnode_rip_sync(reiser4_tree *tree, jnode *node)
{
	/*
	 * This is used as part of RCU-based jnode handling.
	 *
	 * jlookup(), zlook(), zget(), and cbk_cache_scan_slots() have to work
	 * with unreferenced jnodes (ones with ->x_count == 0). Hash table is
	 * not protected during this, so concurrent thread may execute
	 * zget-set-HEARD_BANSHEE-zput, or somehow else cause jnode to be
	 * freed in jput_final(). To avoid such races, jput_final() sets
	 * JNODE_RIP on jnode (under tree lock). All places that work with
	 * unreferenced jnodes call this function. It checks for JNODE_RIP bit
	 * (first without taking tree lock), and if this bit is set, released
	 * reference acquired by the current thread and returns NULL.
	 *
	 * As a result, if jnode is being concurrently freed, NULL is returned
	 * and caller should pretend that jnode wasn't found in the first
	 * place.
	 *
	 * Otherwise it's safe to release "rcu-read-lock" and continue with
	 * jnode.
	 */
	if (unlikely(JF_ISSET(node, JNODE_RIP))) {
		/* re-check under tree lock: the lockless test above may race
		 * with jput_final() setting the bit */
		read_lock_tree(tree);
		if (JF_ISSET(node, JNODE_RIP)) {
			dec_x_ref(node);
			node = NULL;
		}
		read_unlock_tree(tree);
	}
	return node;
}
  20447. +
  20448. +reiser4_key *jnode_build_key(const jnode * node, reiser4_key * key)
  20449. +{
  20450. + struct inode *inode;
  20451. + item_plugin *iplug;
  20452. + loff_t off;
  20453. +
  20454. + assert("nikita-3092", node != NULL);
  20455. + assert("nikita-3093", key != NULL);
  20456. + assert("nikita-3094", jnode_is_unformatted(node));
  20457. +
  20458. + off = ((loff_t) index_jnode(node)) << PAGE_SHIFT;
  20459. + inode = mapping_jnode(node)->host;
  20460. +
  20461. + if (node->parent_item_id != 0)
  20462. + iplug = item_plugin_by_id(node->parent_item_id);
  20463. + else
  20464. + iplug = NULL;
  20465. +
  20466. + if (iplug != NULL && iplug->f.key_by_offset)
  20467. + iplug->f.key_by_offset(inode, off, key);
  20468. + else {
  20469. + file_plugin *fplug;
  20470. +
  20471. + fplug = inode_file_plugin(inode);
  20472. + assert("zam-1007", fplug != NULL);
  20473. + assert("zam-1008", fplug->key_by_inode != NULL);
  20474. +
  20475. + fplug->key_by_inode(inode, off, key);
  20476. + }
  20477. +
  20478. + return key;
  20479. +}
  20480. +
/* ->parse() method for formatted nodes: delegate to znode parsing */
static int parse_znode(jnode * node)
{
	return zparse(JZNODE(node));
}
  20486. +
/* ->delete() method for formatted nodes; called with tree write lock held
 * for a node marked JNODE_HEARD_BANSHEE (removed from the file system) */
static void delete_znode(jnode * node, reiser4_tree * tree)
{
	znode *z;

	assert_rw_write_locked(&(tree->tree_lock));
	assert("vs-898", JF_ISSET(node, JNODE_HEARD_BANSHEE));

	z = JZNODE(node);
	/* deletion requires no children in memory */
	assert("vs-899", z->c_count == 0);

	/* delete znode from sibling list. */
	sibling_list_remove(z);

	znode_remove(z, tree);
}
  20503. +
  20504. +/* ->remove() method for formatted nodes */
  20505. +static int remove_znode(jnode * node, reiser4_tree * tree)
  20506. +{
  20507. + znode *z;
  20508. +
  20509. + assert_rw_write_locked(&(tree->tree_lock));
  20510. + z = JZNODE(node);
  20511. +
  20512. + if (z->c_count == 0) {
  20513. + /* detach znode from sibling list. */
  20514. + sibling_list_drop(z);
  20515. + /* this is called with tree spin-lock held, so call
  20516. + znode_remove() directly (rather than znode_lock_remove()). */
  20517. + znode_remove(z, tree);
  20518. + return 0;
  20519. + }
  20520. + return RETERR(-EBUSY);
  20521. +}
  20522. +
/* ->init() method for formatted nodes */
int init_znode(jnode * node)
{
	znode *z;

	z = JZNODE(node);
	/* call node plugin to do actual initialization */
	z->nr_items = 0;
	return z->nplug->init(z);
}
  20533. +
/* ->clone() method for formatted nodes: allocate a new znode carrying the
 * same block number and tree level; key and page are left uninitialized */
static jnode *clone_formatted(jnode * node)
{
	znode *clone;

	assert("vs-1430", jnode_is_znode(node));
	clone = zalloc(reiser4_ctx_gfp_mask_get());
	if (clone == NULL)
		return ERR_PTR(RETERR(-ENOMEM));
	zinit(clone, NULL, current_tree);
	jnode_set_block(ZJNODE(clone), jnode_get_block(node));
	/* ZJNODE(clone)->key.z is not initialized */
	clone->level = JZNODE(node)->level;

	return ZJNODE(clone);
}
  20550. +
  20551. +/* jplug->clone for unformatted nodes */
  20552. +static jnode *clone_unformatted(jnode * node)
  20553. +{
  20554. + jnode *clone;
  20555. +
  20556. + assert("vs-1431", jnode_is_unformatted(node));
  20557. + clone = jalloc();
  20558. + if (clone == NULL)
  20559. + return ERR_PTR(RETERR(-ENOMEM));
  20560. +
  20561. + jnode_init(clone, current_tree, JNODE_UNFORMATTED_BLOCK);
  20562. + jnode_set_block(clone, jnode_get_block(node));
  20563. +
  20564. + return clone;
  20565. +
  20566. +}
  20567. +
/*
 * Setup jnode plugin methods for various jnode types.
 *
 * Entries with NULL methods (bitmap/io-head ->clone, all of JNODE_INODE)
 * are never invoked for those types.
 */
jnode_plugin jnode_plugins[LAST_JNODE_TYPE] = {
	[JNODE_UNFORMATTED_BLOCK] = {
		.h = {
			.type_id = REISER4_JNODE_PLUGIN_TYPE,
			.id = JNODE_UNFORMATTED_BLOCK,
			.pops = NULL,
			.label = "unformatted",
			.desc = "unformatted node",
			.linkage = {NULL, NULL}
		},
		.init = init_noinit,
		.parse = parse_noparse,
		.mapping = mapping_jnode,
		.index = index_jnode,
		.clone = clone_unformatted
	},
	[JNODE_FORMATTED_BLOCK] = {
		.h = {
			.type_id = REISER4_JNODE_PLUGIN_TYPE,
			.id = JNODE_FORMATTED_BLOCK,
			.pops = NULL,
			.label = "formatted",
			.desc = "formatted tree node",
			.linkage = {NULL, NULL}
		},
		.init = init_znode,
		.parse = parse_znode,
		.mapping = mapping_znode,
		.index = index_znode,
		.clone = clone_formatted
	},
	[JNODE_BITMAP] = {
		.h = {
			.type_id = REISER4_JNODE_PLUGIN_TYPE,
			.id = JNODE_BITMAP,
			.pops = NULL,
			.label = "bitmap",
			.desc = "bitmap node",
			.linkage = {NULL, NULL}
		},
		.init = init_noinit,
		.parse = parse_noparse,
		.mapping = mapping_bitmap,
		.index = index_is_address,
		.clone = NULL
	},
	[JNODE_IO_HEAD] = {
		.h = {
			.type_id = REISER4_JNODE_PLUGIN_TYPE,
			.id = JNODE_IO_HEAD,
			.pops = NULL,
			.label = "io head",
			.desc = "io head",
			.linkage = {NULL, NULL}
		},
		.init = init_noinit,
		.parse = parse_noparse,
		.mapping = mapping_bitmap,
		.index = index_is_address,
		.clone = NULL
	},
	[JNODE_INODE] = {
		.h = {
			.type_id = REISER4_JNODE_PLUGIN_TYPE,
			.id = JNODE_INODE,
			.pops = NULL,
			.label = "inode",
			.desc = "inode's builtin jnode",
			.linkage = {NULL, NULL}
		},
		.init = NULL,
		.parse = NULL,
		.mapping = NULL,
		.index = NULL,
		.clone = NULL
	}
};
  20648. +
  20649. +/*
  20650. + * jnode destruction.
  20651. + *
  20652. + * Thread may use a jnode after it acquired a reference to it. References are
  20653. + * counted in ->x_count field. Reference protects jnode from being
  20654. + * recycled. This is different from protecting jnode data (that are stored in
  20655. + * jnode page) from being evicted from memory. Data are protected by jload()
  20656. + * and released by jrelse().
  20657. + *
  20658. + * If thread already possesses a reference to the jnode it can acquire another
  20659. + * one through jref(). Initial reference is obtained (usually) by locating
  20660. + * jnode in some indexing structure that depends on jnode type: formatted
  20661. + * nodes are kept in global hash table, where they are indexed by block
  20662. + * number, and also in the cbk cache. Unformatted jnodes are also kept in hash
  20663. + * table, which is indexed by oid and offset within file, and in per-inode
  20664. + * radix tree.
  20665. + *
  20666. + * Reference to jnode is released by jput(). If last reference is released,
  20667. + * jput_final() is called. This function determines whether jnode has to be
  20668. + * deleted (this happens when corresponding node is removed from the file
  20669. + * system, jnode is marked with JNODE_HEARD_BANSHEE bit in this case), or it
  20670. + * should be just "removed" (deleted from memory).
  20671. + *
  20672. + * Jnode destruction is signally delicate dance because of locking and RCU.
  20673. + */
  20674. +
  20675. +/*
  20676. + * Returns true if jnode cannot be removed right now. This check is called
  20677. + * under tree lock. If it returns true, jnode is irrevocably committed to be
  20678. + * deleted/removed.
  20679. + */
  20680. +static inline int jnode_is_busy(const jnode * node, jnode_type jtype)
  20681. +{
  20682. + /* if other thread managed to acquire a reference to this jnode, don't
  20683. + * free it. */
  20684. + if (atomic_read(&node->x_count) > 0)
  20685. + return 1;
  20686. + /* also, don't free znode that has children in memory */
  20687. + if (jtype == JNODE_FORMATTED_BLOCK && JZNODE(node)->c_count > 0)
  20688. + return 1;
  20689. + return 0;
  20690. +}
  20691. +
/*
 * this is called as part of removing jnode. Based on jnode type, call
 * corresponding function that removes jnode from indices and returns it back
 * to the appropriate slab (through RCU). Called with tree write lock held.
 */
static inline void
jnode_remove(jnode * node, jnode_type jtype, reiser4_tree * tree)
{
	switch (jtype) {
	case JNODE_UNFORMATTED_BLOCK:
		remove_jnode(node, tree);
		break;
	case JNODE_IO_HEAD:
	case JNODE_BITMAP:
		/* not kept in any index; nothing to unhash */
		break;
	case JNODE_INODE:
		break;
	case JNODE_FORMATTED_BLOCK:
		remove_znode(node, tree);
		break;
	default:
		wrong_return_value("nikita-3196", "Wrong jnode type");
	}
}
  20716. +
  20717. +/*
  20718. + * this is called as part of deleting jnode. Based on jnode type, call
  20719. + * corresponding function that removes jnode from indices and returns it back
  20720. + * to the appropriate slab (through RCU).
  20721. + *
  20722. + * This differs from jnode_remove() only for formatted nodes---for them
  20723. + * sibling list handling is different for removal and deletion.
  20724. + */
  20725. +static inline void
  20726. +jnode_delete(jnode * node, jnode_type jtype, reiser4_tree * tree UNUSED_ARG)
  20727. +{
  20728. + switch (jtype) {
  20729. + case JNODE_UNFORMATTED_BLOCK:
  20730. + remove_jnode(node, tree);
  20731. + break;
  20732. + case JNODE_IO_HEAD:
  20733. + case JNODE_BITMAP:
  20734. + break;
  20735. + case JNODE_FORMATTED_BLOCK:
  20736. + delete_znode(node, tree);
  20737. + break;
  20738. + case JNODE_INODE:
  20739. + default:
  20740. + wrong_return_value("nikita-3195", "Wrong jnode type");
  20741. + }
  20742. +}
  20743. +
  20744. +#if REISER4_DEBUG
  20745. +/*
  20746. + * remove jnode from the debugging list of all jnodes hanging off super-block.
  20747. + */
  20748. +void jnode_list_remove(jnode * node)
  20749. +{
  20750. + reiser4_super_info_data *sbinfo;
  20751. +
  20752. + sbinfo = get_super_private(jnode_get_tree(node)->super);
  20753. +
  20754. + spin_lock_irq(&sbinfo->all_guard);
  20755. + assert("nikita-2422", !list_empty(&node->jnodes));
  20756. + list_del_init(&node->jnodes);
  20757. + spin_unlock_irq(&sbinfo->all_guard);
  20758. +}
  20759. +#endif
  20760. +
  20761. +/*
  20762. + * this is called by jput_final() to remove jnode when last reference to it is
  20763. + * released.
  20764. + */
  20765. +static int jnode_try_drop(jnode * node)
  20766. +{
  20767. + int result;
  20768. + reiser4_tree *tree;
  20769. + jnode_type jtype;
  20770. +
  20771. + assert("nikita-2491", node != NULL);
  20772. + assert("nikita-2583", JF_ISSET(node, JNODE_RIP));
  20773. +
  20774. + tree = jnode_get_tree(node);
  20775. + jtype = jnode_get_type(node);
  20776. +
  20777. + spin_lock_jnode(node);
  20778. + write_lock_tree(tree);
  20779. + /*
  20780. + * if jnode has a page---leave it alone. Memory pressure will
  20781. + * eventually kill page and jnode.
  20782. + */
  20783. + if (jnode_page(node) != NULL) {
  20784. + write_unlock_tree(tree);
  20785. + spin_unlock_jnode(node);
  20786. + JF_CLR(node, JNODE_RIP);
  20787. + return RETERR(-EBUSY);
  20788. + }
  20789. +
  20790. + /* re-check ->x_count under tree lock. */
  20791. + result = jnode_is_busy(node, jtype);
  20792. + if (result == 0) {
  20793. + assert("nikita-2582", !JF_ISSET(node, JNODE_HEARD_BANSHEE));
  20794. + assert("jmacd-511/b", atomic_read(&node->d_count) == 0);
  20795. +
  20796. + spin_unlock_jnode(node);
  20797. + /* no page and no references---dispatch it. */
  20798. + jnode_remove(node, jtype, tree);
  20799. + write_unlock_tree(tree);
  20800. + jnode_free(node, jtype);
  20801. + } else {
  20802. + /* busy check failed: reference was acquired by concurrent
  20803. + * thread. */
  20804. + write_unlock_tree(tree);
  20805. + spin_unlock_jnode(node);
  20806. + JF_CLR(node, JNODE_RIP);
  20807. + }
  20808. + return result;
  20809. +}
  20810. +
  20811. +/* jdelete() -- Delete jnode from the tree and file system */
  20812. +static int jdelete(jnode * node/* jnode to finish with */)
  20813. +{
  20814. + struct page *page;
  20815. + int result;
  20816. + reiser4_tree *tree;
  20817. + jnode_type jtype;
  20818. +
  20819. + assert("nikita-467", node != NULL);
  20820. + assert("nikita-2531", JF_ISSET(node, JNODE_RIP));
  20821. +
  20822. + jtype = jnode_get_type(node);
  20823. +
  20824. + page = jnode_lock_page(node);
  20825. + assert_spin_locked(&(node->guard));
  20826. +
  20827. + tree = jnode_get_tree(node);
  20828. +
  20829. + write_lock_tree(tree);
  20830. + /* re-check ->x_count under tree lock. */
  20831. + result = jnode_is_busy(node, jtype);
  20832. + if (likely(!result)) {
  20833. + assert("nikita-2123", JF_ISSET(node, JNODE_HEARD_BANSHEE));
  20834. + assert("jmacd-511", atomic_read(&node->d_count) == 0);
  20835. +
  20836. + /* detach page */
  20837. + if (page != NULL) {
  20838. + /*
  20839. + * FIXME this is racy against jnode_extent_write().
  20840. + */
  20841. + page_clear_jnode(page, node);
  20842. + }
  20843. + spin_unlock_jnode(node);
  20844. + /* goodbye */
  20845. + jnode_delete(node, jtype, tree);
  20846. + write_unlock_tree(tree);
  20847. + jnode_free(node, jtype);
  20848. + /* @node is no longer valid pointer */
  20849. + if (page != NULL)
  20850. + reiser4_drop_page(page);
  20851. + } else {
  20852. + /* busy check failed: reference was acquired by concurrent
  20853. + * thread. */
  20854. + JF_CLR(node, JNODE_RIP);
  20855. + write_unlock_tree(tree);
  20856. + spin_unlock_jnode(node);
  20857. + if (page != NULL)
  20858. + unlock_page(page);
  20859. + }
  20860. + return result;
  20861. +}
  20862. +
  20863. +/* drop jnode on the floor.
  20864. +
  20865. + Return value:
  20866. +
  20867. + -EBUSY: failed to drop jnode, because there are still references to it
  20868. +
  20869. + 0: successfully dropped jnode
  20870. +
  20871. +*/
  20872. +static int jdrop_in_tree(jnode * node, reiser4_tree * tree)
  20873. +{
  20874. + struct page *page;
  20875. + jnode_type jtype;
  20876. + int result;
  20877. +
  20878. + assert("zam-602", node != NULL);
  20879. + assert_rw_not_read_locked(&(tree->tree_lock));
  20880. + assert_rw_not_write_locked(&(tree->tree_lock));
  20881. + assert("nikita-2403", !JF_ISSET(node, JNODE_HEARD_BANSHEE));
  20882. +
  20883. + jtype = jnode_get_type(node);
  20884. +
  20885. + page = jnode_lock_page(node);
  20886. + assert_spin_locked(&(node->guard));
  20887. +
  20888. + write_lock_tree(tree);
  20889. +
  20890. + /* re-check ->x_count under tree lock. */
  20891. + result = jnode_is_busy(node, jtype);
  20892. + if (!result) {
  20893. + assert("nikita-2488", page == jnode_page(node));
  20894. + assert("nikita-2533", atomic_read(&node->d_count) == 0);
  20895. + if (page != NULL) {
  20896. + assert("nikita-2126", !PageDirty(page));
  20897. + assert("nikita-2127", PageUptodate(page));
  20898. + assert("nikita-2181", PageLocked(page));
  20899. + page_clear_jnode(page, node);
  20900. + }
  20901. + spin_unlock_jnode(node);
  20902. + jnode_remove(node, jtype, tree);
  20903. + write_unlock_tree(tree);
  20904. + jnode_free(node, jtype);
  20905. + if (page != NULL)
  20906. + reiser4_drop_page(page);
  20907. + } else {
  20908. + /* busy check failed: reference was acquired by concurrent
  20909. + * thread. */
  20910. + JF_CLR(node, JNODE_RIP);
  20911. + write_unlock_tree(tree);
  20912. + spin_unlock_jnode(node);
  20913. + if (page != NULL)
  20914. + unlock_page(page);
  20915. + }
  20916. + return result;
  20917. +}
  20918. +
  20919. +/* This function frees jnode "if possible". In particular, [dcx]_count has to
  20920. + be 0 (where applicable). */
  20921. +void jdrop(jnode * node)
  20922. +{
  20923. + jdrop_in_tree(node, jnode_get_tree(node));
  20924. +}
  20925. +
  20926. +/* IO head jnode implementation; The io heads are simple j-nodes with limited
  20927. + functionality (these j-nodes are not in any hash table) just for reading
  20928. + from and writing to disk. */
  20929. +
  20930. +jnode *reiser4_alloc_io_head(const reiser4_block_nr * block)
  20931. +{
  20932. + jnode *jal = jalloc();
  20933. +
  20934. + if (jal != NULL) {
  20935. + jnode_init(jal, current_tree, JNODE_IO_HEAD);
  20936. + jnode_set_block(jal, block);
  20937. + }
  20938. +
  20939. + jref(jal);
  20940. +
  20941. + return jal;
  20942. +}
  20943. +
  20944. +void reiser4_drop_io_head(jnode * node)
  20945. +{
  20946. + assert("zam-648", jnode_get_type(node) == JNODE_IO_HEAD);
  20947. +
  20948. + jput(node);
  20949. + jdrop(node);
  20950. +}
  20951. +
  20952. +/* pin jnode data so that reiser4_releasepage() cannot free it */
  20953. +void pin_jnode_data(jnode * node)
  20954. +{
  20955. + assert("zam-671", jnode_page(node) != NULL);
  20956. + get_page(jnode_page(node));
  20957. +}
  20958. +
  20959. +/* make jnode data free-able again */
  20960. +void unpin_jnode_data(jnode * node)
  20961. +{
  20962. + assert("zam-672", jnode_page(node) != NULL);
  20963. + put_page(jnode_page(node));
  20964. +}
  20965. +
  20966. +struct address_space *jnode_get_mapping(const jnode * node)
  20967. +{
  20968. + return jnode_ops(node)->mapping(node);
  20969. +}
  20970. +
  20971. +#if REISER4_DEBUG
  20972. +/* debugging aid: jnode invariant */
  20973. +int jnode_invariant_f(const jnode * node, char const **msg)
  20974. +{
  20975. +#define _ergo(ant, con) \
  20976. + ((*msg) = "{" #ant "} ergo {" #con "}", ergo((ant), (con)))
  20977. +#define _check(exp) ((*msg) = #exp, (exp))
  20978. +
  20979. + return _check(node != NULL) &&
  20980. + /* [jnode-queued] */
  20981. + /* only relocated node can be queued, except that when znode
  20982. + * is being deleted, its JNODE_RELOC bit is cleared */
  20983. + _ergo(JF_ISSET(node, JNODE_FLUSH_QUEUED),
  20984. + JF_ISSET(node, JNODE_RELOC) ||
  20985. + JF_ISSET(node, JNODE_HEARD_BANSHEE)) &&
  20986. + _check(node->jnodes.prev != NULL) &&
  20987. + _check(node->jnodes.next != NULL) &&
  20988. + /* [jnode-dirty] invariant */
  20989. + /* dirty jnode is part of an atom */
  20990. + _ergo(JF_ISSET(node, JNODE_DIRTY), node->atom != NULL) &&
  20991. + /* [jnode-oid] invariant */
  20992. + /* for unformatted node ->objectid and ->mapping fields are
  20993. + * consistent */
  20994. + _ergo(jnode_is_unformatted(node) && node->key.j.mapping != NULL,
  20995. + node->key.j.objectid ==
  20996. + get_inode_oid(node->key.j.mapping->host)) &&
  20997. + /* [jnode-atom-valid] invariant */
  20998. + /* node atom has valid state */
  20999. + _ergo(node->atom != NULL, node->atom->stage != ASTAGE_INVALID) &&
  21000. + /* [jnode-page-binding] invariant */
  21001. + /* if node points to page, it points back to node */
  21002. + _ergo(node->pg != NULL, jprivate(node->pg) == node) &&
  21003. + /* [jnode-refs] invariant */
  21004. + /* only referenced jnode can be loaded */
  21005. + _check(atomic_read(&node->x_count) >= atomic_read(&node->d_count));
  21006. +
  21007. +}
  21008. +
  21009. +static const char *jnode_type_name(jnode_type type)
  21010. +{
  21011. + switch (type) {
  21012. + case JNODE_UNFORMATTED_BLOCK:
  21013. + return "unformatted";
  21014. + case JNODE_FORMATTED_BLOCK:
  21015. + return "formatted";
  21016. + case JNODE_BITMAP:
  21017. + return "bitmap";
  21018. + case JNODE_IO_HEAD:
  21019. + return "io head";
  21020. + case JNODE_INODE:
  21021. + return "inode";
  21022. + case LAST_JNODE_TYPE:
  21023. + return "last";
  21024. + default:{
  21025. + static char unknown[30];
  21026. +
  21027. + sprintf(unknown, "unknown %i", type);
  21028. + return unknown;
  21029. + }
  21030. + }
  21031. +}
  21032. +
  21033. +#define jnode_state_name(node, flag) \
  21034. + (JF_ISSET((node), (flag)) ? ((#flag "|")+6) : "")
  21035. +
  21036. +/* debugging aid: output human readable information about @node */
  21037. +static void info_jnode(const char *prefix /* prefix to print */ ,
  21038. + const jnode * node/* node to print */)
  21039. +{
  21040. + assert("umka-068", prefix != NULL);
  21041. +
  21042. + if (node == NULL) {
  21043. + printk("%s: null\n", prefix);
  21044. + return;
  21045. + }
  21046. +
  21047. + printk
  21048. + ("%s: %p: state: %lx: [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s], level: %i,"
  21049. + " block: %s, d_count: %d, x_count: %d, "
  21050. + "pg: %p, atom: %p, lock: %i:%i, type: %s, ", prefix, node,
  21051. + node->state,
  21052. + jnode_state_name(node, JNODE_PARSED),
  21053. + jnode_state_name(node, JNODE_HEARD_BANSHEE),
  21054. + jnode_state_name(node, JNODE_LEFT_CONNECTED),
  21055. + jnode_state_name(node, JNODE_RIGHT_CONNECTED),
  21056. + jnode_state_name(node, JNODE_ORPHAN),
  21057. + jnode_state_name(node, JNODE_CREATED),
  21058. + jnode_state_name(node, JNODE_RELOC),
  21059. + jnode_state_name(node, JNODE_OVRWR),
  21060. + jnode_state_name(node, JNODE_DIRTY),
  21061. + jnode_state_name(node, JNODE_IS_DYING),
  21062. + jnode_state_name(node, JNODE_RIP),
  21063. + jnode_state_name(node, JNODE_MISSED_IN_CAPTURE),
  21064. + jnode_state_name(node, JNODE_WRITEBACK),
  21065. + jnode_state_name(node, JNODE_DKSET),
  21066. + jnode_state_name(node, JNODE_REPACK),
  21067. + jnode_state_name(node, JNODE_CLUSTER_PAGE),
  21068. + jnode_get_level(node), sprint_address(jnode_get_block(node)),
  21069. + atomic_read(&node->d_count), atomic_read(&node->x_count),
  21070. + jnode_page(node), node->atom, 0, 0,
  21071. + jnode_type_name(jnode_get_type(node)));
  21072. + if (jnode_is_unformatted(node)) {
  21073. + printk("inode: %llu, index: %lu, ",
  21074. + node->key.j.objectid, node->key.j.index);
  21075. + }
  21076. +}
  21077. +
  21078. +/* debugging aid: check jnode invariant and warn if it doesn't hold */
  21079. +static int jnode_invariant(jnode * node, int tlocked, int jlocked)
  21080. +{
  21081. + char const *failed_msg;
  21082. + int result;
  21083. + reiser4_tree *tree;
  21084. +
  21085. + tree = jnode_get_tree(node);
  21086. +
  21087. + assert("umka-063312", node != NULL);
  21088. + assert("umka-064321", tree != NULL);
  21089. +
  21090. + if (!jlocked && !tlocked)
  21091. + spin_lock_jnode((jnode *) node);
  21092. + if (!tlocked)
  21093. + read_lock_tree(jnode_get_tree(node));
  21094. + result = jnode_invariant_f(node, &failed_msg);
  21095. + if (!result) {
  21096. + info_jnode("corrupted node", node);
  21097. + warning("jmacd-555", "Condition %s failed", failed_msg);
  21098. + }
  21099. + if (!tlocked)
  21100. + read_unlock_tree(jnode_get_tree(node));
  21101. + if (!jlocked && !tlocked)
  21102. + spin_unlock_jnode((jnode *) node);
  21103. + return result;
  21104. +}
  21105. +
  21106. +#endif /* REISER4_DEBUG */
  21107. +
  21108. +/* Make Linus happy.
  21109. + Local variables:
  21110. + c-indentation-style: "K&R"
  21111. + mode-name: "LC"
  21112. + c-basic-offset: 8
  21113. + tab-width: 8
  21114. + fill-column: 80
  21115. + End:
  21116. +*/
  21117. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/jnode.h linux-5.16.14/fs/reiser4/jnode.h
  21118. --- linux-5.16.14.orig/fs/reiser4/jnode.h 1970-01-01 01:00:00.000000000 +0100
  21119. +++ linux-5.16.14/fs/reiser4/jnode.h 2022-03-12 13:26:19.656892738 +0100
  21120. @@ -0,0 +1,704 @@
  21121. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  21122. + * reiser4/README */
  21123. +
  21124. +/* Declaration of jnode. See jnode.c for details. */
  21125. +
  21126. +#ifndef __JNODE_H__
  21127. +#define __JNODE_H__
  21128. +
  21129. +#include "forward.h"
  21130. +#include "type_safe_hash.h"
  21131. +#include "txnmgr.h"
  21132. +#include "key.h"
  21133. +#include "debug.h"
  21134. +#include "dformat.h"
  21135. +#include "page_cache.h"
  21136. +#include "context.h"
  21137. +
  21138. +#include "plugin/plugin.h"
  21139. +
  21140. +#include <linux/fs.h>
  21141. +#include <linux/mm.h>
  21142. +#include <linux/spinlock.h>
  21143. +#include <asm/atomic.h>
  21144. +#include <linux/bitops.h>
  21145. +#include <linux/list.h>
  21146. +#include <linux/rcupdate.h>
  21147. +
  21148. +/* declare hash table of jnodes (jnodes proper, that is, unformatted
  21149. + nodes) */
  21150. +TYPE_SAFE_HASH_DECLARE(j, jnode);
  21151. +
  21152. +/* declare hash table of znodes */
  21153. +TYPE_SAFE_HASH_DECLARE(z, znode);
  21154. +
  21155. +struct jnode_key {
  21156. + __u64 objectid;
  21157. + unsigned long index;
  21158. + struct address_space *mapping;
  21159. +};
  21160. +
  21161. +/*
  21162. + Jnode is the "base class" of other nodes in reiser4. It is also happens to
  21163. + be exactly the node we use for unformatted tree nodes.
  21164. +
  21165. + Jnode provides following basic functionality:
  21166. +
  21167. + . reference counting and indexing.
  21168. +
  21169. + . integration with page cache. Jnode has ->pg reference to which page can
  21170. + be attached.
  21171. +
  21172. + . interface to transaction manager. It is jnode that is kept in transaction
  21173. + manager lists, attached to atoms, etc. (NOTE-NIKITA one may argue that this
  21174. + means, there should be special type of jnode for inode.)
  21175. +
  21176. + Locking:
  21177. +
  21178. + Spin lock: the following fields are protected by the per-jnode spin lock:
  21179. +
  21180. + ->state
  21181. + ->atom
  21182. + ->capture_link
  21183. +
  21184. + Following fields are protected by the global tree lock:
  21185. +
  21186. + ->link
  21187. + ->key.z (content of ->key.z is only changed in znode_rehash())
  21188. + ->key.j
  21189. +
  21190. + Atomic counters
  21191. +
  21192. + ->x_count
  21193. + ->d_count
  21194. +
  21195. + ->pg, and ->data are protected by spin lock for unused jnode and are
  21196. + immutable for used jnode (one for which fs/reiser4/vfs_ops.c:releasable()
  21197. + is false).
  21198. +
  21199. + ->tree is immutable after creation
  21200. +
  21201. + Unclear
  21202. +
  21203. + ->blocknr: should be under jnode spin-lock, but current interface is based
  21204. + on passing of block address.
  21205. +
  21206. + If you ever need to spin lock two nodes at once, do this in "natural"
  21207. + memory order: lock znode with lower address first. (See lock_two_nodes().)
  21208. +
  21209. + Invariants involving this data-type:
  21210. +
  21211. + [jnode-dirty]
  21212. + [jnode-refs]
  21213. + [jnode-oid]
  21214. + [jnode-queued]
  21215. + [jnode-atom-valid]
  21216. + [jnode-page-binding]
  21217. +*/
  21218. +
  21219. +struct jnode {
  21220. +#if REISER4_DEBUG
  21221. +#define JMAGIC 0x52654973 /* "ReIs" */
  21222. + int magic;
  21223. +#endif
  21224. + /* FIRST CACHE LINE (16 bytes): data used by jload */
  21225. +
  21226. + /* jnode's state: bitwise flags from the reiser4_jnode_state enum. */
  21227. + /* 0 */ unsigned long state;
  21228. +
  21229. + /* lock, protecting jnode's fields. */
  21230. + /* 4 */ spinlock_t load;
  21231. +
  21232. + /* counter of references to jnode itself. Increased on jref().
  21233. + Decreased on jput().
  21234. + */
  21235. + /* 8 */ atomic_t x_count;
  21236. +
  21237. + /* counter of references to jnode's data. Pin data page(s) in
  21238. + memory while this is greater than 0. Increased on jload().
  21239. + Decreased on jrelse().
  21240. + */
  21241. + /* 12 */ atomic_t d_count;
  21242. +
  21243. + /* SECOND CACHE LINE: data used by hash table lookups */
  21244. +
  21245. + /* 16 */ union {
  21246. + /* znodes are hashed by block number */
  21247. + reiser4_block_nr z;
  21248. + /* unformatted nodes are hashed by mapping plus offset */
  21249. + struct jnode_key j;
  21250. + } key;
  21251. +
  21252. + /* THIRD CACHE LINE */
  21253. +
  21254. + /* 32 */ union {
  21255. + /* pointers to maintain hash-table */
  21256. + z_hash_link z;
  21257. + j_hash_link j;
  21258. + } link;
  21259. +
  21260. + /* pointer to jnode page. */
  21261. + /* 36 */ struct page *pg;
  21262. + /* pointer to node itself. This is page_address(node->pg) when page is
  21263. + attached to the jnode
  21264. + */
  21265. + /* 40 */ void *data;
  21266. +
  21267. + /* 44 */ reiser4_tree *tree;
  21268. +
  21269. + /* FOURTH CACHE LINE: atom related fields */
  21270. +
  21271. + /* 48 */ spinlock_t guard;
  21272. +
  21273. + /* atom the block is in, if any */
  21274. + /* 52 */ txn_atom *atom;
  21275. +
  21276. + /* capture list */
  21277. + /* 56 */ struct list_head capture_link;
  21278. +
  21279. + /* FIFTH CACHE LINE */
  21280. +
  21281. + /* 64 */ struct rcu_head rcu;
  21282. + /* crosses cache line */
  21283. +
  21284. + /* SIXTH CACHE LINE */
  21285. +
  21286. + /* the real blocknr (where io is going to/from) */
  21287. + /* 80 */ reiser4_block_nr blocknr;
  21288. + /* Parent item type, unformatted and CRC need it for
  21289. + * offset => key conversion. */
  21290. + /* NOTE: this parent_item_id looks like jnode type. */
  21291. + /* 88 */ reiser4_plugin_id parent_item_id;
  21292. + /* 92 */
  21293. +#if REISER4_DEBUG
  21294. + /* list of all jnodes for debugging purposes. */
  21295. + struct list_head jnodes;
  21296. + /* how many times this jnode was written in one transaction */
  21297. + int written;
  21298. + /* this indicates which atom's list the jnode is on */
  21299. + atom_list list;
  21300. +#endif
  21301. +} __attribute__ ((aligned(16)));
  21302. +
  21303. +/*
  21304. + * jnode types. Enumeration of existing jnode types.
  21305. + */
  21306. +typedef enum {
  21307. + JNODE_UNFORMATTED_BLOCK, /* unformatted block */
  21308. + JNODE_FORMATTED_BLOCK, /* formatted block, znode */
  21309. + JNODE_BITMAP, /* bitmap */
  21310. + JNODE_IO_HEAD, /* jnode representing a block in the
  21311. + * wandering log */
  21312. + JNODE_INODE, /* jnode embedded into inode */
  21313. + LAST_JNODE_TYPE
  21314. +} jnode_type;
  21315. +
  21316. +/* jnode states */
  21317. +typedef enum {
  21318. + /* jnode's page is loaded and data checked */
  21319. + JNODE_PARSED = 0,
  21320. + /* node was deleted, not all locks on it were released. This
  21321. + node is empty and is going to be removed from the tree
  21322. + shortly. */
  21323. + JNODE_HEARD_BANSHEE = 1,
  21324. + /* left sibling pointer is valid */
  21325. + JNODE_LEFT_CONNECTED = 2,
  21326. + /* right sibling pointer is valid */
  21327. + JNODE_RIGHT_CONNECTED = 3,
  21328. +
  21329. + /* znode was just created and doesn't yet have a pointer from
  21330. + its parent */
  21331. + JNODE_ORPHAN = 4,
  21332. +
  21333. + /* this node was created by its transaction and has not been assigned
  21334. + a block address. */
  21335. + JNODE_CREATED = 5,
  21336. +
  21337. + /* this node is currently relocated */
  21338. + JNODE_RELOC = 6,
  21339. + /* this node is currently wandered */
  21340. + JNODE_OVRWR = 7,
  21341. +
  21342. + /* this znode has been modified */
  21343. + JNODE_DIRTY = 8,
  21344. +
  21345. + /* znode lock is being invalidated */
  21346. + JNODE_IS_DYING = 9,
  21347. +
  21348. + /* THIS PLACE IS INTENTIONALLY LEFT BLANK */
  21349. +
  21350. + /* jnode is queued for flushing. */
  21351. + JNODE_FLUSH_QUEUED = 12,
  21352. +
  21353. + /* In the following bits jnode type is encoded. */
  21354. + JNODE_TYPE_1 = 13,
  21355. + JNODE_TYPE_2 = 14,
  21356. + JNODE_TYPE_3 = 15,
  21357. +
  21358. + /* jnode is being destroyed */
  21359. + JNODE_RIP = 16,
  21360. +
  21361. + /* znode was not captured during locking (it might be so because
  21362. + ->level != LEAF_LEVEL and lock_mode == READ_LOCK) */
  21363. + JNODE_MISSED_IN_CAPTURE = 17,
  21364. +
  21365. + /* write is in progress */
  21366. + JNODE_WRITEBACK = 18,
  21367. +
  21368. + /* unused flag */
  21369. + JNODE_NEW = 19,
  21370. +
  21371. + /* delimiting keys are already set for this znode. */
  21372. + JNODE_DKSET = 20,
  21373. +
  21374. + /* when this bit is set page and jnode can not be disconnected */
  21375. + JNODE_WRITE_PREPARED = 21,
  21376. +
  21377. + JNODE_CLUSTER_PAGE = 22,
  21378. + /* Jnode is marked for repacking, that means the reiser4 flush and the
  21379. + * block allocator should process this node special way */
  21380. + JNODE_REPACK = 23,
  21381. + /* node should be converted by flush in squalloc phase */
  21382. + JNODE_CONVERTIBLE = 24,
  21383. + /*
  21384. + * When jnode is dirtied for the first time in given transaction,
  21385. + * do_jnode_make_dirty() checks whether this jnode can possible became
  21386. + * member of overwrite set. If so, this bit is set, and one block is
  21387. + * reserved in the ->flush_reserved space of atom.
  21388. + *
  21389. + * This block is "used" (and JNODE_FLUSH_RESERVED bit is cleared) when
  21390. + *
  21391. + * (1) flush decides that we want this block to go into relocate
  21392. + * set after all.
  21393. + *
  21394. + * (2) wandering log is allocated (by log writer)
  21395. + *
  21396. + * (3) extent is allocated
  21397. + *
  21398. + */
  21399. + JNODE_FLUSH_RESERVED = 29
  21400. +} reiser4_jnode_state;
  21401. +
  21402. +/* Macros for accessing the jnode state. */
  21403. +
  21404. +static inline void JF_CLR(jnode * j, int f)
  21405. +{
  21406. + assert("unknown-1", j->magic == JMAGIC);
  21407. + clear_bit(f, &j->state);
  21408. +}
  21409. +static inline int JF_ISSET(const jnode * j, int f)
  21410. +{
  21411. + assert("unknown-2", j->magic == JMAGIC);
  21412. + return test_bit(f, &((jnode *) j)->state);
  21413. +}
  21414. +static inline void JF_SET(jnode * j, int f)
  21415. +{
  21416. + assert("unknown-3", j->magic == JMAGIC);
  21417. + set_bit(f, &j->state);
  21418. +}
  21419. +
  21420. +static inline int JF_TEST_AND_SET(jnode * j, int f)
  21421. +{
  21422. + assert("unknown-4", j->magic == JMAGIC);
  21423. + return test_and_set_bit(f, &j->state);
  21424. +}
  21425. +
  21426. +static inline void spin_lock_jnode(jnode *node)
  21427. +{
  21428. + /* check that spinlocks of lower priorities are not held */
  21429. + assert("", (LOCK_CNT_NIL(rw_locked_tree) &&
  21430. + LOCK_CNT_NIL(spin_locked_txnh) &&
  21431. + LOCK_CNT_NIL(spin_locked_zlock) &&
  21432. + LOCK_CNT_NIL(rw_locked_dk) &&
  21433. + LOCK_CNT_LT(spin_locked_jnode, 2)));
  21434. +
  21435. + spin_lock(&(node->guard));
  21436. +
  21437. + LOCK_CNT_INC(spin_locked_jnode);
  21438. + LOCK_CNT_INC(spin_locked);
  21439. +}
  21440. +
  21441. +static inline void spin_unlock_jnode(jnode *node)
  21442. +{
  21443. + assert_spin_locked(&(node->guard));
  21444. + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_jnode));
  21445. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  21446. +
  21447. + LOCK_CNT_DEC(spin_locked_jnode);
  21448. + LOCK_CNT_DEC(spin_locked);
  21449. +
  21450. + spin_unlock(&(node->guard));
  21451. +}
  21452. +
  21453. +static inline int jnode_is_in_deleteset(const jnode * node)
  21454. +{
  21455. + return JF_ISSET(node, JNODE_RELOC);
  21456. +}
  21457. +
  21458. +extern int init_jnodes(void);
  21459. +extern void done_jnodes(void);
  21460. +
  21461. +/* Jnode routines */
  21462. +extern jnode *jalloc(void);
  21463. +extern void jfree(jnode * node) NONNULL;
  21464. +extern jnode *jclone(jnode *);
  21465. +extern jnode *jlookup(reiser4_tree * tree,
  21466. + oid_t objectid, unsigned long ind) NONNULL;
  21467. +extern jnode *jfind(struct address_space *, unsigned long index) NONNULL;
  21468. +extern jnode *jnode_by_page(struct page *pg) NONNULL;
  21469. +extern jnode *jnode_of_page(struct page *pg) NONNULL;
  21470. +void jnode_attach_page(jnode * node, struct page *pg);
  21471. +
  21472. +void unhash_unformatted_jnode(jnode *);
  21473. +extern jnode *page_next_jnode(jnode * node) NONNULL;
  21474. +extern void jnode_init(jnode * node, reiser4_tree * tree, jnode_type) NONNULL;
  21475. +extern void jnode_make_dirty(jnode * node) NONNULL;
  21476. +extern void jnode_make_clean(jnode * node) NONNULL;
  21477. +extern void jnode_make_wander_nolock(jnode * node) NONNULL;
  21478. +extern void jnode_make_wander(jnode *) NONNULL;
  21479. +extern void znode_make_reloc(znode * , flush_queue_t *) NONNULL;
  21480. +extern void unformatted_make_reloc(jnode *, flush_queue_t *) NONNULL;
  21481. +extern struct address_space *jnode_get_mapping(const jnode * node) NONNULL;
  21482. +
  21483. +/**
  21484. + * jnode_get_block
  21485. + * @node: jnode to query
  21486. + *
  21487. + */
  21488. +static inline const reiser4_block_nr *jnode_get_block(const jnode *node)
  21489. +{
  21490. + assert("nikita-528", node != NULL);
  21491. +
  21492. + return &node->blocknr;
  21493. +}
  21494. +
  21495. +/**
  21496. + * jnode_set_block
  21497. + * @node: jnode to update
  21498. + * @blocknr: new block nr
  21499. + */
  21500. +static inline void jnode_set_block(jnode *node, const reiser4_block_nr *blocknr)
  21501. +{
  21502. + assert("nikita-2020", node != NULL);
  21503. + assert("umka-055", blocknr != NULL);
  21504. + node->blocknr = *blocknr;
  21505. +}
  21506. +
  21507. +
  21508. +/* block number for IO. Usually this is the same as jnode_get_block(), unless
  21509. + * jnode was emergency flushed---then block number chosen by eflush is
  21510. + * used. */
  21511. +static inline const reiser4_block_nr *jnode_get_io_block(jnode * node)
  21512. +{
  21513. + assert("nikita-2768", node != NULL);
  21514. + assert_spin_locked(&(node->guard));
  21515. +
  21516. + return jnode_get_block(node);
  21517. +}
  21518. +
  21519. +/* Jnode flush interface. */
  21520. +extern reiser4_blocknr_hint *reiser4_pos_hint(flush_pos_t *pos);
  21521. +extern flush_queue_t *reiser4_pos_fq(flush_pos_t *pos);
  21522. +
  21523. +/* FIXME-VS: these are used in plugin/item/extent.c */
  21524. +
  21525. +/* does extent_get_block have to be called */
  21526. +#define jnode_mapped(node) JF_ISSET (node, JNODE_MAPPED)
  21527. +#define jnode_set_mapped(node) JF_SET (node, JNODE_MAPPED)
  21528. +
  21529. +/* the node should be converted during flush squalloc phase */
  21530. +#define jnode_convertible(node) JF_ISSET (node, JNODE_CONVERTIBLE)
  21531. +#define jnode_set_convertible(node) JF_SET (node, JNODE_CONVERTIBLE)
  21532. +
  21533. +/* Macros to convert from jnode to znode, znode to jnode. These are macros
  21534. + because C doesn't allow overloading of const prototypes. */
  21535. +#define ZJNODE(x) (&(x)->zjnode)
  21536. +#define JZNODE(x) \
  21537. +({ \
  21538. + typeof(x) __tmp_x; \
  21539. + \
  21540. + __tmp_x = (x); \
  21541. + assert("jmacd-1300", jnode_is_znode(__tmp_x)); \
  21542. + (znode*) __tmp_x; \
  21543. +})
  21544. +
  21545. +extern int jnodes_tree_init(reiser4_tree * tree);
  21546. +extern int jnodes_tree_done(reiser4_tree * tree);
  21547. +
  21548. +#if REISER4_DEBUG
  21549. +
  21550. +extern int znode_is_any_locked(const znode * node);
  21551. +extern void jnode_list_remove(jnode * node);
  21552. +
  21553. +#else
  21554. +
  21555. +#define jnode_list_remove(node) noop
  21556. +
  21557. +#endif
  21558. +
  21559. +int znode_is_root(const znode * node) NONNULL;
  21560. +
  21561. +/* bump reference counter on @node */
  21562. +static inline void add_x_ref(jnode * node/* node to increase x_count of */)
  21563. +{
  21564. + assert("nikita-1911", node != NULL);
  21565. +
  21566. + atomic_inc(&node->x_count);
  21567. + LOCK_CNT_INC(x_refs);
  21568. +}
  21569. +
  21570. +static inline void dec_x_ref(jnode * node)
  21571. +{
  21572. + assert("nikita-3215", node != NULL);
  21573. + assert("nikita-3216", atomic_read(&node->x_count) > 0);
  21574. +
  21575. + atomic_dec(&node->x_count);
  21576. + assert("nikita-3217", LOCK_CNT_GTZ(x_refs));
  21577. + LOCK_CNT_DEC(x_refs);
  21578. +}
  21579. +
  21580. +/* jref() - increase counter of references to jnode/znode (x_count) */
  21581. +static inline jnode *jref(jnode * node)
  21582. +{
  21583. + assert("jmacd-508", (node != NULL) && !IS_ERR(node));
  21584. + add_x_ref(node);
  21585. + return node;
  21586. +}
  21587. +
  21588. +/* get the page of jnode */
  21589. +static inline struct page *jnode_page(const jnode * node)
  21590. +{
  21591. + return node->pg;
  21592. +}
  21593. +
  21594. +/* return pointer to jnode data */
  21595. +static inline char *jdata(const jnode * node)
  21596. +{
  21597. + assert("nikita-1415", node != NULL);
  21598. + assert("nikita-3198", jnode_page(node) != NULL);
  21599. + return node->data;
  21600. +}
  21601. +
  21602. +static inline int jnode_is_loaded(const jnode * node)
  21603. +{
  21604. + assert("zam-506", node != NULL);
  21605. + return atomic_read(&node->d_count) > 0;
  21606. +}
  21607. +
  21608. +extern void page_clear_jnode(struct page *page, jnode * node) NONNULL;
  21609. +
  21610. +static inline void jnode_set_reloc(jnode * node)
  21611. +{
  21612. + assert("nikita-2431", node != NULL);
  21613. + assert("nikita-2432", !JF_ISSET(node, JNODE_OVRWR));
  21614. + JF_SET(node, JNODE_RELOC);
  21615. +}
  21616. +
  21617. +/* jload/jwrite/junload give a bread/bwrite/brelse functionality for jnodes */
  21618. +
  21619. +extern int jload_gfp(jnode *, gfp_t, int do_kmap) NONNULL;
  21620. +
  21621. +static inline int jload(jnode *node)
  21622. +{
  21623. + return jload_gfp(node, reiser4_ctx_gfp_mask_get(), 1);
  21624. +}
  21625. +
  21626. +extern int jinit_new(jnode *, gfp_t) NONNULL;
  21627. +extern int jstartio(jnode *) NONNULL;
  21628. +
  21629. +extern void jdrop(jnode *) NONNULL;
  21630. +extern int jwait_io(jnode *, int rw) NONNULL;
  21631. +
  21632. +void jload_prefetch(jnode *);
  21633. +
  21634. +extern jnode *reiser4_alloc_io_head(const reiser4_block_nr * block) NONNULL;
  21635. +extern void reiser4_drop_io_head(jnode * node) NONNULL;
  21636. +
  21637. +static inline reiser4_tree *jnode_get_tree(const jnode * node)
  21638. +{
  21639. + assert("nikita-2691", node != NULL);
  21640. + return node->tree;
  21641. +}
  21642. +
  21643. +extern void pin_jnode_data(jnode *);
  21644. +extern void unpin_jnode_data(jnode *);
  21645. +
  21646. +static inline jnode_type jnode_get_type(const jnode * node)
  21647. +{
  21648. + static const unsigned long state_mask =
  21649. + (1 << JNODE_TYPE_1) | (1 << JNODE_TYPE_2) | (1 << JNODE_TYPE_3);
  21650. +
  21651. + static jnode_type mask_to_type[] = {
  21652. + /* JNODE_TYPE_3 : JNODE_TYPE_2 : JNODE_TYPE_1 */
  21653. +
  21654. + /* 000 */
  21655. + [0] = JNODE_FORMATTED_BLOCK,
  21656. + /* 001 */
  21657. + [1] = JNODE_UNFORMATTED_BLOCK,
  21658. + /* 010 */
  21659. + [2] = JNODE_BITMAP,
  21660. + /* 011 */
  21661. + [3] = LAST_JNODE_TYPE, /*invalid */
  21662. + /* 100 */
  21663. + [4] = JNODE_INODE,
  21664. + /* 101 */
  21665. + [5] = LAST_JNODE_TYPE,
  21666. + /* 110 */
  21667. + [6] = JNODE_IO_HEAD,
  21668. + /* 111 */
  21669. + [7] = LAST_JNODE_TYPE, /* invalid */
  21670. + };
  21671. +
  21672. + return mask_to_type[(node->state & state_mask) >> JNODE_TYPE_1];
  21673. +}
  21674. +
  21675. +/* returns true if node is a znode */
  21676. +static inline int jnode_is_znode(const jnode * node)
  21677. +{
  21678. + return jnode_get_type(node) == JNODE_FORMATTED_BLOCK;
  21679. +}
  21680. +
  21681. +static inline int jnode_is_flushprepped(jnode * node)
  21682. +{
  21683. + assert("jmacd-78212", node != NULL);
  21684. + assert_spin_locked(&(node->guard));
  21685. + return !JF_ISSET(node, JNODE_DIRTY) || JF_ISSET(node, JNODE_RELOC) ||
  21686. + JF_ISSET(node, JNODE_OVRWR);
  21687. +}
  21688. +
  21689. +/* Return true if @node has already been processed by the squeeze and allocate
  21690. + process. This implies the block address has been finalized for the
  21691. + duration of this atom (or it is clean and will remain in place). If this
  21692. + returns true you may use the block number as a hint. */
  21693. +static inline int jnode_check_flushprepped(jnode * node)
  21694. +{
  21695. + int result;
  21696. +
  21697. + /* It must be clean or relocated or wandered. New allocations are set
  21698. + * to relocate. */
  21699. + spin_lock_jnode(node);
  21700. + result = jnode_is_flushprepped(node);
  21701. + spin_unlock_jnode(node);
  21702. + return result;
  21703. +}
  21704. +
  21705. +/* returns true if node is unformatted */
  21706. +static inline int jnode_is_unformatted(const jnode * node)
  21707. +{
  21708. + assert("jmacd-0123", node != NULL);
  21709. + return jnode_get_type(node) == JNODE_UNFORMATTED_BLOCK;
  21710. +}
  21711. +
  21712. +/* returns true if node represents a cluster cache page */
  21713. +static inline int jnode_is_cluster_page(const jnode * node)
  21714. +{
  21715. + assert("edward-50", node != NULL);
  21716. + return (JF_ISSET(node, JNODE_CLUSTER_PAGE));
  21717. +}
  21718. +
  21719. +/* returns true if node is builtin inode's jnode */
  21720. +static inline int jnode_is_inode(const jnode * node)
  21721. +{
  21722. + assert("vs-1240", node != NULL);
  21723. + return jnode_get_type(node) == JNODE_INODE;
  21724. +}
  21725. +
  21726. +static inline jnode_plugin *jnode_ops_of(const jnode_type type)
  21727. +{
  21728. + assert("nikita-2367", type < LAST_JNODE_TYPE);
  21729. + return jnode_plugin_by_id((reiser4_plugin_id) type);
  21730. +}
  21731. +
  21732. +static inline jnode_plugin *jnode_ops(const jnode * node)
  21733. +{
  21734. + assert("nikita-2366", node != NULL);
  21735. +
  21736. + return jnode_ops_of(jnode_get_type(node));
  21737. +}
  21738. +
  21739. +/* Get the index of a block. */
  21740. +static inline unsigned long jnode_get_index(jnode * node)
  21741. +{
  21742. + return jnode_ops(node)->index(node);
  21743. +}
  21744. +
  21745. +/* return true if "node" is the root */
  21746. +static inline int jnode_is_root(const jnode * node)
  21747. +{
  21748. + return jnode_is_znode(node) && znode_is_root(JZNODE(node));
  21749. +}
  21750. +
  21751. +extern struct address_space *mapping_jnode(const jnode * node);
  21752. +extern unsigned long index_jnode(const jnode * node);
  21753. +
  21754. +static inline void jput(jnode * node);
  21755. +extern void jput_final(jnode * node);
  21756. +
  21757. +/* bump data counter on @node */
  21758. +static inline void add_d_ref(jnode * node/* node to increase d_count of */)
  21759. +{
  21760. + assert("nikita-1962", node != NULL);
  21761. +
  21762. + atomic_inc(&node->d_count);
  21763. + if (jnode_is_unformatted(node) || jnode_is_znode(node))
  21764. + LOCK_CNT_INC(d_refs);
  21765. +}
  21766. +
  21767. +/* jput() - decrement x_count reference counter on znode.
  21768. +
  21769. + Count may drop to 0, jnode stays in cache until memory pressure causes the
  21770. + eviction of its page. The c_count variable also ensures that children are
  21771. + pressured out of memory before the parent. The jnode remains hashed as
  21772. + long as the VM allows its page to stay in memory.
  21773. +*/
  21774. +static inline void jput(jnode * node)
  21775. +{
  21776. + assert("jmacd-509", node != NULL);
  21777. + assert("jmacd-510", atomic_read(&node->x_count) > 0);
  21778. + assert("zam-926", reiser4_schedulable());
  21779. + LOCK_CNT_DEC(x_refs);
  21780. +
  21781. + rcu_read_lock();
  21782. + /*
  21783. + * we don't need any kind of lock here--jput_final() uses RCU.
  21784. + */
  21785. + if (unlikely(atomic_dec_and_test(&node->x_count)))
  21786. + jput_final(node);
  21787. + else
  21788. + rcu_read_unlock();
  21789. + assert("nikita-3473", reiser4_schedulable());
  21790. +}
  21791. +
  21792. +extern void jrelse(jnode * node);
  21793. +extern void jrelse_tail(jnode * node);
  21794. +
  21795. +extern jnode *jnode_rip_sync(reiser4_tree * t, jnode * node);
  21796. +
  21797. +/* resolve race with jput */
  21798. +static inline jnode *jnode_rip_check(reiser4_tree * tree, jnode * node)
  21799. +{
  21800. + if (unlikely(JF_ISSET(node, JNODE_RIP)))
  21801. + node = jnode_rip_sync(tree, node);
  21802. + return node;
  21803. +}
  21804. +
  21805. +extern reiser4_key *jnode_build_key(const jnode *node, reiser4_key * key);
  21806. +
  21807. +#if REISER4_DEBUG
  21808. +extern int jnode_invariant_f(const jnode *node, char const **msg);
  21809. +#endif
  21810. +
  21811. +extern jnode_plugin jnode_plugins[LAST_JNODE_TYPE];
  21812. +
  21813. +/* __JNODE_H__ */
  21814. +#endif
  21815. +
  21816. +/* Make Linus happy.
  21817. + Local variables:
  21818. + c-indentation-style: "K&R"
  21819. + mode-name: "LC"
  21820. + c-basic-offset: 8
  21821. + tab-width: 8
  21822. + fill-column: 120
  21823. + End:
  21824. +*/
  21825. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/kassign.c linux-5.16.14/fs/reiser4/kassign.c
  21826. --- linux-5.16.14.orig/fs/reiser4/kassign.c 1970-01-01 01:00:00.000000000 +0100
  21827. +++ linux-5.16.14/fs/reiser4/kassign.c 2022-03-12 13:26:19.656892738 +0100
  21828. @@ -0,0 +1,677 @@
  21829. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  21830. + * reiser4/README */
  21831. +
  21832. +/* Key assignment policy implementation */
  21833. +
  21834. +/*
  21835. + * In reiser4 every piece of file system data and meta-data has a key. Keys
  21836. + * are used to store information in and retrieve it from reiser4 internal
  21837. + * tree. In addition to this, keys define _ordering_ of all file system
  21838. + * information: things having close keys are placed into the same or
  21839. + * neighboring (in the tree order) nodes of the tree. As our block allocator
  21840. + * tries to respect tree order (see flush.c), keys also define order in which
  21841. + * things are laid out on the disk, and hence, affect performance directly.
  21842. + *
  21843. + * Obviously, assignment of keys to data and meta-data should be consistent
  21844. + * across whole file system. Algorithm that calculates a key for a given piece
  21845. + * of data or meta-data is referred to as "key assignment".
  21846. + *
  21847. + * Key assignment is too expensive to be implemented as a plugin (that is,
  21848. + * with an ability to support different key assignment schemas in the same
  21849. + * compiled kernel image). As a compromise, all key-assignment functions and
  21850. + * data-structures are collected in this single file, so that modifications to
  21851. + * key assignment algorithm can be localized. Additional changes may be
  21852. + * required in key.[ch].
  21853. + *
  21854. + * Current default reiser4 key assignment algorithm is dubbed "Plan A". As one
  21855. + * may guess, there is "Plan B" too.
  21856. + *
  21857. + */
  21858. +
  21859. +/*
  21860. + * Additional complication with key assignment implementation is a requirement
  21861. + * to support different key length.
  21862. + */
  21863. +
  21864. +/*
  21865. + * KEY ASSIGNMENT: PLAN A, LONG KEYS.
  21866. + *
  21867. + * DIRECTORY ITEMS
  21868. + *
  21869. + * | 60 | 4 | 7 |1| 56 | 64 | 64 |
  21870. + * +--------------+---+---+-+-------------+------------------+-----------------+
  21871. + * | dirid | 0 | F |H| prefix-1 | prefix-2 | prefix-3/hash |
  21872. + * +--------------+---+---+-+-------------+------------------+-----------------+
  21873. + * | | | | |
  21874. + * | 8 bytes | 8 bytes | 8 bytes | 8 bytes |
  21875. + *
  21876. + * dirid objectid of directory this item is for
  21877. + *
  21878. + * F fibration, see fs/reiser4/plugin/fibration.[ch]
  21879. + *
  21880. + * H 1 if last 8 bytes of the key contain hash,
  21881. + * 0 if last 8 bytes of the key contain prefix-3
  21882. + *
  21883. + * prefix-1 first 7 characters of file name.
  21884. + * Padded by zeroes if name is not long enough.
  21885. + *
  21886. + * prefix-2 next 8 characters of the file name.
  21887. + *
  21888. + * prefix-3 next 8 characters of the file name.
  21889. + *
  21890. + * hash hash of the rest of file name (i.e., portion of file
  21891. + * name not included into prefix-1 and prefix-2).
  21892. + *
  21893. + * File names shorter than 23 (== 7 + 8 + 8) characters are completely encoded
  21894. + * in the key. Such file names are called "short". They are distinguished by H
  21895. + * bit set to 0 in the key.
  21896. + *
  21897. + * Other file names are "long". For long name, H bit is 1, and first 15 (== 7
  21898. + * + 8) characters are encoded in prefix-1 and prefix-2 portions of the
  21899. + * key. Last 8 bytes of the key are occupied by hash of the remaining
  21900. + * characters of the name.
  21901. + *
  21902. + * This key assignment reaches following important goals:
  21903. + *
  21904. + * (1) directory entries are sorted in approximately lexicographical
  21905. + * order.
  21906. + *
  21907. + * (2) collisions (when multiple directory items have the same key), while
  21908. + * principally unavoidable in a tree with fixed length keys, are rare.
  21909. + *
  21910. + * STAT DATA
  21911. + *
  21912. + * | 60 | 4 | 64 | 4 | 60 | 64 |
  21913. + * +--------------+---+-----------------+---+--------------+-----------------+
  21914. + * | locality id | 1 | ordering | 0 | objectid | 0 |
  21915. + * +--------------+---+-----------------+---+--------------+-----------------+
  21916. + * | | | | |
  21917. + * | 8 bytes | 8 bytes | 8 bytes | 8 bytes |
  21918. + *
  21919. + * locality id object id of a directory where first name was created for
  21920. + * the object
  21921. + *
  21922. + * ordering copy of second 8-byte portion of the key of directory
  21923. + * entry for the first name of this object. Ordering has a form
  21924. + * {
  21925. + * fibration :7;
  21926. + * h :1;
  21927. + * prefix1 :56;
  21928. + * }
  21929. + * see description of key for directory entry above.
  21930. + *
  21931. + * objectid object id for this object
  21932. + *
  21933. + * This key assignment policy is designed to keep stat-data in the same order
  21934. + * as corresponding directory items, thus speeding up readdir/stat types of
  21935. + * workload.
  21936. + *
  21937. + * FILE BODY
  21938. + *
  21939. + * | 60 | 4 | 64 | 4 | 60 | 64 |
  21940. + * +--------------+---+-----------------+---+--------------+-----------------+
  21941. + * | locality id | 4 | ordering | 0 | objectid | offset |
  21942. + * +--------------+---+-----------------+---+--------------+-----------------+
  21943. + * | | | | |
  21944. + * | 8 bytes | 8 bytes | 8 bytes | 8 bytes |
  21945. + *
  21946. + * locality id object id of a directory where first name was created for
  21947. + * the object
  21948. + *
  21949. + * ordering the same as in the key of stat-data for this object
  21950. + *
  21951. + * objectid object id for this object
  21952. + *
  21953. + * offset logical offset from the beginning of this file.
  21954. + * Measured in bytes.
  21955. + *
  21956. + *
  21957. + * KEY ASSIGNMENT: PLAN A, SHORT KEYS.
  21958. + *
  21959. + * DIRECTORY ITEMS
  21960. + *
  21961. + * | 60 | 4 | 7 |1| 56 | 64 |
  21962. + * +--------------+---+---+-+-------------+-----------------+
  21963. + * | dirid | 0 | F |H| prefix-1 | prefix-2/hash |
  21964. + * +--------------+---+---+-+-------------+-----------------+
  21965. + * | | | |
  21966. + * | 8 bytes | 8 bytes | 8 bytes |
  21967. + *
  21968. + * dirid objectid of directory this item is for
  21969. + *
  21970. + * F fibration, see fs/reiser4/plugin/fibration.[ch]
  21971. + *
  21972. + * H 1 if last 8 bytes of the key contain hash,
  21973. + * 0 if last 8 bytes of the key contain prefix-2
  21974. + *
  21975. + * prefix-1 first 7 characters of file name.
  21976. + * Padded by zeroes if name is not long enough.
  21977. + *
  21978. + * prefix-2 next 8 characters of the file name.
  21979. + *
  21980. + * hash hash of the rest of file name (i.e., portion of file
  21981. + * name not included into prefix-1).
  21982. + *
  21983. + * File names shorter than 15 (== 7 + 8) characters are completely encoded in
  21984. + * the key. Such file names are called "short". They are distinguished by H
  21985. + * bit set to 0 in the key.
  21986. + *
  21987. + * Other file names are "long". For long name, H bit is 1, and first 7
  21988. + * characters are encoded in prefix-1 portion of the key. Last 8 bytes of the
  21989. + * key are occupied by hash of the remaining characters of the name.
  21990. + *
  21991. + * STAT DATA
  21992. + *
  21993. + * | 60 | 4 | 4 | 60 | 64 |
  21994. + * +--------------+---+---+--------------+-----------------+
  21995. + * | locality id | 1 | 0 | objectid | 0 |
  21996. + * +--------------+---+---+--------------+-----------------+
  21997. + * | | | |
  21998. + * | 8 bytes | 8 bytes | 8 bytes |
  21999. + *
  22000. + * locality id object id of a directory where first name was created for
  22001. + * the object
  22002. + *
  22003. + * objectid object id for this object
  22004. + *
  22005. + * FILE BODY
  22006. + *
  22007. + * | 60 | 4 | 4 | 60 | 64 |
  22008. + * +--------------+---+---+--------------+-----------------+
  22009. + * | locality id | 4 | 0 | objectid | offset |
  22010. + * +--------------+---+---+--------------+-----------------+
  22011. + * | | | |
  22012. + * | 8 bytes | 8 bytes | 8 bytes |
  22013. + *
  22014. + * locality id object id of a directory where first name was created for
  22015. + * the object
  22016. + *
  22017. + * objectid object id for this object
  22018. + *
  22019. + * offset logical offset from the beginning of this file.
  22020. + * Measured in bytes.
  22021. + *
  22022. + *
  22023. + */
  22024. +
  22025. +#include "debug.h"
  22026. +#include "key.h"
  22027. +#include "kassign.h"
  22028. +#include "vfs_ops.h"
  22029. +#include "inode.h"
  22030. +#include "super.h"
  22031. +#include "dscale.h"
  22032. +
  22033. +#include <linux/types.h> /* for __u?? */
  22034. +#include <linux/fs.h> /* for struct super_block, etc */
  22035. +
  22036. +/* bitmask for H bit (see comment at the beginning of this file) */
  22037. +static const __u64 longname_mark = 0x0100000000000000ull;
  22038. +/* bitmask for F and H portions of the key. */
  22039. +static const __u64 fibration_mask = 0xff00000000000000ull;
  22040. +
  22041. +/* return true if name is not completely encoded in @key */
  22042. +int is_longname_key(const reiser4_key * key)
  22043. +{
  22044. + __u64 highpart;
  22045. +
  22046. + assert("nikita-2863", key != NULL);
  22047. + if (get_key_type(key) != KEY_FILE_NAME_MINOR)
  22048. + reiser4_print_key("oops", key);
  22049. + assert("nikita-2864", get_key_type(key) == KEY_FILE_NAME_MINOR);
  22050. +
  22051. + if (REISER4_LARGE_KEY)
  22052. + highpart = get_key_ordering(key);
  22053. + else
  22054. + highpart = get_key_objectid(key);
  22055. +
  22056. + return (highpart & longname_mark) ? 1 : 0;
  22057. +}
  22058. +
  22059. +/* return true if @name is too long to be completely encoded in the key */
  22060. +int is_longname(const char *name UNUSED_ARG, int len)
  22061. +{
  22062. + if (REISER4_LARGE_KEY)
  22063. + return len > 23;
  22064. + else
  22065. + return len > 15;
  22066. +}
  22067. +
  22068. +/* code ascii string into __u64.
  22069. +
  22070. + Put characters of @name into result (@str) one after another starting
  22071. + from @start_idx-th highest (arithmetically) byte. This produces
  22072. + endian-safe encoding. memcpy(2) will not do.
  22073. +
  22074. +*/
  22075. +static __u64 pack_string(const char *name /* string to encode */ ,
  22076. + int start_idx /* highest byte in result from
  22077. + * which to start encoding */ )
  22078. +{
  22079. + unsigned i;
  22080. + __u64 str;
  22081. +
  22082. + str = 0;
  22083. + for (i = 0; (i < sizeof str - start_idx) && name[i]; ++i) {
  22084. + str <<= 8;
  22085. + str |= (unsigned char)name[i];
  22086. + }
  22087. + str <<= (sizeof str - i - start_idx) << 3;
  22088. + return str;
  22089. +}
  22090. +
  22091. +/* opposite to pack_string(). Takes value produced by pack_string(), restores
  22092. + * string encoded in it and stores result in @buf */
  22093. +char *reiser4_unpack_string(__u64 value, char *buf)
  22094. +{
  22095. + do {
  22096. + *buf = value >> (64 - 8);
  22097. + if (*buf)
  22098. + ++buf;
  22099. + value <<= 8;
  22100. + } while (value != 0);
  22101. + *buf = 0;
  22102. + return buf;
  22103. +}
  22104. +
  22105. +/* obtain name encoded in @key and store it in @buf */
  22106. +char *extract_name_from_key(const reiser4_key * key, char *buf)
  22107. +{
  22108. + char *c;
  22109. +
  22110. + assert("nikita-2868", !is_longname_key(key));
  22111. +
  22112. + c = buf;
  22113. + if (REISER4_LARGE_KEY) {
  22114. + c = reiser4_unpack_string(get_key_ordering(key) &
  22115. + ~fibration_mask, c);
  22116. + c = reiser4_unpack_string(get_key_fulloid(key), c);
  22117. + } else
  22118. + c = reiser4_unpack_string(get_key_fulloid(key) &
  22119. + ~fibration_mask, c);
  22120. + reiser4_unpack_string(get_key_offset(key), c);
  22121. + return buf;
  22122. +}
  22123. +
  22124. +/**
  22125. + * complete_entry_key - calculate entry key by name
  22126. + * @dir: directory where entry is (or will be) in
  22127. + * @name: name to calculate key of
  22128. + * @len: length of name
  22129. + * @result: place to store result in
  22130. + *
  22131. + * Sets fields of entry key @result which depend on file name.
  22132. + * When REISER4_LARGE_KEY is defined three fields of @result are set: ordering,
  22133. + * objectid and offset. Otherwise, objectid and offset are set.
  22134. + */
  22135. +void complete_entry_key(const struct inode *dir, const char *name,
  22136. + int len, reiser4_key *result)
  22137. +{
  22138. +#if REISER4_LARGE_KEY
  22139. + __u64 ordering;
  22140. + __u64 objectid;
  22141. + __u64 offset;
  22142. +
  22143. + assert("nikita-1139", dir != NULL);
  22144. + assert("nikita-1142", result != NULL);
  22145. + assert("nikita-2867", strlen(name) == len);
  22146. +
  22147. + /*
  22148. + * key allocation algorithm for directory entries in case of large
  22149. + * keys:
  22150. + *
  22151. + * If name is not longer than 7 + 8 + 8 = 23 characters, put first 7
  22152. + * characters into ordering field of key, next 8 characters (if any)
  22153. + * into objectid field of key and next 8 ones (if any) into offset
  22154. + * field of key
  22155. + *
  22156. + * If file name is longer than 23 characters, put first 7 characters
  22157. + * into key's ordering, next 8 to objectid and hash of remaining
  22158. + * characters into offset field.
  22159. + *
  22160. + * To distinguish above cases, in latter set up unused high bit in
  22161. + * ordering field.
  22162. + */
  22163. +
  22164. + /* [0-6] characters to ordering */
  22165. + ordering = pack_string(name, 1);
  22166. + if (len > 7) {
  22167. + /* [7-14] characters to objectid */
  22168. + objectid = pack_string(name + 7, 0);
  22169. + if (len > 15) {
  22170. + if (len <= 23) {
  22171. + /* [15-22] characters to offset */
  22172. + offset = pack_string(name + 15, 0);
  22173. + } else {
  22174. + /* note in a key the fact that offset contains
  22175. + * hash */
  22176. + ordering |= longname_mark;
  22177. +
  22178. + /* offset is the hash of the file name's tail */
  22179. + offset = inode_hash_plugin(dir)->hash(name + 15,
  22180. + len - 15);
  22181. + }
  22182. + } else {
  22183. + offset = 0ull;
  22184. + }
  22185. + } else {
  22186. + objectid = 0ull;
  22187. + offset = 0ull;
  22188. + }
  22189. +
  22190. + assert("nikita-3480", inode_fibration_plugin(dir) != NULL);
  22191. + ordering |= inode_fibration_plugin(dir)->fibre(dir, name, len);
  22192. +
  22193. + set_key_ordering(result, ordering);
  22194. + set_key_fulloid(result, objectid);
  22195. + set_key_offset(result, offset);
  22196. + return;
  22197. +
  22198. +#else
  22199. + __u64 objectid;
  22200. + __u64 offset;
  22201. +
  22202. + assert("nikita-1139", dir != NULL);
  22203. + assert("nikita-1142", result != NULL);
  22204. + assert("nikita-2867", strlen(name) == len);
  22205. +
  22206. + /*
  22207. + * key allocation algorithm for directory entries in case of not large
  22208. + * keys:
  22209. + *
  22210. + * If name is not longer than 7 + 8 = 15 characters, put first 7
  22211. + * characters into objectid field of key, next 8 characters (if any)
  22212. + * into offset field of key
  22213. + *
  22214. + * If file name is longer than 15 characters, put first 7 characters
  22215. + * into key's objectid, and hash of remaining characters into offset
  22216. + * field.
  22217. + *
  22218. + * To distinguish above cases, in latter set up unused high bit in
  22219. + * objectid field.
  22220. + */
  22221. +
  22222. + /* [0-6] characters to objectid */
  22223. + objectid = pack_string(name, 1);
  22224. + if (len > 7) {
  22225. + if (len <= 15) {
  22226. + /* [7-14] characters to offset */
  22227. + offset = pack_string(name + 7, 0);
  22228. + } else {
  22229. + /* note in a key the fact that offset contains hash. */
  22230. + objectid |= longname_mark;
  22231. +
  22232. + /* offset is the hash of the file name. */
  22233. + offset = inode_hash_plugin(dir)->hash(name + 7,
  22234. + len - 7);
  22235. + }
  22236. + } else
  22237. + offset = 0ull;
  22238. +
  22239. + assert("nikita-3480", inode_fibration_plugin(dir) != NULL);
  22240. + objectid |= inode_fibration_plugin(dir)->fibre(dir, name, len);
  22241. +
  22242. + set_key_fulloid(result, objectid);
  22243. + set_key_offset(result, offset);
  22244. + return;
  22245. +#endif /* ! REISER4_LARGE_KEY */
  22246. +}
  22247. +
  22248. +/* true, if @key is the key of "." */
  22249. +int is_dot_key(const reiser4_key * key/* key to check */)
  22250. +{
  22251. + assert("nikita-1717", key != NULL);
  22252. + assert("nikita-1718", get_key_type(key) == KEY_FILE_NAME_MINOR);
  22253. + return
  22254. + (get_key_ordering(key) == 0ull) &&
  22255. + (get_key_objectid(key) == 0ull) && (get_key_offset(key) == 0ull);
  22256. +}
  22257. +
  22258. +/* build key for stat-data.
  22259. +
  22260. + return key of stat-data of this object. This should became sd plugin
  22261. + method in the future. For now, let it be here.
  22262. +
  22263. +*/
  22264. +reiser4_key *build_sd_key(const struct inode *target /* inode of an object */ ,
  22265. + reiser4_key * result /* resulting key of @target
  22266. + stat-data */ )
  22267. +{
  22268. + assert("nikita-261", result != NULL);
  22269. +
  22270. + reiser4_key_init(result);
  22271. + set_key_locality(result, reiser4_inode_data(target)->locality_id);
  22272. + set_key_ordering(result, get_inode_ordering(target));
  22273. + set_key_objectid(result, get_inode_oid(target));
  22274. + set_key_type(result, KEY_SD_MINOR);
  22275. + set_key_offset(result, (__u64) 0);
  22276. + return result;
  22277. +}
  22278. +
  22279. +/* encode part of key into &obj_key_id
  22280. +
  22281. + This encodes into @id part of @key sufficient to restore @key later,
  22282. + given that latter is key of object (key of stat-data).
  22283. +
  22284. + See &obj_key_id
  22285. +*/
  22286. +int build_obj_key_id(const reiser4_key * key /* key to encode */ ,
  22287. + obj_key_id * id/* id where key is encoded in */)
  22288. +{
  22289. + assert("nikita-1151", key != NULL);
  22290. + assert("nikita-1152", id != NULL);
  22291. +
  22292. + memcpy(id, key, sizeof *id);
  22293. + return 0;
  22294. +}
  22295. +
  22296. +/* encode reference to @obj in @id.
  22297. +
  22298. + This is like build_obj_key_id() above, but takes inode as parameter. */
  22299. +int build_inode_key_id(const struct inode *obj /* object to build key of */ ,
  22300. + obj_key_id * id/* result */)
  22301. +{
  22302. + reiser4_key sdkey;
  22303. +
  22304. + assert("nikita-1166", obj != NULL);
  22305. + assert("nikita-1167", id != NULL);
  22306. +
  22307. + build_sd_key(obj, &sdkey);
  22308. + build_obj_key_id(&sdkey, id);
  22309. + return 0;
  22310. +}
  22311. +
  22312. +/* decode @id back into @key
  22313. +
  22314. + Restore key of object stat-data from @id. This is dual to
  22315. + build_obj_key_id() above.
  22316. +*/
  22317. +int extract_key_from_id(const obj_key_id * id /* object key id to extract key
  22318. + * from */ ,
  22319. + reiser4_key * key/* result */)
  22320. +{
  22321. + assert("nikita-1153", id != NULL);
  22322. + assert("nikita-1154", key != NULL);
  22323. +
  22324. + reiser4_key_init(key);
  22325. + memcpy(key, id, sizeof *id);
  22326. + return 0;
  22327. +}
  22328. +
  22329. +/* extract objectid of directory from key of directory entry within said
  22330. + directory.
  22331. + */
  22332. +oid_t extract_dir_id_from_key(const reiser4_key * de_key /* key of
  22333. + * directory
  22334. + * entry */ )
  22335. +{
  22336. + assert("nikita-1314", de_key != NULL);
  22337. + return get_key_locality(de_key);
  22338. +}
  22339. +
  22340. +/* encode into @id key of directory entry.
  22341. +
  22342. + Encode into @id information sufficient to later distinguish directory
  22343. + entries within the same directory. This is not whole key, because all
  22344. + directory entries within directory item share locality which is equal
  22345. + to objectid of their directory.
  22346. +
  22347. +*/
  22348. +int build_de_id(const struct inode *dir /* inode of directory */ ,
  22349. + const struct qstr *name /* name to be given to @obj by
  22350. + * directory entry being
  22351. + * constructed */ ,
  22352. + de_id * id/* short key of directory entry */)
  22353. +{
  22354. + reiser4_key key;
  22355. +
  22356. + assert("nikita-1290", dir != NULL);
  22357. + assert("nikita-1292", id != NULL);
  22358. +
  22359. + /* NOTE-NIKITA this is suboptimal. */
  22360. + inode_dir_plugin(dir)->build_entry_key(dir, name, &key);
  22361. + return build_de_id_by_key(&key, id);
  22362. +}
  22363. +
  22364. +/* encode into @id key of directory entry.
  22365. +
  22366. + Encode into @id information sufficient to later distinguish directory
  22367. + entries within the same directory. This is not whole key, because all
  22368. + directory entries within directory item share locality which is equal
  22369. + to objectid of their directory.
  22370. +
  22371. +*/
  22372. +int build_de_id_by_key(const reiser4_key * entry_key /* full key of directory
  22373. + * entry */ ,
  22374. + de_id * id/* short key of directory entry */)
  22375. +{
  22376. + memcpy(id, ((__u64 *) entry_key) + 1, sizeof *id);
  22377. + return 0;
  22378. +}
  22379. +
  22380. +/* restore from @id key of directory entry.
  22381. +
  22382. + Function dual to build_de_id(): given @id and locality, build full
  22383. + key of directory entry within directory item.
  22384. +
  22385. +*/
  22386. +int extract_key_from_de_id(const oid_t locality /* locality of directory
  22387. + * entry */ ,
  22388. + const de_id * id /* directory entry id */ ,
  22389. + reiser4_key * key/* result */)
  22390. +{
  22391. + /* no need to initialise key here: all fields are overwritten */
  22392. + memcpy(((__u64 *) key) + 1, id, sizeof *id);
  22393. + set_key_locality(key, locality);
  22394. + set_key_type(key, KEY_FILE_NAME_MINOR);
  22395. + return 0;
  22396. +}
  22397. +
  22398. +/* compare two &de_id's */
  22399. +cmp_t de_id_cmp(const de_id * id1 /* first &de_id to compare */ ,
  22400. + const de_id * id2/* second &de_id to compare */)
  22401. +{
  22402. + /* NOTE-NIKITA ugly implementation */
  22403. + reiser4_key k1;
  22404. + reiser4_key k2;
  22405. +
  22406. + extract_key_from_de_id((oid_t) 0, id1, &k1);
  22407. + extract_key_from_de_id((oid_t) 0, id2, &k2);
  22408. + return keycmp(&k1, &k2);
  22409. +}
  22410. +
  22411. +/* compare &de_id with key */
  22412. +cmp_t de_id_key_cmp(const de_id * id /* directory entry id to compare */ ,
  22413. + const reiser4_key * key/* key to compare */)
  22414. +{
  22415. + cmp_t result;
  22416. + reiser4_key *k1;
  22417. +
  22418. + k1 = (reiser4_key *) (((unsigned long)id) - sizeof key->el[0]);
  22419. + result = KEY_DIFF_EL(k1, key, 1);
  22420. + if (result == EQUAL_TO) {
  22421. + result = KEY_DIFF_EL(k1, key, 2);
  22422. + if (REISER4_LARGE_KEY && result == EQUAL_TO)
  22423. + result = KEY_DIFF_EL(k1, key, 3);
  22424. + }
  22425. + return result;
  22426. +}
  22427. +
  22428. +/*
  22429. + * return number of bytes necessary to encode @inode identity.
  22430. + */
  22431. +int inode_onwire_size(const struct inode *inode)
  22432. +{
  22433. + int result;
  22434. +
  22435. + result = dscale_bytes_to_write(get_inode_oid(inode));
  22436. + result += dscale_bytes_to_write(get_inode_locality(inode));
  22437. +
  22438. + /*
  22439. + * ordering is large (it usually has highest bits set), so it makes
  22440. + * little sense to dscale it.
  22441. + */
  22442. + if (REISER4_LARGE_KEY)
  22443. + result += sizeof(get_inode_ordering(inode));
  22444. + return result;
  22445. +}
  22446. +
  22447. +/*
  22448. + * encode @inode identity at @start
  22449. + */
  22450. +char *build_inode_onwire(const struct inode *inode, char *start)
  22451. +{
  22452. + start += dscale_write(start, get_inode_locality(inode));
  22453. + start += dscale_write(start, get_inode_oid(inode));
  22454. +
  22455. + if (REISER4_LARGE_KEY) {
  22456. + put_unaligned(cpu_to_le64(get_inode_ordering(inode)), (__le64 *)start);
  22457. + start += sizeof(get_inode_ordering(inode));
  22458. + }
  22459. + return start;
  22460. +}
  22461. +
  22462. +/*
  22463. + * extract key that was previously encoded by build_inode_onwire() at @addr
  22464. + */
  22465. +char *extract_obj_key_id_from_onwire(char *addr, obj_key_id * key_id)
  22466. +{
  22467. + __u64 val;
  22468. +
  22469. + addr += dscale_read(addr, &val);
  22470. + val = (val << KEY_LOCALITY_SHIFT) | KEY_SD_MINOR;
  22471. + put_unaligned(cpu_to_le64(val), (__le64 *)key_id->locality);
  22472. + addr += dscale_read(addr, &val);
  22473. + put_unaligned(cpu_to_le64(val), (__le64 *)key_id->objectid);
  22474. +#if REISER4_LARGE_KEY
  22475. + memcpy(&key_id->ordering, addr, sizeof key_id->ordering);
  22476. + addr += sizeof key_id->ordering;
  22477. +#endif
  22478. + return addr;
  22479. +}
  22480. +
  22481. +/*
  22482. + * skip a key that was previously encoded by build_inode_onwire() at @addr
  22483. + * FIXME: handle IO errors.
  22484. + */
  22485. +char * locate_obj_key_id_onwire(char * addr)
  22486. +{
  22487. + /* locality */
  22488. + addr += dscale_bytes_to_read(addr);
  22489. + /* objectid */
  22490. + addr += dscale_bytes_to_read(addr);
  22491. +#if REISER4_LARGE_KEY
  22492. + addr += sizeof ((obj_key_id *)0)->ordering;
  22493. +#endif
  22494. + return addr;
  22495. +}
  22496. +
  22497. +/* Make Linus happy.
  22498. + Local variables:
  22499. + c-indentation-style: "K&R"
  22500. + mode-name: "LC"
  22501. + c-basic-offset: 8
  22502. + tab-width: 8
  22503. + fill-column: 120
  22504. + End:
  22505. +*/
  22506. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/kassign.h linux-5.16.14/fs/reiser4/kassign.h
  22507. --- linux-5.16.14.orig/fs/reiser4/kassign.h 1970-01-01 01:00:00.000000000 +0100
  22508. +++ linux-5.16.14/fs/reiser4/kassign.h 2022-03-12 13:26:19.656892738 +0100
  22509. @@ -0,0 +1,111 @@
  22510. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  22511. + * reiser4/README */
  22512. +
  22513. +/* Key assignment policy interface. See kassign.c for details. */
  22514. +
  22515. +#if !defined(__KASSIGN_H__)
  22516. +#define __KASSIGN_H__
  22517. +
  22518. +#include "forward.h"
  22519. +#include "key.h"
  22520. +#include "dformat.h"
  22521. +
  22522. +#include <linux/types.h> /* for __u?? */
  22523. +#include <linux/fs.h> /* for struct super_block, etc */
  22524. +#include <linux/dcache.h> /* for struct qstr */
  22525. +
  22526. +/* key assignment functions */
  22527. +
  22528. +/* Information from which key of file stat-data can be uniquely
  22529. + restored. This depends on key assignment policy for
  22530. + stat-data. Currently it's enough to store object id and locality id
  22531. + (60+60==120) bits, because minor packing locality and offset of
  22532. + stat-data key are always known constants: KEY_SD_MINOR and 0
  22533. + respectively. For simplicity 4 bits are wasted in each id, and just
  22534. + two 64 bit integers are stored.
  22535. +
  22536. + This field has to be byte-aligned, because we don't want to waste
  22537. + space in directory entries. There is another side of a coin of
  22538. + course: we waste CPU and bus bandwidth instead, by copying data back
  22539. + and forth.
  22540. +
  22541. + Next optimization: &obj_key_id is mainly used to address stat data from
  22542. + directory entries. Under the assumption that majority of files only have
  22543. + one name (one hard link) from *the* parent directory it seems reasonable
  22544. + to only store objectid of stat data and take its locality from key of
  22545. + directory item.
  22546. +
  22547. + This requires some flag to be added to the &obj_key_id to distinguish
  22548. + between these two cases. Remaining bits in flag byte are then asking to be
  22549. + used to store file type.
  22550. +
  22551. + This optimization requires changes in directory item handling code.
  22552. +
  22553. +*/
  22554. +typedef struct obj_key_id {
  22555. + d8 locality[sizeof(__u64)];
  22556. + ON_LARGE_KEY(d8 ordering[sizeof(__u64)];
  22557. + )
  22558. + d8 objectid[sizeof(__u64)];
  22559. +}
  22560. +obj_key_id;
  22561. +
  22562. +/* Information sufficient to uniquely identify directory entry within
  22563. + compressed directory item.
  22564. +
  22565. + For alignment issues see &obj_key_id above.
  22566. +*/
  22567. +typedef struct de_id {
  22568. + ON_LARGE_KEY(d8 ordering[sizeof(__u64)];)
  22569. + d8 objectid[sizeof(__u64)];
  22570. + d8 offset[sizeof(__u64)];
  22571. +}
  22572. +de_id;
  22573. +
  22574. +extern int inode_onwire_size(const struct inode *obj);
  22575. +extern char *build_inode_onwire(const struct inode *obj, char *area);
  22576. +extern char *locate_obj_key_id_onwire(char *area);
  22577. +extern char *extract_obj_key_id_from_onwire(char *area, obj_key_id * key_id);
  22578. +
  22579. +extern int build_inode_key_id(const struct inode *obj, obj_key_id * id);
  22580. +extern int extract_key_from_id(const obj_key_id * id, reiser4_key * key);
  22581. +extern int build_obj_key_id(const reiser4_key * key, obj_key_id * id);
  22582. +extern oid_t extract_dir_id_from_key(const reiser4_key * de_key);
  22583. +extern int build_de_id(const struct inode *dir, const struct qstr *name,
  22584. + de_id * id);
  22585. +extern int build_de_id_by_key(const reiser4_key * entry_key, de_id * id);
  22586. +extern int extract_key_from_de_id(const oid_t locality, const de_id * id,
  22587. + reiser4_key * key);
  22588. +extern cmp_t de_id_cmp(const de_id * id1, const de_id * id2);
  22589. +extern cmp_t de_id_key_cmp(const de_id * id, const reiser4_key * key);
  22590. +
  22591. +extern int build_readdir_key_common(struct file *dir, reiser4_key * result);
  22592. +extern void build_entry_key_common(const struct inode *dir,
  22593. + const struct qstr *name,
  22594. + reiser4_key * result);
  22595. +extern void build_entry_key_stable_entry(const struct inode *dir,
  22596. + const struct qstr *name,
  22597. + reiser4_key * result);
  22598. +extern int is_dot_key(const reiser4_key * key);
  22599. +extern reiser4_key *build_sd_key(const struct inode *target,
  22600. + reiser4_key * result);
  22601. +
  22602. +extern int is_longname_key(const reiser4_key * key);
  22603. +extern int is_longname(const char *name, int len);
  22604. +extern char *extract_name_from_key(const reiser4_key * key, char *buf);
  22605. +extern char *reiser4_unpack_string(__u64 value, char *buf);
  22606. +extern void complete_entry_key(const struct inode *dir, const char *name,
  22607. + int len, reiser4_key *result);
  22608. +
  22609. +/* __KASSIGN_H__ */
  22610. +#endif
  22611. +
  22612. +/* Make Linus happy.
  22613. + Local variables:
  22614. + c-indentation-style: "K&R"
  22615. + mode-name: "LC"
  22616. + c-basic-offset: 8
  22617. + tab-width: 8
  22618. + fill-column: 120
  22619. + End:
  22620. +*/
  22621. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/Kconfig linux-5.16.14/fs/reiser4/Kconfig
  22622. --- linux-5.16.14.orig/fs/reiser4/Kconfig 1970-01-01 01:00:00.000000000 +0100
  22623. +++ linux-5.16.14/fs/reiser4/Kconfig 2022-03-12 13:26:19.638892695 +0100
  22624. @@ -0,0 +1,36 @@
  22625. +config REISER4_FS
  22626. + tristate "Reiser4 (EXPERIMENTAL)"
  22627. + select ZLIB_INFLATE
  22628. + select ZLIB_DEFLATE
  22629. + select LZO_COMPRESS
  22630. + select LZO_DECOMPRESS
  22631. + select ZSTD_COMPRESS
  22632. + select ZSTD_DECOMPRESS
  22633. + select CRYPTO
  22634. + select CRYPTO_CRC32C
  22635. + help
  22636. + Reiser4 is a filesystem that performs all filesystem operations
  22637. + as atomic transactions, which means that it either performs a
  22638. + write, or it does not, and in the event of a crash it does not
  22639. + partially perform it or corrupt it.
  22640. +
  22641. + It stores files in dancing trees, which are like balanced trees but
  22642. + faster. It packs small files together so that they share blocks
  22643. + without wasting space. This means you can use it to store really
  22644. + small files. It also means that it saves you disk space. It avoids
  22645. + hassling you with anachronisms like having a maximum number of
  22646. + inodes, and wasting space if you use less than that number.
  22647. +
  22648. + Reiser4 is a distinct filesystem type from reiserfs (V3).
  22649. + It's therefore not possible to use reiserfs file systems
  22650. + with reiser4.
  22651. +
  22652. + To learn more about reiser4, go to http://www.namesys.com
  22653. +
  22654. +config REISER4_DEBUG
  22655. + bool "Enable reiser4 debug mode"
  22656. + depends on REISER4_FS
  22657. + help
  22658. + Don't use this unless you are debugging reiser4.
  22659. +
  22660. + If unsure, say N.
  22661. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/key.c linux-5.16.14/fs/reiser4/key.c
  22662. --- linux-5.16.14.orig/fs/reiser4/key.c 1970-01-01 01:00:00.000000000 +0100
  22663. +++ linux-5.16.14/fs/reiser4/key.c 2022-03-12 13:26:19.656892738 +0100
  22664. @@ -0,0 +1,138 @@
  22665. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  22666. + * reiser4/README */
  22667. +
  22668. +/* Key manipulations. */
  22669. +
  22670. +#include "debug.h"
  22671. +#include "key.h"
  22672. +#include "super.h"
  22673. +#include "reiser4.h"
  22674. +
  22675. +#include <linux/types.h> /* for __u?? */
  22676. +
  22677. +/* Minimal possible key: all components are zero. It is presumed that this is
  22678. + independent of key scheme. */
  22679. +static const reiser4_key MINIMAL_KEY = {
  22680. + .el = {
  22681. + 0ull,
  22682. + ON_LARGE_KEY(0ull,)
  22683. + 0ull,
  22684. + 0ull
  22685. + }
  22686. +};
  22687. +
  22688. +/* Maximal possible key: all components are ~0. It is presumed that this is
  22689. + independent of key scheme. */
  22690. +static const reiser4_key MAXIMAL_KEY = {
  22691. + .el = {
  22692. + __constant_cpu_to_le64(~0ull),
  22693. + ON_LARGE_KEY(__constant_cpu_to_le64(~0ull),)
  22694. + __constant_cpu_to_le64(~0ull),
  22695. + __constant_cpu_to_le64(~0ull)
  22696. + }
  22697. +};
  22698. +
  22699. +/* Initialize key. */
  22700. +void reiser4_key_init(reiser4_key * key/* key to init */)
  22701. +{
  22702. + assert("nikita-1169", key != NULL);
  22703. + memset(key, 0, sizeof *key);
  22704. +}
  22705. +
  22706. +/* minimal possible key in the tree. Return pointer to the static storage. */
  22707. +const reiser4_key * reiser4_min_key(void)
  22708. +{
  22709. + return &MINIMAL_KEY;
  22710. +}
  22711. +
  22712. +/* maximum possible key in the tree. Return pointer to the static storage. */
  22713. +const reiser4_key * reiser4_max_key(void)
  22714. +{
  22715. + return &MAXIMAL_KEY;
  22716. +}
  22717. +
  22718. +#if REISER4_DEBUG
  22719. +/* debugging aid: print symbolic name of key type */
  22720. +static const char *type_name(unsigned int key_type/* key type */)
  22721. +{
  22722. + switch (key_type) {
  22723. + case KEY_FILE_NAME_MINOR:
  22724. + return "file name";
  22725. + case KEY_SD_MINOR:
  22726. + return "stat data";
  22727. + case KEY_ATTR_NAME_MINOR:
  22728. + return "attr name";
  22729. + case KEY_ATTR_BODY_MINOR:
  22730. + return "attr body";
  22731. + case KEY_BODY_MINOR:
  22732. + return "file body";
  22733. + default:
  22734. + return "unknown";
  22735. + }
  22736. +}
  22737. +
  22738. +/* debugging aid: print human readable information about key */
  22739. +void reiser4_print_key(const char *prefix /* prefix to print */ ,
  22740. + const reiser4_key * key/* key to print */)
  22741. +{
  22742. + /* turn bold on */
  22743. + /* printf ("\033[1m"); */
  22744. + if (key == NULL)
  22745. + printk("%s: null key\n", prefix);
  22746. + else {
  22747. + if (REISER4_LARGE_KEY)
  22748. + printk("%s: (%Lx:%x:%Lx:%Lx:%Lx:%Lx)", prefix,
  22749. + get_key_locality(key),
  22750. + get_key_type(key),
  22751. + get_key_ordering(key),
  22752. + get_key_band(key),
  22753. + get_key_objectid(key), get_key_offset(key));
  22754. + else
  22755. + printk("%s: (%Lx:%x:%Lx:%Lx:%Lx)", prefix,
  22756. + get_key_locality(key),
  22757. + get_key_type(key),
  22758. + get_key_band(key),
  22759. + get_key_objectid(key), get_key_offset(key));
  22760. + /*
  22761. + * if this is a key of directory entry, try to decode part of
  22762. + * a name stored in the key, and output it.
  22763. + */
  22764. + if (get_key_type(key) == KEY_FILE_NAME_MINOR) {
  22765. + char buf[DE_NAME_BUF_LEN];
  22766. + char *c;
  22767. +
  22768. + c = buf;
  22769. + c = reiser4_unpack_string(get_key_ordering(key), c);
  22770. + reiser4_unpack_string(get_key_fulloid(key), c);
  22771. + printk("[%s", buf);
  22772. + if (is_longname_key(key))
  22773. + /*
  22774. + * only part of the name is stored in the key.
  22775. + */
  22776. + printk("...]\n");
  22777. + else {
  22778. + /*
  22779. + * whole name is stored in the key.
  22780. + */
  22781. + reiser4_unpack_string(get_key_offset(key), buf);
  22782. + printk("%s]\n", buf);
  22783. + }
  22784. + } else {
  22785. + printk("[%s]\n", type_name(get_key_type(key)));
  22786. + }
  22787. + }
  22788. + /* turn bold off */
  22789. + /* printf ("\033[m\017"); */
  22790. +}
  22791. +
  22792. +#endif
  22793. +
  22794. +/* Make Linus happy.
  22795. + Local variables:
  22796. + c-indentation-style: "K&R"
  22797. + mode-name: "LC"
  22798. + c-basic-offset: 8
  22799. + tab-width: 8
  22800. + fill-column: 120
  22801. + End:
  22802. +*/
  22803. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/key.h linux-5.16.14/fs/reiser4/key.h
  22804. --- linux-5.16.14.orig/fs/reiser4/key.h 1970-01-01 01:00:00.000000000 +0100
  22805. +++ linux-5.16.14/fs/reiser4/key.h 2022-03-12 13:26:19.657892741 +0100
  22806. @@ -0,0 +1,393 @@
  22807. +/* Copyright 2000, 2001, 2002, 2003 by Hans Reiser, licensing governed by
  22808. + * reiser4/README */
  22809. +
  22810. +/* Declarations of key-related data-structures and operations on keys. */
  22811. +
  22812. +#if !defined(__REISER4_KEY_H__)
  22813. +#define __REISER4_KEY_H__
  22814. +
  22815. +#include "dformat.h"
  22816. +#include "forward.h"
  22817. +#include "debug.h"
  22818. +
  22819. +#include <linux/prefetch.h>
  22820. +#include <linux/types.h> /* for __u?? */
  22821. +
  22822. +/* Operations on keys in reiser4 tree */
  22823. +
  22824. +/* No access to any of these fields shall be done except via a
  22825. + wrapping macro/function, and that wrapping macro/function shall
  22826. + convert to little endian order. Compare keys will consider cpu byte order. */
  22827. +
  22828. +/* A storage layer implementation difference between a regular unix file body
  22829. + and its attributes is in the typedef below which causes all of the attributes
  22830. + of a file to be near in key to all of the other attributes for all of the
  22831. + files within that directory, and not near to the file itself. It is
  22832. + interesting to consider whether this is the wrong approach, and whether there
  22833. + should be no difference at all. For current usage patterns this choice is
  22834. + probably the right one. */
  22835. +
  22836. +/* possible values for minor packing locality (4 bits required) */
  22837. +typedef enum {
  22838. + /* file name */
  22839. + KEY_FILE_NAME_MINOR = 0,
  22840. + /* stat-data */
  22841. + KEY_SD_MINOR = 1,
  22842. + /* file attribute name */
  22843. + KEY_ATTR_NAME_MINOR = 2,
  22844. + /* file attribute value */
  22845. + KEY_ATTR_BODY_MINOR = 3,
  22846. + /* file body (tail or extent) */
  22847. + KEY_BODY_MINOR = 4,
  22848. +} key_minor_locality;
  22849. +
  22850. +/* Everything stored in the tree has a unique key, which means that the tree is
  22851. + (logically) fully ordered by key. Physical order is determined by dynamic
  22852. + heuristics that attempt to reflect key order when allocating available space,
  22853. + and by the repacker. It is stylistically better to put aggregation
  22854. + information into the key. Thus, if you want to segregate extents from tails,
  22855. + it is better to give them distinct minor packing localities rather than
  22856. + changing block_alloc.c to check the node type when deciding where to allocate
  22857. + the node.
  22858. +
  22859. + The need to randomly displace new directories and large files disturbs this
  22860. + symmetry unfortunately. However, it should be noted that this is a need that
  22861. + is not clearly established given the existence of a repacker. Also, in our
  22862. + current implementation tails have a different minor packing locality from
  22863. + extents, and no files have both extents and tails, so maybe symmetry can be
  22864. + had without performance cost after all. Symmetry is what we ship for now....
  22865. +*/
  22866. +
  22867. +/* Arbitrary major packing localities can be assigned to objects using
  22868. + the reiser4(filenameA/..packing<=some_number) system call.
  22869. +
  22870. + In reiser4, the creat() syscall creates a directory
  22871. +
  22872. + whose default flow (that which is referred to if the directory is
  22873. + read as a file) is the traditional unix file body.
  22874. +
  22875. + whose directory plugin is the 'filedir'
  22876. +
  22877. + whose major packing locality is that of the parent of the object created.
  22878. +
  22879. + The static_stat item is a particular commonly used directory
  22880. + compression (the one for normal unix files).
  22881. +
  22882. + The filedir plugin checks to see if the static_stat item exists.
  22883. + There is a unique key for static_stat. If yes, then it uses the
  22884. + static_stat item for all of the values that it contains. The
  22885. + static_stat item contains a flag for each stat it contains which
  22886. + indicates whether one should look outside the static_stat item for its
  22887. + contents.
  22888. +*/
  22889. +
  22890. +/* offset of fields in reiser4_key. Value of each element of this enum
  22891. + is index within key (thought as array of __u64's) where this field
  22892. + is. */
  22893. +typedef enum {
  22894. + /* major "locale", aka dirid. Sits in 1st element */
  22895. + KEY_LOCALITY_INDEX = 0,
  22896. + /* minor "locale", aka item type. Sits in 1st element */
  22897. + KEY_TYPE_INDEX = 0,
  22898. + ON_LARGE_KEY(KEY_ORDERING_INDEX,)
  22899. + /* "object band". Sits in 2nd element */
  22900. + KEY_BAND_INDEX,
  22901. + /* objectid. Sits in 2nd element */
  22902. + KEY_OBJECTID_INDEX = KEY_BAND_INDEX,
  22903. + /* full objectid. Sits in 2nd element */
  22904. + KEY_FULLOID_INDEX = KEY_BAND_INDEX,
  22905. + /* Offset. Sits in 3rd element */
  22906. + KEY_OFFSET_INDEX,
  22907. + /* Name hash. Sits in 3rd element */
  22908. + KEY_HASH_INDEX = KEY_OFFSET_INDEX,
  22909. + KEY_CACHELINE_END = KEY_OFFSET_INDEX,
  22910. + KEY_LAST_INDEX
  22911. +} reiser4_key_field_index;
  22912. +
  22913. +/* key in reiser4 internal "balanced" tree. It is just array of three
  22914. + 64bit integers in disk byte order (little-endian by default). This
  22915. + array is actually indexed by reiser4_key_field. Each __u64 within
  22916. + this array is called "element". Logical key component encoded within
  22917. + elements are called "fields".
  22918. +
  22919. + We declare this as union with second component dummy to suppress
  22920. + inconvenient array<->pointer casts implied in C. */
  22921. +union reiser4_key {
  22922. + __le64 el[KEY_LAST_INDEX];
  22923. + int pad;
  22924. +};
  22925. +
  22926. +/* bitmasks showing where within reiser4_key particular key is stored. */
  22927. +/* major locality occupies higher 60 bits of the first element */
  22928. +#define KEY_LOCALITY_MASK 0xfffffffffffffff0ull
  22929. +
  22930. +/* minor locality occupies lower 4 bits of the first element */
  22931. +#define KEY_TYPE_MASK 0xfull
  22932. +
  22933. +/* controversial band occupies higher 4 bits of the 2nd element */
  22934. +#define KEY_BAND_MASK 0xf000000000000000ull
  22935. +
  22936. +/* objectid occupies lower 60 bits of the 2nd element */
  22937. +#define KEY_OBJECTID_MASK 0x0fffffffffffffffull
  22938. +
  22939. +/* full 64bit objectid*/
  22940. +#define KEY_FULLOID_MASK 0xffffffffffffffffull
  22941. +
  22942. +/* offset is just 3rd L.M.Nt itself */
  22943. +#define KEY_OFFSET_MASK 0xffffffffffffffffull
  22944. +
  22945. +/* ordering is whole second element */
  22946. +#define KEY_ORDERING_MASK 0xffffffffffffffffull
  22947. +
  22948. +/* how many bits key element should be shifted to left to get particular field
  22949. + */
  22950. +typedef enum {
  22951. + KEY_LOCALITY_SHIFT = 4,
  22952. + KEY_TYPE_SHIFT = 0,
  22953. + KEY_BAND_SHIFT = 60,
  22954. + KEY_OBJECTID_SHIFT = 0,
  22955. + KEY_FULLOID_SHIFT = 0,
  22956. + KEY_OFFSET_SHIFT = 0,
  22957. + KEY_ORDERING_SHIFT = 0,
  22958. +} reiser4_key_field_shift;
  22959. +
  22960. +static inline __u64
  22961. +get_key_el(const reiser4_key * key, reiser4_key_field_index off)
  22962. +{
  22963. + assert("nikita-753", key != NULL);
  22964. + assert("nikita-754", off < KEY_LAST_INDEX);
  22965. + return le64_to_cpu(get_unaligned(&key->el[off]));
  22966. +}
  22967. +
  22968. +static inline void
  22969. +set_key_el(reiser4_key * key, reiser4_key_field_index off, __u64 value)
  22970. +{
  22971. + assert("nikita-755", key != NULL);
  22972. + assert("nikita-756", off < KEY_LAST_INDEX);
  22973. + put_unaligned(cpu_to_le64(value), &key->el[off]);
  22974. +}
  22975. +
  22976. +/* macro to define getter and setter functions for field F with type T */
  22977. +#define DEFINE_KEY_FIELD(L, U, T) \
  22978. +static inline T get_key_ ## L(const reiser4_key *key) \
  22979. +{ \
  22980. + assert("nikita-750", key != NULL); \
  22981. + return (T) (get_key_el(key, KEY_ ## U ## _INDEX) & \
  22982. + KEY_ ## U ## _MASK) >> KEY_ ## U ## _SHIFT; \
  22983. +} \
  22984. + \
  22985. +static inline void set_key_ ## L(reiser4_key * key, T loc) \
  22986. +{ \
  22987. + __u64 el; \
  22988. + \
  22989. + assert("nikita-752", key != NULL); \
  22990. + \
  22991. + el = get_key_el(key, KEY_ ## U ## _INDEX); \
  22992. + /* clear field bits in the key */ \
  22993. + el &= ~KEY_ ## U ## _MASK; \
  22994. + /* actually it should be \
  22995. + \
  22996. + el |= ( loc << KEY_ ## U ## _SHIFT ) & KEY_ ## U ## _MASK; \
  22997. + \
  22998. + but we trust user to never pass values that wouldn't fit \
  22999. + into field. Clearing extra bits is one operation, but this \
  23000. + function is time-critical. \
  23001. + But check this in assertion. */ \
  23002. + assert("nikita-759", ((loc << KEY_ ## U ## _SHIFT) & \
  23003. + ~KEY_ ## U ## _MASK) == 0); \
  23004. + el |= (loc << KEY_ ## U ## _SHIFT); \
  23005. + set_key_el(key, KEY_ ## U ## _INDEX, el); \
  23006. +}
  23007. +
  23008. +typedef __u64 oid_t;
  23009. +
  23010. +/* define get_key_locality(), set_key_locality() */
  23011. +DEFINE_KEY_FIELD(locality, LOCALITY, oid_t);
  23012. +/* define get_key_type(), set_key_type() */
  23013. +DEFINE_KEY_FIELD(type, TYPE, key_minor_locality);
  23014. +/* define get_key_band(), set_key_band() */
  23015. +DEFINE_KEY_FIELD(band, BAND, __u64);
  23016. +/* define get_key_objectid(), set_key_objectid() */
  23017. +DEFINE_KEY_FIELD(objectid, OBJECTID, oid_t);
  23018. +/* define get_key_fulloid(), set_key_fulloid() */
  23019. +DEFINE_KEY_FIELD(fulloid, FULLOID, oid_t);
  23020. +/* define get_key_offset(), set_key_offset() */
  23021. +DEFINE_KEY_FIELD(offset, OFFSET, __u64);
  23022. +#if (REISER4_LARGE_KEY)
  23023. +/* define get_key_ordering(), set_key_ordering() */
  23024. +DEFINE_KEY_FIELD(ordering, ORDERING, __u64);
  23025. +#else
  23026. +static inline __u64 get_key_ordering(const reiser4_key * key)
  23027. +{
  23028. + return 0;
  23029. +}
  23030. +
  23031. +static inline void set_key_ordering(reiser4_key * key, __u64 val)
  23032. +{
  23033. +}
  23034. +#endif
  23035. +
  23036. +/* key comparison result */
  23037. +typedef enum { LESS_THAN = -1, /* if first key is less than second */
  23038. + EQUAL_TO = 0, /* if keys are equal */
  23039. + GREATER_THAN = +1 /* if first key is greater than second */
  23040. +} cmp_t;
  23041. +
  23042. +void reiser4_key_init(reiser4_key * key);
  23043. +
  23044. +/* minimal possible key in the tree. Return pointer to the static storage. */
  23045. +extern const reiser4_key *reiser4_min_key(void);
  23046. +extern const reiser4_key *reiser4_max_key(void);
  23047. +
  23048. +/* helper macro for keycmp() */
  23049. +#define KEY_DIFF(k1, k2, field) \
  23050. +({ \
  23051. + typeof(get_key_ ## field(k1)) f1; \
  23052. + typeof(get_key_ ## field(k2)) f2; \
  23053. + \
  23054. + f1 = get_key_ ## field(k1); \
  23055. + f2 = get_key_ ## field(k2); \
  23056. + \
  23057. + (f1 < f2) ? LESS_THAN : ((f1 == f2) ? EQUAL_TO : GREATER_THAN); \
  23058. +})
  23059. +
  23060. +/* helper macro for keycmp() */
  23061. +#define KEY_DIFF_EL(k1, k2, off) \
  23062. +({ \
  23063. + __u64 e1; \
  23064. + __u64 e2; \
  23065. + \
  23066. + e1 = get_key_el(k1, off); \
  23067. + e2 = get_key_el(k2, off); \
  23068. + \
  23069. + (e1 < e2) ? LESS_THAN : ((e1 == e2) ? EQUAL_TO : GREATER_THAN); \
  23070. +})
  23071. +
  23072. +/* compare `k1' and `k2'. This function is a heart of "key allocation
  23073. + policy". All you need to implement new policy is to add yet another
  23074. + clause here. */
  23075. +static inline cmp_t keycmp(const reiser4_key * k1 /* first key to compare */ ,
  23076. + const reiser4_key * k2/* second key to compare */)
  23077. +{
  23078. + cmp_t result;
  23079. +
  23080. + /*
  23081. + * This function is the heart of reiser4 tree-routines. Key comparison
  23082. + * is among most heavily used operations in the file system.
  23083. + */
  23084. +
  23085. + assert("nikita-439", k1 != NULL);
  23086. + assert("nikita-440", k2 != NULL);
  23087. +
  23088. + /* there is no actual branch here: condition is compile time constant
  23089. + * and constant folding and propagation ensures that only one branch
  23090. + * is actually compiled in. */
  23091. +
  23092. + if (REISER4_PLANA_KEY_ALLOCATION) {
  23093. + /* if physical order of fields in a key is identical
  23094. + with logical order, we can implement key comparison
  23095. + as three 64bit comparisons. */
  23096. + /* logical order of fields in plan-a:
  23097. + locality->type->objectid->offset. */
  23098. + /* compare locality and type at once */
  23099. + result = KEY_DIFF_EL(k1, k2, 0);
  23100. + if (result == EQUAL_TO) {
  23101. + /* compare objectid (and band if it's there) */
  23102. + result = KEY_DIFF_EL(k1, k2, 1);
  23103. + /* compare offset */
  23104. + if (result == EQUAL_TO) {
  23105. + result = KEY_DIFF_EL(k1, k2, 2);
  23106. + if (REISER4_LARGE_KEY && result == EQUAL_TO)
  23107. + result = KEY_DIFF_EL(k1, k2, 3);
  23108. + }
  23109. + }
  23110. + } else if (REISER4_3_5_KEY_ALLOCATION) {
  23111. + result = KEY_DIFF(k1, k2, locality);
  23112. + if (result == EQUAL_TO) {
  23113. + result = KEY_DIFF(k1, k2, objectid);
  23114. + if (result == EQUAL_TO) {
  23115. + result = KEY_DIFF(k1, k2, type);
  23116. + if (result == EQUAL_TO)
  23117. + result = KEY_DIFF(k1, k2, offset);
  23118. + }
  23119. + }
  23120. + } else
  23121. + impossible("nikita-441", "Unknown key allocation scheme!");
  23122. + return result;
  23123. +}
  23124. +
  23125. +/* true if @k1 equals @k2 */
  23126. +static inline int keyeq(const reiser4_key * k1 /* first key to compare */ ,
  23127. + const reiser4_key * k2/* second key to compare */)
  23128. +{
  23129. + assert("nikita-1879", k1 != NULL);
  23130. + assert("nikita-1880", k2 != NULL);
  23131. + return !memcmp(k1, k2, sizeof *k1);
  23132. +}
  23133. +
  23134. +/* true if @k1 is less than @k2 */
  23135. +static inline int keylt(const reiser4_key * k1 /* first key to compare */ ,
  23136. + const reiser4_key * k2/* second key to compare */)
  23137. +{
  23138. + assert("nikita-1952", k1 != NULL);
  23139. + assert("nikita-1953", k2 != NULL);
  23140. + return keycmp(k1, k2) == LESS_THAN;
  23141. +}
  23142. +
  23143. +/* true if @k1 is less than or equal to @k2 */
  23144. +static inline int keyle(const reiser4_key * k1 /* first key to compare */ ,
  23145. + const reiser4_key * k2/* second key to compare */)
  23146. +{
  23147. + assert("nikita-1954", k1 != NULL);
  23148. + assert("nikita-1955", k2 != NULL);
  23149. + return keycmp(k1, k2) != GREATER_THAN;
  23150. +}
  23151. +
  23152. +/* true if @k1 is greater than @k2 */
  23153. +static inline int keygt(const reiser4_key * k1 /* first key to compare */ ,
  23154. + const reiser4_key * k2/* second key to compare */)
  23155. +{
  23156. + assert("nikita-1959", k1 != NULL);
  23157. + assert("nikita-1960", k2 != NULL);
  23158. + return keycmp(k1, k2) == GREATER_THAN;
  23159. +}
  23160. +
  23161. +/* true if @k1 is greater than or equal to @k2 */
  23162. +static inline int keyge(const reiser4_key * k1 /* first key to compare */ ,
  23163. + const reiser4_key * k2/* second key to compare */)
  23164. +{
  23165. + assert("nikita-1956", k1 != NULL);
  23166. + assert("nikita-1957", k2 != NULL); /* October 4: sputnik launched
  23167. + * November 3: Laika */
  23168. + return keycmp(k1, k2) != LESS_THAN;
  23169. +}
  23170. +
  23171. +static inline void prefetchkey(reiser4_key * key)
  23172. +{
  23173. + prefetch(key);
  23174. + prefetch(&key->el[KEY_CACHELINE_END]);
  23175. +}
  23176. +
  23177. +/* (%Lx:%x:%Lx:%Lx:%Lx:%Lx) =
  23178. + 1 + 16 + 1 + 1 + 1 + 1 + 1 + 16 + 1 + 16 + 1 + 16 + 1 */
  23179. +/* size of a buffer suitable to hold human readable key representation */
  23180. +#define KEY_BUF_LEN (80)
  23181. +
  23182. +#if REISER4_DEBUG
  23183. +extern void reiser4_print_key(const char *prefix, const reiser4_key * key);
  23184. +#else
  23185. +#define reiser4_print_key(p, k) noop
  23186. +#endif
  23187. +
  23188. +/* __FS_REISERFS_KEY_H__ */
  23189. +#endif
  23190. +
  23191. +/* Make Linus happy.
  23192. + Local variables:
  23193. + c-indentation-style: "K&R"
  23194. + mode-name: "LC"
  23195. + c-basic-offset: 8
  23196. + tab-width: 8
  23197. + fill-column: 120
  23198. + End:
  23199. +*/
  23200. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/ktxnmgrd.c linux-5.16.14/fs/reiser4/ktxnmgrd.c
  23201. --- linux-5.16.14.orig/fs/reiser4/ktxnmgrd.c 1970-01-01 01:00:00.000000000 +0100
  23202. +++ linux-5.16.14/fs/reiser4/ktxnmgrd.c 2022-03-12 13:26:19.657892741 +0100
  23203. @@ -0,0 +1,215 @@
  23204. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  23205. +/* Transaction manager daemon. */
  23206. +
  23207. +/*
  23208. + * ktxnmgrd is a kernel daemon responsible for committing transactions. It is
  23209. + * needed/important for the following reasons:
  23210. + *
  23211. + * 1. in reiser4 atom is not committed immediately when last transaction
  23212. + * handle closes, unless atom is either too old or too large (see
  23213. + * atom_should_commit()). This is done to avoid committing too frequently,
  23214. + * because:
  23215. + *
  23216. + * 2. sometimes we don't want to commit atom when closing last transaction
  23217. + * handle even if it is old and fat enough. For example, because we are at
  23218. + * this point under directory semaphore, and committing would stall all
  23219. + * accesses to this directory.
  23220. + *
  23221. + * ktxnmgrd bides its time sleeping on a condition variable. When it awakes
  23222. + * either due to (tunable) timeout or because it was explicitly woken up by
  23223. + * call to ktxnmgrd_kick(), it scans list of all atoms and commits ones
  23224. + * eligible.
  23225. + *
  23226. + */
  23227. +
  23228. +#include "debug.h"
  23229. +#include "txnmgr.h"
  23230. +#include "tree.h"
  23231. +#include "ktxnmgrd.h"
  23232. +#include "super.h"
  23233. +#include "reiser4.h"
  23234. +
  23235. +#include <linux/sched.h> /* for struct task_struct */
  23236. +#include <linux/wait.h>
  23237. +#include <linux/suspend.h>
  23238. +#include <linux/kernel.h>
  23239. +#include <linux/writeback.h>
  23240. +#include <linux/kthread.h>
  23241. +#include <linux/freezer.h>
  23242. +
  23243. +static int scan_mgr(struct super_block *);
  23244. +
  23245. +/*
  23246. + * change current->comm so that ps, top, and friends will see changed
  23247. + * state. This serves no useful purpose whatsoever, but also costs nothing. May
  23248. + * be it will make a lonely system administrator feel less alone at 3 A.M.
  23249. + */
  23250. +#define set_comm(state) \
  23251. + snprintf(current->comm, sizeof(current->comm), \
  23252. + "%s:%s:%s", __FUNCTION__, (super)->s_id, (state))
  23253. +
  23254. +/**
  23255. + * ktxnmgrd - kernel txnmgr daemon
  23256. + * @arg: pointer to super block
  23257. + *
  23258. + * The background transaction manager daemon, started as a kernel thread during
  23259. + * reiser4 initialization.
  23260. + */
  23261. +static int ktxnmgrd(void *arg)
  23262. +{
  23263. + struct super_block *super;
  23264. + ktxnmgrd_context *ctx;
  23265. + txn_mgr *mgr;
  23266. + int done = 0;
  23267. +
  23268. + super = arg;
  23269. + mgr = &get_super_private(super)->tmgr;
  23270. +
  23271. + /*
  23272. + * do_fork() just copies task_struct into the new thread. ->fs_context
  23273. + * shouldn't be copied of course. This shouldn't be a problem for the
  23274. + * rest of the code though.
  23275. + */
  23276. + current->journal_info = NULL;
  23277. + ctx = mgr->daemon;
  23278. + while (1) {
  23279. + try_to_freeze();
  23280. + set_comm("wait");
  23281. + {
  23282. + DEFINE_WAIT(__wait);
  23283. +
  23284. + prepare_to_wait(&ctx->wait, &__wait,
  23285. + TASK_INTERRUPTIBLE);
  23286. + if (kthread_should_stop())
  23287. + done = 1;
  23288. + else
  23289. + schedule_timeout(ctx->timeout);
  23290. + finish_wait(&ctx->wait, &__wait);
  23291. + }
  23292. + if (done)
  23293. + break;
  23294. + set_comm("run");
  23295. + spin_lock(&ctx->guard);
  23296. + /* wait timed out or ktxnmgrd was woken up by explicit request
  23297. + * to commit something. Scan list of atoms in txnmgr and look
  23298. + * for too old atoms. */
  23299. + do {
  23300. + ctx->rescan = 0;
  23301. + /* drop ->guard: scan_mgr() commits atoms and may sleep */
  23302. + spin_unlock(&ctx->guard);
  23303. + scan_mgr(super);
  23304. + spin_lock(&ctx->guard);
  23305. + if (ctx->rescan) {
  23306. + /*
  23307. + * the list could be modified while ctx
  23308. + * spinlock was released, we have to repeat
  23309. + * scanning from the beginning
  23310. + */
  23311. + break;
  23312. + }
  23313. + } while (ctx->rescan);
  23314. + spin_unlock(&ctx->guard);
  23315. + }
  23316. + return 0;
  23317. +}
  23318. +
  23319. +#undef set_comm
  23320. +
  23321. +/**
  23322. + * reiser4_init_ktxnmgrd - initialize ktxnmgrd context and start kernel daemon
  23323. + * @super: pointer to super block
  23324. + *
  23325. + * Allocates and initializes ktxnmgrd_context, attaches it to transaction
  23326. + * manager. Starts kernel txnmgr daemon. This is called on mount.
  23327. + */
  23328. +int reiser4_init_ktxnmgrd(struct super_block *super)
  23329. +{
  23330. + txn_mgr *mgr;
  23331. + ktxnmgrd_context *ctx;
  23332. +
  23333. + mgr = &get_super_private(super)->tmgr;
  23334. +
  23335. + assert("zam-1014", mgr->daemon == NULL);
  23336. +
  23337. + ctx = kzalloc(sizeof(ktxnmgrd_context), reiser4_ctx_gfp_mask_get());
  23338. + if (!ctx)
  23339. + return RETERR(-ENOMEM);
  23340. +
  23341. + assert("nikita-2442", ctx != NULL);
  23342. +
  23343. + init_waitqueue_head(&ctx->wait);
  23344. +
  23345. + /*kcond_init(&ctx->startup);*/
  23346. + spin_lock_init(&ctx->guard);
  23347. + ctx->timeout = REISER4_TXNMGR_TIMEOUT;
  23348. + ctx->rescan = 1;
  23349. + mgr->daemon = ctx;
  23350. +
  23351. + ctx->tsk = kthread_run(ktxnmgrd, super, "ktxnmgrd");
  23352. + if (IS_ERR(ctx->tsk)) {
  23353. + int ret = PTR_ERR(ctx->tsk);
  23354. + mgr->daemon = NULL;
  23355. + kfree(ctx);
  23356. + return RETERR(ret);
  23357. + }
  23358. + return 0;
  23359. +}
  23360. +
  23361. +void ktxnmgrd_kick(txn_mgr *mgr)
  23362. +{
  23363. + assert("nikita-3234", mgr != NULL);
  23364. + assert("nikita-3235", mgr->daemon != NULL);
  23365. + wake_up(&mgr->daemon->wait);
  23366. +}
  23367. +
  23368. +int is_current_ktxnmgrd(void)
  23369. +{
  23370. + return (get_current_super_private()->tmgr.daemon->tsk == current);
  23371. +}
  23372. +
  23373. +/**
  23374. + * scan_mgr - commit atoms which are to be committed
  23375. + * @super: super block to commit atoms of
  23376. + *
  23377. + * Commits old atoms.
  23378. + */
  23379. +static int scan_mgr(struct super_block *super)
  23380. +{
  23381. + int ret;
  23382. + reiser4_context ctx;
  23383. +
  23384. + init_stack_context(&ctx, super);
  23385. +
  23386. + ret = commit_some_atoms(&get_super_private(super)->tmgr);
  23387. +
  23388. + reiser4_exit_context(&ctx);
  23389. + return ret;
  23390. +}
  23391. +
  23392. +/**
  23393. + * reiser4_done_ktxnmgrd - stop kernel thread and frees ktxnmgrd context
  23394. + * @super: super block of the file system being unmounted
  23395. + *
  23396. + * This is called on umount. Stops ktxnmgrd and frees its context.
  23397. + */
  23398. +void reiser4_done_ktxnmgrd(struct super_block *super)
  23399. +{
  23400. + txn_mgr *mgr;
  23401. +
  23402. + mgr = &get_super_private(super)->tmgr;
  23403. + assert("zam-1012", mgr->daemon != NULL);
  23404. +
  23405. + kthread_stop(mgr->daemon->tsk);
  23406. + kfree(mgr->daemon);
  23407. + mgr->daemon = NULL;
  23408. +}
  23409. +
  23410. +/*
  23411. + * Local variables:
  23412. + * c-indentation-style: "K&R"
  23413. + * mode-name: "LC"
  23414. + * c-basic-offset: 8
  23415. + * tab-width: 8
  23416. + * fill-column: 120
  23417. + * End:
  23418. + */
  23419. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/ktxnmgrd.h linux-5.16.14/fs/reiser4/ktxnmgrd.h
  23420. --- linux-5.16.14.orig/fs/reiser4/ktxnmgrd.h 1970-01-01 01:00:00.000000000 +0100
  23421. +++ linux-5.16.14/fs/reiser4/ktxnmgrd.h 2022-03-12 13:26:19.657892741 +0100
  23422. @@ -0,0 +1,52 @@
  23423. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  23424. + * reiser4/README */
  23425. +
  23426. +/* Transaction manager daemon. See ktxnmgrd.c for comments. */
  23427. +
  23428. +#ifndef __KTXNMGRD_H__
  23429. +#define __KTXNMGRD_H__
  23430. +
  23431. +#include "txnmgr.h"
  23432. +
  23433. +#include <linux/fs.h>
  23434. +#include <linux/wait.h>
  23435. +#include <linux/completion.h>
  23436. +#include <linux/spinlock.h>
  23437. +#include <asm/atomic.h>
  23438. +#include <linux/sched.h> /* for struct task_struct */
  23439. +
  23440. +/* in this structure all data necessary to start up, shut down and communicate
  23441. + * with ktxnmgrd are kept. */
  23442. +struct ktxnmgrd_context {
  23443. + /* wait queue head on which ktxnmgrd sleeps */
  23444. + wait_queue_head_t wait;
  23445. + /* spin lock protecting all fields of this structure */
  23446. + spinlock_t guard;
  23447. + /* timeout of sleeping on ->wait */
  23448. + signed long timeout;
  23449. + /* kernel thread running ktxnmgrd */
  23450. + struct task_struct *tsk;
  23451. + /* list of all file systems served by this ktxnmgrd */
  23452. + struct list_head queue;
  23453. + /* should ktxnmgrd repeat scanning of atoms? */
  23454. + unsigned int rescan:1;
  23455. +};
  23456. +
  23457. +extern int reiser4_init_ktxnmgrd(struct super_block *);
  23458. +extern void reiser4_done_ktxnmgrd(struct super_block *);
  23459. +
  23460. +extern void ktxnmgrd_kick(txn_mgr * mgr);
  23461. +extern int is_current_ktxnmgrd(void);
  23462. +
  23463. +/* __KTXNMGRD_H__ */
  23464. +#endif
  23465. +
  23466. +/* Make Linus happy.
  23467. + Local variables:
  23468. + c-indentation-style: "K&R"
  23469. + mode-name: "LC"
  23470. + c-basic-offset: 8
  23471. + tab-width: 8
  23472. + fill-column: 120
  23473. + End:
  23474. +*/
  23475. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/lock.c linux-5.16.14/fs/reiser4/lock.c
  23476. --- linux-5.16.14.orig/fs/reiser4/lock.c 1970-01-01 01:00:00.000000000 +0100
  23477. +++ linux-5.16.14/fs/reiser4/lock.c 2022-03-12 13:26:19.660892748 +0100
  23478. @@ -0,0 +1,1237 @@
  23479. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  23480. + * reiser4/README */
  23481. +
  23482. +/* Traditional deadlock avoidance is achieved by acquiring all locks in a single
  23483. + order. V4 balances the tree from the bottom up, and searches the tree from
  23484. + the top down, and that is really the way we want it, so tradition won't work
  23485. + for us.
  23486. +
  23487. + Instead we have two lock orderings, a high priority lock ordering, and a low
  23488. + priority lock ordering. Each node in the tree has a lock in its znode.
  23489. +
  23490. + Suppose we have a set of processes which lock (R/W) tree nodes. Each process
  23491. + has a set (maybe empty) of already locked nodes ("process locked set"). Each
  23492. + process may have a pending lock request to a node locked by another process.
  23493. + Note: we lock and unlock, but do not transfer locks: it is possible
  23494. + transferring locks instead would save some bus locking....
  23495. +
  23496. + Deadlock occurs when we have a loop constructed from process locked sets and
  23497. + lock request vectors.
  23498. +
  23499. + NOTE: The reiser4 "tree" is a tree on disk, but its cached representation in
  23500. + memory is extended with "znodes" with which we connect nodes with their left
  23501. + and right neighbors using sibling pointers stored in the znodes. When we
  23502. + perform balancing operations we often go from left to right and from right to
  23503. + left.
  23504. +
  23505. + +-P1-+ +-P3-+
  23506. + |+--+| V1 |+--+|
  23507. + ||N1|| -------> ||N3||
  23508. + |+--+| |+--+|
  23509. + +----+ +----+
  23510. + ^ |
  23511. + |V2 |V3
  23512. + | v
  23513. + +---------P2---------+
  23514. + |+--+ +--+|
  23515. + ||N2| -------- |N4||
  23516. + |+--+ +--+|
  23517. + +--------------------+
  23518. +
  23519. + We solve this by ensuring that only low priority processes lock in top to
  23520. + bottom order and from right to left, and high priority processes lock from
  23521. + bottom to top and left to right.
  23522. +
  23523. + ZAM-FIXME-HANS: order not just node locks in this way, order atom locks, and
  23524. + kill those damn busy loops.
  23525. + ANSWER(ZAM): atom locks (which are introduced by ASTAGE_CAPTURE_WAIT atom
  23526. + stage) cannot be ordered that way. There are no rules what nodes can belong
  23527. + to the atom and what nodes cannot. We cannot define what is right or left
  23528. + direction, what is top or bottom. We can take immediate parent or side
  23529. + neighbor of one node, but nobody guarantees that, say, left neighbor node is
  23530. + not a far right neighbor for other nodes from the same atom. It breaks
  23531. + deadlock avoidance rules and hi-low priority locking cannot be applied for
  23532. + atom locks.
  23533. +
  23534. + How does it help to avoid deadlocks ?
  23535. +
  23536. + Suppose we have a deadlock with n processes. Processes from one priority
  23537. + class never deadlock because they take locks in one consistent
  23538. + order.
  23539. +
  23540. + So, any possible deadlock loop must have low priority as well as high
  23541. + priority processes. There are no other lock priority levels except low and
  23542. + high. We know that any deadlock loop contains at least one node locked by a
  23543. + low priority process and requested by a high priority process. If this
  23544. + situation is caught and resolved it is sufficient to avoid deadlocks.
  23545. +
  23546. + V4 DEADLOCK PREVENTION ALGORITHM IMPLEMENTATION.
  23547. +
  23548. + The deadlock prevention algorithm is based on comparing
  23549. + priorities of node owners (processes which keep znode locked) and
  23550. + requesters (processes which want to acquire a lock on znode). We
  23551. + implement a scheme where low-priority owners yield locks to
  23552. + high-priority requesters. We created a signal passing system that
  23553. + is used to ask low-priority processes to yield one or more locked
  23554. + znodes.
  23555. +
  23556. + The condition when a znode needs to change its owners is described by the
  23557. + following formula:
  23558. +
  23559. + #############################################
  23560. + # #
  23561. + # (number of high-priority requesters) > 0 #
  23562. + # AND #
  23563. + # (numbers of high-priority owners) == 0 #
  23564. + # #
  23565. + #############################################
  23566. +
  23567. + Note that a low-priority process delays node releasing if another
  23568. + high-priority process owns this node. So, slightly more strictly speaking,
  23569. + to have a deadlock capable cycle you must have a loop in which a high
  23570. + priority process is waiting on a low priority process to yield a node, which
  23571. + is slightly different from saying a high priority process is waiting on a
  23572. + node owned by a low priority process.
  23573. +
  23574. + It is enough to avoid deadlocks if we prevent any low-priority process from
  23575. + falling asleep if its locked set contains a node which satisfies the
  23576. + deadlock condition.
  23577. +
  23578. + That condition is implicitly or explicitly checked in all places where new
  23579. + high-priority requests may be added or removed from node request queue or
  23580. + high-priority process takes or releases a lock on node. The main
  23581. + goal of these checks is to never lose the moment when node becomes "has
  23582. + wrong owners" and send "must-yield-this-lock" signals to its low-pri owners
  23583. + at that time.
  23584. +
  23585. + The information about received signals is stored in the per-process
  23586. + structure (lock stack) and analyzed before a low-priority process goes to
  23587. + sleep but after a "fast" attempt to lock a node fails. Any signal wakes
  23588. + sleeping process up and forces him to re-check lock status and received
  23589. + signal info. If "must-yield-this-lock" signals were received the locking
  23590. + primitive (longterm_lock_znode()) fails with -E_DEADLOCK error code.
  23591. +
  23592. + V4 LOCKING DRAWBACKS
  23593. +
  23594. + If we have already balanced on one level, and we are propagating our changes
  23595. + upward to a higher level, it could be very messy to surrender all locks on
  23596. + the lower level because we put so much computational work into it, and
  23597. + reverting them to their state before they were locked might be very complex.
  23598. + We also don't want to acquire all locks before performing balancing because
  23599. + that would either be almost as much work as the balancing, or it would be
  23600. + too conservative and lock too much. We want balancing to be done only at
  23601. + high priority. Yet, we might want to go to the left one node and use some
  23602. + of its empty space... So we make one attempt at getting the node to the left
  23603. + using try_lock, and if it fails we do without it, because we didn't really
  23604. + need it, it was only a nice to have.
  23605. +
  23606. + LOCK STRUCTURES DESCRIPTION
  23607. +
  23608. + The following data structures are used in the reiser4 locking
  23609. + implementation:
  23610. +
  23611. + All fields related to long-term locking are stored in znode->lock.
  23612. +
  23613. + The lock stack is a per thread object. It owns all znodes locked by the
  23614. + thread. One znode may be locked by several threads in case of read lock or
  23615. + one znode may be write locked by one thread several times. The special link
  23616. + objects (lock handles) support n<->m relation between znodes and lock
  23617. + owners.
  23618. +
  23619. + <Thread 1> <Thread 2>
  23620. +
  23621. + +---------+ +---------+
  23622. + | LS1 | | LS2 |
  23623. + +---------+ +---------+
  23624. + ^ ^
  23625. + |---------------+ +----------+
  23626. + v v v v
  23627. + +---------+ +---------+ +---------+ +---------+
  23628. + | LH1 | | LH2 | | LH3 | | LH4 |
  23629. + +---------+ +---------+ +---------+ +---------+
  23630. + ^ ^ ^ ^
  23631. + | +------------+ |
  23632. + v v v
  23633. + +---------+ +---------+ +---------+
  23634. + | Z1 | | Z2 | | Z3 |
  23635. + +---------+ +---------+ +---------+
  23636. +
  23637. + Thread 1 locked znodes Z1 and Z2, thread 2 locked znodes Z2 and Z3. The
  23638. + picture above shows that lock stack LS1 has a list of 2 lock handles LH1 and
  23639. + LH2, lock stack LS2 has a list with lock handles LH3 and LH4 on it. Znode
  23640. + Z1 is locked by only one thread, znode has only one lock handle LH1 on its
  23641. + list, similar situation is for Z3 which is locked by the thread 2 only. Z2
  23642. + is locked (for read) twice by different threads and two lock handles are on
  23643. + its list. Each lock handle represents a single relation of a locking of a
  23644. + znode by a thread. Locking of a znode is an establishing of a locking
  23645. + relation between the lock stack and the znode by adding of a new lock handle
  23646. + to a list of lock handles, the lock stack. The lock stack links all lock
  23647. + handles for all znodes locked by the lock stack. The znode list groups all
  23648. + lock handles for all locks stacks which locked the znode.
  23649. +
  23650. + Yet another relation may exist between znode and lock owners. If lock
  23651. + procedure cannot immediately take lock on an object it adds the lock owner
  23652. + on special `requestors' list belongs to znode. That list represents a
  23653. + queue of pending lock requests. Because one lock owner may request only
  23654. + only one lock object at a time, it is a 1->n relation between lock objects
  23655. + and a lock owner implemented as it is described above. Full information
  23656. + (priority, pointers to lock and link objects) about each lock request is
  23657. + stored in lock owner structure in `request' field.
  23658. +
  23659. + SHORT_TERM LOCKING
  23660. +
  23661. + This is a list of primitive operations over lock stacks / lock handles /
  23662. + znodes and locking descriptions for them.
  23663. +
  23664. + 1. locking / unlocking which is done by two list insertion/deletion, one
  23665. + to/from znode's list of lock handles, another one is to/from lock stack's
  23666. + list of lock handles. The first insertion is protected by
  23667. + znode->lock.guard spinlock. The list owned by the lock stack can be
  23668. + modified only by thread who owns the lock stack and nobody else can
  23669. + modify/read it. There is nothing to be protected by a spinlock or
  23670. + something else.
  23671. +
  23672. + 2. adding/removing a lock request to/from znode requesters list. The rule is
  23673. + that znode->lock.guard spinlock should be taken for this.
  23674. +
  23675. + 3. we can traverse list of lock handles and use references to lock stacks who
  23676. + locked given znode if znode->lock.guard spinlock is taken.
  23677. +
  23678. + 4. If a lock stack is associated with a znode as a lock requestor or lock
  23679. + owner its existence is guaranteed by znode->lock.guard spinlock. Some its
  23680. + (lock stack's) fields should be protected from being accessed in parallel
  23681. + by two or more threads. Please look at lock_stack structure definition
  23682. + for the info how those fields are protected. */
  23683. +
  23684. +/* Znode lock and capturing intertwining. */
  23685. +/* In current implementation we capture formatted nodes before locking
  23686. + them. Take a look on longterm lock znode, reiser4_try_capture() request
  23687. + precedes locking requests. The longterm_lock_znode function unconditionally
  23688. + captures znode before even checking of locking conditions.
  23689. +
  23690. + Another variant is to capture znode after locking it. It was not tested, but
  23691. + at least one deadlock condition is supposed to be there. One thread has
  23692. + locked a znode (Node-1) and calls reiser4_try_capture() for it.
  23693. + reiser4_try_capture() sleeps because znode's atom has CAPTURE_WAIT state.
  23694. + Second thread is a flushing thread, its current atom is the atom Node-1
  23695. + belongs to. Second thread wants to lock Node-1 and sleeps because Node-1
  23696. + is locked by the first thread. The described situation is a deadlock. */
  23697. +
  23698. +#include "debug.h"
  23699. +#include "txnmgr.h"
  23700. +#include "znode.h"
  23701. +#include "jnode.h"
  23702. +#include "tree.h"
  23703. +#include "plugin/node/node.h"
  23704. +#include "super.h"
  23705. +
  23706. +#include <linux/spinlock.h>
  23707. +
  23708. +#if REISER4_DEBUG
  23709. +static int request_is_deadlock_safe(znode * , znode_lock_mode,
  23710. + znode_lock_request);
  23711. +#endif
  23712. +
  23713. +/* Returns a lock owner associated with current thread */
  23714. +lock_stack *get_current_lock_stack(void)
  23715. +{
  23716. + return &get_current_context()->stack;
  23717. +}
  23718. +
  23719. +/* Wakes up all low priority owners informing them about possible deadlock */
  23720. +static void wake_up_all_lopri_owners(znode * node)
  23721. +{
  23722. + lock_handle *handle;
  23723. +
  23724. + assert_spin_locked(&(node->lock.guard));
  23725. + list_for_each_entry(handle, &node->lock.owners, owners_link) {
  23726. + assert("nikita-1832", handle->node == node);
  23727. + /* count this signal in owner->nr_signaled */
  23728. + if (!handle->signaled) {
  23729. + handle->signaled = 1;
  23730. + atomic_inc(&handle->owner->nr_signaled);
  23731. + /* Wake up a single process */
  23732. + reiser4_wake_up(handle->owner);
  23733. + }
  23734. + }
  23735. +}
  23736. +
  23737. +/* Adds a lock to a lock owner, which means creating a link to the lock and
  23738. + putting the link into the two lists all links are on (the doubly linked list
  23739. + that forms the lock_stack, and the doubly linked list of links attached
  23740. + to a lock.
  23741. +*/
  23742. +static inline void
  23743. +link_object(lock_handle * handle, lock_stack * owner, znode * node)
  23744. +{
  23745. + assert("jmacd-810", handle->owner == NULL);
  23746. + assert_spin_locked(&(node->lock.guard));
  23747. +
  23748. + handle->owner = owner;
  23749. + handle->node = node;
  23750. +
  23751. + assert("reiser4-4",
  23752. + ergo(list_empty_careful(&owner->locks), owner->nr_locks == 0));
  23753. +
  23754. + /* add lock handle to the end of lock_stack's list of locks */
  23755. + list_add_tail(&handle->locks_link, &owner->locks);
  23756. + ON_DEBUG(owner->nr_locks++);
  23757. + reiser4_ctx_gfp_mask_set();
  23758. +
  23759. + /* add lock handle to the head of znode's list of owners */
  23760. + list_add(&handle->owners_link, &node->lock.owners);
  23761. + handle->signaled = 0;
  23762. +}
  23763. +
  23764. +/* Breaks a relation between a lock and its owner */
  23765. +static inline void unlink_object(lock_handle * handle)
  23766. +{
  23767. + assert("zam-354", handle->owner != NULL);
  23768. + assert("nikita-1608", handle->node != NULL);
  23769. + assert_spin_locked(&(handle->node->lock.guard));
  23770. + assert("nikita-1829", handle->owner == get_current_lock_stack());
  23771. + assert("reiser4-5", handle->owner->nr_locks > 0);
  23772. +
  23773. + /* remove lock handle from lock_stack's list of locks */
  23774. + list_del(&handle->locks_link);
  23775. + ON_DEBUG(handle->owner->nr_locks--);
  23776. + reiser4_ctx_gfp_mask_set();
  23777. + assert("reiser4-6",
  23778. + ergo(list_empty_careful(&handle->owner->locks),
  23779. + handle->owner->nr_locks == 0));
  23780. + /* remove lock handle from znode's list of owners */
  23781. + list_del(&handle->owners_link);
  23782. + /* indicates that lock handle is free now */
  23783. + handle->node = NULL;
  23784. +#if REISER4_DEBUG
  23785. + INIT_LIST_HEAD(&handle->locks_link);
  23786. + INIT_LIST_HEAD(&handle->owners_link);
  23787. + handle->owner = NULL;
  23788. +#endif
  23789. +}
  23790. +
  23791. +/* Actually locks an object knowing that we are able to do this */
  23792. +static void lock_object(lock_stack * owner)
  23793. +{
  23794. + struct lock_request *request;
  23795. + znode *node;
  23796. +
  23797. + request = &owner->request;
  23798. + node = request->node;
  23799. + assert_spin_locked(&(node->lock.guard));
  23800. + if (request->mode == ZNODE_READ_LOCK) {
  23801. + node->lock.nr_readers++;
  23802. + } else {
  23803. + /* check that we don't switched from read to write lock */
  23804. + assert("nikita-1840", node->lock.nr_readers <= 0);
  23805. + /* We allow recursive locking; a node can be locked several
  23806. + times for write by same process */
  23807. + node->lock.nr_readers--;
  23808. + }
  23809. +
  23810. + link_object(request->handle, owner, node);
  23811. +
  23812. + if (owner->curpri)
  23813. + node->lock.nr_hipri_owners++;
  23814. +}
  23815. +
  23816. +/* Check for recursive write locking */
  23817. +static int recursive(lock_stack * owner)
  23818. +{
  23819. + int ret;
  23820. + znode *node;
  23821. + lock_handle *lh;
  23822. +
  23823. + node = owner->request.node;
  23824. +
  23825. + /* Owners list is not empty for a locked node */
  23826. + assert("zam-314", !list_empty_careful(&node->lock.owners));
  23827. + assert("nikita-1841", owner == get_current_lock_stack());
  23828. + assert_spin_locked(&(node->lock.guard));
  23829. +
  23830. + lh = list_entry(node->lock.owners.next, lock_handle, owners_link);
  23831. + ret = (lh->owner == owner);
  23832. +
  23833. + /* Recursive read locking should be done usual way */
  23834. + assert("zam-315", !ret || owner->request.mode == ZNODE_WRITE_LOCK);
  23835. + /* mixing of read/write locks is not allowed */
  23836. + assert("zam-341", !ret || znode_is_wlocked(node));
  23837. +
  23838. + return ret;
  23839. +}
  23840. +
  23841. +#if REISER4_DEBUG
  23842. +/* Returns true if the lock is held by the calling thread. */
  23843. +int znode_is_any_locked(const znode * node)
  23844. +{
  23845. + lock_handle *handle;
  23846. + lock_stack *stack;
  23847. + int ret;
  23848. +
  23849. + if (!znode_is_locked(node))
  23850. + return 0;
  23851. +
  23852. + stack = get_current_lock_stack();
  23853. +
  23854. + spin_lock_stack(stack);
  23855. +
  23856. + ret = 0;
  23857. +
  23858. + list_for_each_entry(handle, &stack->locks, locks_link) {
  23859. + if (handle->node == node) {
  23860. + ret = 1;
  23861. + break;
  23862. + }
  23863. + }
  23864. +
  23865. + spin_unlock_stack(stack);
  23866. +
  23867. + return ret;
  23868. +}
  23869. +
  23870. +#endif
  23871. +
  23872. +/* Returns true if a write lock is held by the calling thread. */
  23873. +int znode_is_write_locked(const znode * node)
  23874. +{
  23875. + lock_stack *stack;
  23876. + lock_handle *handle;
  23877. +
  23878. + assert("jmacd-8765", node != NULL);
  23879. +
  23880. + if (!znode_is_wlocked(node))
  23881. + return 0;
  23882. +
  23883. + stack = get_current_lock_stack();
  23884. +
  23885. + /*
  23886. + * When znode is write locked, all owner handles point to the same lock
  23887. + * stack. Get pointer to lock stack from the first lock handle from
  23888. + * znode's owner list
  23889. + */
  23890. + handle = list_entry(node->lock.owners.next, lock_handle, owners_link);
  23891. +
  23892. + return (handle->owner == stack);
  23893. +}
  23894. +
  23895. +/* This "deadlock" condition is the essential part of reiser4 locking
  23896. + implementation. This condition is checked explicitly by calling
  23897. + check_deadlock_condition() or implicitly in all places where znode lock
  23898. + state (set of owners and request queue) is changed. Locking code is
  23899. + designed to use this condition to trigger procedure of passing object from
  23900. + low priority owner(s) to high priority one(s).
  23901. +
  23902. + The procedure results in passing an event (setting lock_handle->signaled
  23903. + flag) and counting this event in nr_signaled field of owner's lock stack
  23904. + object and wakeup owner's process.
  23905. +*/
  23906. +static inline int check_deadlock_condition(znode * node)
  23907. +{
  23908. + assert_spin_locked(&(node->lock.guard));
  23909. + return node->lock.nr_hipri_requests > 0
  23910. + && node->lock.nr_hipri_owners == 0;
  23911. +}
  23912. +
  23913. +static int check_livelock_condition(znode * node, znode_lock_mode mode)
  23914. +{
  23915. + zlock * lock = &node->lock;
  23916. +
  23917. + return mode == ZNODE_READ_LOCK &&
  23918. + lock->nr_readers >= 0 && lock->nr_hipri_write_requests > 0;
  23919. +}
  23920. +
  23921. +/* checks lock/request compatibility */
  23922. +static int can_lock_object(lock_stack * owner)
  23923. +{
  23924. + znode *node = owner->request.node;
  23925. +
  23926. + assert_spin_locked(&(node->lock.guard));
  23927. +
  23928. + /* See if the node is disconnected. */
  23929. + if (unlikely(ZF_ISSET(node, JNODE_IS_DYING)))
  23930. + return RETERR(-EINVAL);
  23931. +
  23932. + /* Do not ever try to take a lock if we are going in low priority
  23933. + direction and a node have a high priority request without high
  23934. + priority owners. */
  23935. + if (unlikely(!owner->curpri && check_deadlock_condition(node)))
  23936. + return RETERR(-E_REPEAT);
  23937. + if (unlikely(owner->curpri &&
  23938. + check_livelock_condition(node, owner->request.mode)))
  23939. + return RETERR(-E_REPEAT);
  23940. + if (unlikely(!is_lock_compatible(node, owner->request.mode)))
  23941. + return RETERR(-E_REPEAT);
  23942. + return 0;
  23943. +}
  23944. +
/* Setting of a high priority to the process. It clears "signaled" flags
   because znode locked by high-priority process can't satisfy our "deadlock
   condition".

   Walks every lock handle held by @owner (the current thread), bumps
   nr_hipri_owners on each locked znode and clears the per-handle "signaled"
   flag; finally marks the stack high-priority and resets the pending signal
   counter. */
static void set_high_priority(lock_stack * owner)
{
	assert("nikita-1846", owner == get_current_lock_stack());
	/* Do nothing if current priority is already high */
	if (!owner->curpri) {
		/* We don't need locking for owner->locks list, because, this
		 * function is only called with the lock stack of the current
		 * thread, and no other thread can play with owner->locks list
		 * and/or change ->node pointers of lock handles in this list.
		 *
		 * (Interrupts also are not involved.)
		 */
		lock_handle *item = list_entry(owner->locks.next, lock_handle,
					       locks_link);
		/* open-coded list walk over owner->locks */
		while (&owner->locks != &item->locks_link) {
			znode *node = item->node;

			spin_lock_zlock(&node->lock);

			node->lock.nr_hipri_owners++;

			/* we can safely set signaled to zero, because
			   previous statement (nr_hipri_owners ++) guarantees
			   that signaled will be never set again. */
			item->signaled = 0;
			spin_unlock_zlock(&node->lock);

			item = list_entry(item->locks_link.next, lock_handle,
					  locks_link);
		}
		owner->curpri = 1;
		atomic_set(&owner->nr_signaled, 0);
	}
}
  23982. +
/* Sets a low priority to the process.

   Mirror of set_high_priority(): walks all lock handles held by @owner,
   drops the hi-pri-owner count on each znode and, where that exposes a
   deadlock condition, marks the handle "signaled" so the current thread
   will notice it must yield the lock. */
static void set_low_priority(lock_stack * owner)
{
	assert("nikita-3075", owner == get_current_lock_stack());
	/* Do nothing if current priority is already low */
	if (owner->curpri) {
		/* scan all locks (lock handles) held by @owner, which is
		   actually current thread, and check whether we are reaching
		   deadlock possibility anywhere.
		 */
		lock_handle *handle = list_entry(owner->locks.next, lock_handle,
						 locks_link);
		while (&owner->locks != &handle->locks_link) {
			znode *node = handle->node;
			spin_lock_zlock(&node->lock);
			/* this thread just was hipri owner of @node, so
			   nr_hipri_owners has to be greater than zero. */
			assert("nikita-1835", node->lock.nr_hipri_owners > 0);
			node->lock.nr_hipri_owners--;
			/* If we have deadlock condition, adjust a nr_signaled
			   field. It is enough to set "signaled" flag only for
			   current process, other low-pri owners will be
			   signaled and waken up after current process unlocks
			   this object and any high-priority requestor takes
			   control. */
			if (check_deadlock_condition(node)
			    && !handle->signaled) {
				handle->signaled = 1;
				atomic_inc(&owner->nr_signaled);
			}
			spin_unlock_zlock(&node->lock);
			handle = list_entry(handle->locks_link.next,
					    lock_handle, locks_link);
		}
		owner->curpri = 0;
	}
}
  24020. +
  24021. +static void remove_lock_request(lock_stack * requestor)
  24022. +{
  24023. + zlock * lock = &requestor->request.node->lock;
  24024. +
  24025. + if (requestor->curpri) {
  24026. + assert("nikita-1838", lock->nr_hipri_requests > 0);
  24027. + lock->nr_hipri_requests--;
  24028. + if (requestor->request.mode == ZNODE_WRITE_LOCK)
  24029. + lock->nr_hipri_write_requests--;
  24030. + }
  24031. + list_del(&requestor->requestors_link);
  24032. +}
  24033. +
/* Fail every pending lock request queued on @node: each waiter is removed
   from the requestors list, given -EINVAL as its result and woken up.
   Used when the node becomes invalid (JNODE_IS_DYING).

   Caller must hold the node's zlock spin lock. */
static void invalidate_all_lock_requests(znode * node)
{
	lock_stack *requestor, *tmp;

	assert_spin_locked(&(node->lock.guard));

	list_for_each_entry_safe(requestor, tmp, &node->lock.requestors,
				 requestors_link) {
		remove_lock_request(requestor);
		requestor->request.ret_code = -EINVAL;
		reiser4_wake_up(requestor);
		/* ZNODE_NO_LOCK tells the sleeper its request is finished */
		requestor->request.mode = ZNODE_NO_LOCK;
	}
}
  24048. +
/* Grant as many queued lock requests on @node as are now compatible.

   Scans the requestors list in FIFO order; each request approved by
   can_lock_object() is granted, removed from the queue, and its owner is
   woken with ret_code 0. Scanning stops early once the node becomes
   write-locked (no further request could be granted).

   Caller must hold the node's zlock spin lock. */
static void dispatch_lock_requests(znode * node)
{
	lock_stack *requestor, *tmp;

	assert_spin_locked(&(node->lock.guard));

	list_for_each_entry_safe(requestor, tmp, &node->lock.requestors,
				 requestors_link) {
		if (znode_is_write_locked(node))
			break;
		if (!can_lock_object(requestor)) {
			lock_object(requestor);
			remove_lock_request(requestor);
			requestor->request.ret_code = 0;
			reiser4_wake_up(requestor);
			/* ZNODE_NO_LOCK signals the sleeper its request has
			   been completed */
			requestor->request.mode = ZNODE_NO_LOCK;
		}
	}
}
  24068. +
/* release long-term lock, acquired by longterm_lock_znode()

   Drops the lock recorded in @handle: adjusts hi-pri owner bookkeeping,
   possibly destroys a node that "heard banshee" on last write-lock release,
   wakes up pending requestors, and drops the reference the lock held on the
   znode. */
void longterm_unlock_znode(lock_handle * handle)
{
	znode *node = handle->node;
	lock_stack *oldowner = handle->owner;
	int hipri;
	int readers;
	int rdelta;
	int youdie;

	/*
	 * this is time-critical and highly optimized code. Modify carefully.
	 */

	assert("jmacd-1021", handle != NULL);
	assert("jmacd-1022", handle->owner != NULL);
	assert("nikita-1392", LOCK_CNT_GTZ(long_term_locked_znode));

	assert("zam-130", oldowner == get_current_lock_stack());

	LOCK_CNT_DEC(long_term_locked_znode);

	/*
	 * to minimize amount of operations performed under lock, pre-compute
	 * all variables used within critical section. This makes code
	 * obscure.
	 */

	/* was this lock of hi or lo priority */
	hipri = oldowner->curpri ? 1 : 0;
	/* number of readers */
	readers = node->lock.nr_readers;
	/* +1 if write lock, -1 if read lock */
	rdelta = (readers > 0) ? -1 : +1;
	/* true if node is to die and write lock is released */
	youdie = ZF_ISSET(node, JNODE_HEARD_BANSHEE) && (readers < 0);

	spin_lock_zlock(&node->lock);

	assert("zam-101", znode_is_locked(node));

	/* Adjust a number of high priority owners of this lock */
	assert("nikita-1836", node->lock.nr_hipri_owners >= hipri);
	node->lock.nr_hipri_owners -= hipri;

	/* Handle znode deallocation on last write-lock release. */
	if (znode_is_wlocked_once(node)) {
		if (youdie) {
			/* NOTE(review): we return without an explicit
			   spin_unlock_zlock() here — presumably
			   forget_znode() releases the zlock internally;
			   confirm against its definition. */
			forget_znode(handle);
			assert("nikita-2191", znode_invariant(node));
			zput(node);
			return;
		}
	}

	if (handle->signaled)
		atomic_dec(&oldowner->nr_signaled);

	/* Unlocking means owner<->object link deletion */
	unlink_object(handle);

	/* This is enough to be sure whether an object is completely
	   unlocked. */
	node->lock.nr_readers += rdelta;

	/* If the node is locked it must have an owners list. Likewise, if
	   the node is unlocked it must have an empty owners list. */
	assert("zam-319", equi(znode_is_locked(node),
			       !list_empty_careful(&node->lock.owners)));

#if REISER4_DEBUG
	if (!znode_is_locked(node))
		++node->times_locked;
#endif

	/* If there are pending lock requests we wake up a requestor */
	if (!znode_is_wlocked(node))
		dispatch_lock_requests(node);
	if (check_deadlock_condition(node))
		wake_up_all_lopri_owners(node);
	spin_unlock_zlock(&node->lock);

	/* minus one reference from handle->node */
	assert("nikita-2190", znode_invariant(node));
	ON_DEBUG(check_lock_data());
	ON_DEBUG(check_lock_node_data(node));
	zput(node);
}
  24157. +
/* final portion of longterm-lock

   Completes a lock attempt whose outcome is @ok: on @ok == 0 the lock is
   actually taken and a znode reference is counted; in every case the zlock
   spin lock (held on entry) is released. Returns @ok unchanged.

   @mode is currently unused here; it is kept for symmetry with callers. */
static int
lock_tail(lock_stack * owner, int ok, znode_lock_mode mode)
{
	znode *node = owner->request.node;

	assert_spin_locked(&(node->lock.guard));

	/* If we broke with (ok == 0) it means we can_lock, now do it. */
	if (ok == 0) {
		lock_object(owner);
		/* NOTE(review): 0 is presumably ZNODE_NO_LOCK; other sites
		   assign the enum constant — consider unifying. */
		owner->request.mode = 0;
		/* count a reference from lockhandle->node

		   znode was already referenced at the entry to this function,
		   hence taking spin-lock here is not necessary (see comment
		   in the zref()).
		 */
		zref(node);

		LOCK_CNT_INC(long_term_locked_znode);
	}
	spin_unlock_zlock(&node->lock);
	ON_DEBUG(check_lock_data());
	ON_DEBUG(check_lock_node_data(node));
	return ok;
}
  24185. +
/*
 * version of longterm_znode_lock() optimized for the most common case: read
 * lock without any special flags. This is the kind of lock that any tree
 * traversal takes on the root node of the tree, which is very frequent.
 *
 * Returns the lock_tail() result (<= 0) on completion, or 1 to tell the
 * caller to fall back to the generic longterm_lock_znode() slow path.
 */
static int longterm_lock_tryfast(lock_stack * owner)
{
	int result;
	znode *node;
	zlock *lock;

	node = owner->request.node;
	lock = &node->lock;

	assert("nikita-3340", reiser4_schedulable());
	assert("nikita-3341", request_is_deadlock_safe(node,
						       ZNODE_READ_LOCK,
						       ZNODE_LOCK_LOPRI));
	/* quick pre-check under the zlock; only a definitive -EINVAL
	   (dying node) diverts us to the slow path here */
	spin_lock_zlock(lock);
	result = can_lock_object(owner);
	spin_unlock_zlock(lock);

	if (likely(result != -EINVAL)) {
		spin_lock_znode(node);
		result = reiser4_try_capture(ZJNODE(node), ZNODE_READ_LOCK, 0);
		spin_unlock_znode(node);
		spin_lock_zlock(lock);
		if (unlikely(result != 0)) {
			/* capture failed: clear the pending request */
			owner->request.mode = 0;
		} else {
			/* re-check: the lock state may have changed while
			   the zlock was dropped for the capture */
			result = can_lock_object(owner);
			if (unlikely(result == -E_REPEAT)) {
				/* fall back to longterm_lock_znode() */
				spin_unlock_zlock(lock);
				return 1;
			}
		}
		return lock_tail(owner, result, ZNODE_READ_LOCK);
	} else
		return 1;
}
  24227. +
/* locks given lock object

   Acquires a long term lock of @mode on @node on behalf of the current
   thread, recording it in @handle. May block unless ZNODE_LOCK_NONBLOCK is
   set in @request. Interacts with the transaction manager: the node is
   captured into the current atom before the lock is granted. */
int longterm_lock_znode(
	/* local link object (allocated by lock owner
	 * thread, usually on its own stack) */
	lock_handle * handle,
	/* znode we want to lock. */
	znode * node,
	/* {ZNODE_READ_LOCK, ZNODE_WRITE_LOCK}; */
	znode_lock_mode mode,
	/* {0, -EINVAL, -E_DEADLOCK}, see return codes
	   description. */
	znode_lock_request request) {
	int ret;
	int hipri = (request & ZNODE_LOCK_HIPRI) != 0;
	int non_blocking = 0;
	int has_atom;
	txn_capture cap_flags;
	zlock *lock;
	txn_handle *txnh;
	tree_level level;

	/* Get current process context */
	lock_stack *owner = get_current_lock_stack();

	/* Check that the lock handle is initialized and isn't already being
	 * used. */
	assert("jmacd-808", handle->owner == NULL);
	assert("nikita-3026", reiser4_schedulable());
	assert("nikita-3219", request_is_deadlock_safe(node, mode, request));
	assert("zam-1056", atomic_read(&ZJNODE(node)->x_count) > 0);
	/* long term locks are not allowed in the VM contexts (->writepage(),
	 * prune_{d,i}cache()).
	 *
	 * FIXME this doesn't work due to unused-dentry-with-unlinked-inode
	 * bug caused by d_splice_alias() only working for directories.
	 */
	assert("nikita-3547", 1 || ((current->flags & PF_MEMALLOC) == 0));
	assert("zam-1055", mode != ZNODE_NO_LOCK);

	cap_flags = 0;
	if (request & ZNODE_LOCK_NONBLOCK) {
		cap_flags |= TXN_CAPTURE_NONBLOCKING;
		non_blocking = 1;
	}

	if (request & ZNODE_LOCK_DONT_FUSE)
		cap_flags |= TXN_CAPTURE_DONT_FUSE;

	/* If we are changing our process priority we must adjust a number
	   of high priority owners for each znode that we already lock */
	if (hipri) {
		set_high_priority(owner);
	} else {
		set_low_priority(owner);
	}

	/* NOTE(review): @level appears unused below — possibly a leftover
	   from removed statistics code; confirm before removing. */
	level = znode_get_level(node);

	/* Fill request structure with our values. */
	owner->request.mode = mode;
	owner->request.handle = handle;
	owner->request.node = node;

	txnh = get_current_context()->trans;
	lock = &node->lock;

	if (mode == ZNODE_READ_LOCK && request == 0) {
		ret = longterm_lock_tryfast(owner);
		if (ret <= 0)
			return ret;
	}

	has_atom = (txnh->atom != NULL);

	/* Synchronize on node's zlock guard lock. */
	spin_lock_zlock(lock);

	if (znode_is_locked(node) &&
	    mode == ZNODE_WRITE_LOCK && recursive(owner))
		return lock_tail(owner, 0, mode);

	for (;;) {
		/* Check the lock's availability: if it is unavaiable we get
		   E_REPEAT, 0 indicates "can_lock", otherwise the node is
		   invalid. */
		ret = can_lock_object(owner);

		if (unlikely(ret == -EINVAL)) {
			/* @node is dying. Leave it alone. */
			break;
		}

		if (unlikely(ret == -E_REPEAT && non_blocking)) {
			/* either locking of @node by the current thread will
			 * lead to the deadlock, or lock modes are
			 * incompatible. */
			break;
		}

		assert("nikita-1844", (ret == 0)
		       || ((ret == -E_REPEAT) && !non_blocking));
		/* If we can get the lock... Try to capture first before
		   taking the lock. */

		/* first handle commonest case where node and txnh are already
		 * in the same atom. */
		/* safe to do without taking locks, because:
		 *
		 * 1. read of aligned word is atomic with respect to writes to
		 * this word
		 *
		 * 2. false negatives are handled in reiser4_try_capture().
		 *
		 * 3. false positives are impossible.
		 *
		 * PROOF: left as an exercise to the curious reader.
		 *
		 * Just kidding. Here is one:
		 *
		 * At the time T0 txnh->atom is stored in txnh_atom.
		 *
		 * At the time T1 node->atom is stored in node_atom.
		 *
		 * At the time T2 we observe that
		 *
		 * txnh_atom != NULL && node_atom == txnh_atom.
		 *
		 * Imagine that at this moment we acquire node and txnh spin
		 * lock in this order. Suppose that under spin lock we have
		 *
		 * node->atom != txnh->atom, (S1)
		 *
		 * at the time T3.
		 *
		 * txnh->atom != NULL still, because txnh is open by the
		 * current thread.
		 *
		 * Suppose node->atom == NULL, that is, node was un-captured
		 * between T1, and T3. But un-capturing of formatted node is
		 * always preceded by the call to reiser4_invalidate_lock(),
		 * which marks znode as JNODE_IS_DYING under zlock spin
		 * lock. Contradiction, because can_lock_object() above checks
		 * for JNODE_IS_DYING. Hence, node->atom != NULL at T3.
		 *
		 * Suppose that node->atom != node_atom, that is, atom, node
		 * belongs to was fused into another atom: node_atom was fused
		 * into node->atom. Atom of txnh was equal to node_atom at T2,
		 * which means that under spin lock, txnh->atom == node->atom,
		 * because txnh->atom can only follow fusion
		 * chain. Contradicts S1.
		 *
		 * The same for hypothesis txnh->atom != txnh_atom. Hence,
		 * node->atom == node_atom == txnh_atom == txnh->atom. Again
		 * contradicts S1. Hence S1 is false. QED.
		 *
		 */

		if (likely(has_atom && ZJNODE(node)->atom == txnh->atom)) {
			;
		} else {
			/*
			 * unlock zlock spin lock here. It is possible for
			 * longterm_unlock_znode() to sneak in here, but there
			 * is no harm: reiser4_invalidate_lock() will mark znode
			 * as JNODE_IS_DYING and this will be noted by
			 * can_lock_object() below.
			 */
			spin_unlock_zlock(lock);
			spin_lock_znode(node);
			ret = reiser4_try_capture(ZJNODE(node), mode,
						  cap_flags);
			spin_unlock_znode(node);
			spin_lock_zlock(lock);
			if (unlikely(ret != 0)) {
				/* In the failure case, the txnmgr releases
				   the znode's lock (or in some cases, it was
				   released a while ago). There's no need to
				   reacquire it so we should return here,
				   avoid releasing the lock. */
				owner->request.mode = 0;
				break;
			}

			/* Check the lock's availability again -- this is
			   because under some circumstances the capture code
			   has to release and reacquire the znode spinlock. */
			ret = can_lock_object(owner);
		}

		/* This time, a return of (ret == 0) means we can lock, so we
		   should break out of the loop. */
		if (likely(ret != -E_REPEAT || non_blocking))
			break;

		/* Lock is unavailable, we have to wait. */
		ret = reiser4_prepare_to_sleep(owner);
		if (unlikely(ret != 0))
			break;

		assert_spin_locked(&(node->lock.guard));
		if (hipri) {
			/* If we are going in high priority direction then
			   increase high priority requests counter for the
			   node */
			lock->nr_hipri_requests++;
			if (mode == ZNODE_WRITE_LOCK)
				lock->nr_hipri_write_requests++;
			/* If there are no high priority owners for a node,
			   then immediately wake up low priority owners, so
			   they can detect possible deadlock */
			if (lock->nr_hipri_owners == 0)
				wake_up_all_lopri_owners(node);
		}
		list_add_tail(&owner->requestors_link, &lock->requestors);

		/* Ok, here we have prepared a lock request, so unlock
		   a znode ... */
		spin_unlock_zlock(lock);
		/* ... and sleep */
		reiser4_go_to_sleep(owner);
		/* ZNODE_NO_LOCK in ->request.mode means the dispatcher
		   already completed (granted or failed) our request */
		if (owner->request.mode == ZNODE_NO_LOCK)
			goto request_is_done;
		spin_lock_zlock(lock);
		if (owner->request.mode == ZNODE_NO_LOCK) {
			spin_unlock_zlock(lock);
request_is_done:
			if (owner->request.ret_code == 0) {
				LOCK_CNT_INC(long_term_locked_znode);
				zref(node);
			}
			return owner->request.ret_code;
		}
		/* spurious wakeup: requeue on the next loop iteration */
		remove_lock_request(owner);
	}

	return lock_tail(owner, ret, mode);
}
  24465. +
/* lock object invalidation means changing of lock object state to `INVALID'
   and waiting for all other processes to cancel theirs lock requests.

   Precondition (see asserts): the caller write-locks @node exactly once and
   holds its zlock spin lock; the spin lock is released before returning. */
void reiser4_invalidate_lock(lock_handle * handle /* path to lock
						   * owner and lock
						   * object is being
						   * invalidated. */ )
{
	znode *node = handle->node;
	lock_stack *owner = handle->owner;

	assert("zam-325", owner == get_current_lock_stack());
	assert("zam-103", znode_is_write_locked(node));
	assert("nikita-1393", !ZF_ISSET(node, JNODE_LEFT_CONNECTED));
	assert("nikita-1793", !ZF_ISSET(node, JNODE_RIGHT_CONNECTED));
	assert("nikita-1394", ZF_ISSET(node, JNODE_HEARD_BANSHEE));
	assert("nikita-3097", znode_is_wlocked_once(node));
	assert_spin_locked(&(node->lock.guard));

	if (handle->signaled)
		atomic_dec(&owner->nr_signaled);

	ZF_SET(node, JNODE_IS_DYING);
	unlink_object(handle);
	/* forcibly mark the lock as free */
	node->lock.nr_readers = 0;

	/* fail all queued requests with -EINVAL and wake the sleepers */
	invalidate_all_lock_requests(node);
	spin_unlock_zlock(&node->lock);
}
  24494. +
  24495. +/* Initializes lock_stack. */
  24496. +void init_lock_stack(lock_stack * owner /* pointer to
  24497. + * allocated
  24498. + * structure. */ )
  24499. +{
  24500. + INIT_LIST_HEAD(&owner->locks);
  24501. + INIT_LIST_HEAD(&owner->requestors_link);
  24502. + spin_lock_init(&owner->sguard);
  24503. + owner->curpri = 1;
  24504. + init_waitqueue_head(&owner->wait);
  24505. +}
  24506. +
  24507. +/* Initializes lock object. */
  24508. +void reiser4_init_lock(zlock * lock /* pointer on allocated
  24509. + * uninitialized lock object
  24510. + * structure. */ )
  24511. +{
  24512. + memset(lock, 0, sizeof(zlock));
  24513. + spin_lock_init(&lock->guard);
  24514. + INIT_LIST_HEAD(&lock->requestors);
  24515. + INIT_LIST_HEAD(&lock->owners);
  24516. +}
  24517. +
/* Transfer a lock handle (presumably so that variables can be moved between
   stack and heap locations).

   With @unlink_old set (move_lh()) @old is detached and @new simply replaces
   it. With @unlink_old clear (copy_lh()) @old stays attached and all
   per-lock accounting (nr_readers, nr_hipri_owners, signal count, znode
   reference, lock counter) is duplicated for the new handle. */
static void
move_lh_internal(lock_handle * new, lock_handle * old, int unlink_old)
{
	znode *node = old->node;
	lock_stack *owner = old->owner;
	int signaled;

	/* locks_list, modified by link_object() is not protected by
	   anything. This is valid because only current thread ever modifies
	   locks_list of its lock_stack.
	 */
	assert("nikita-1827", owner == get_current_lock_stack());
	assert("nikita-1831", new->owner == NULL);

	spin_lock_zlock(&node->lock);

	signaled = old->signaled;
	if (unlink_old) {
		unlink_object(old);
	} else {
		if (node->lock.nr_readers > 0) {
			/* read-locked: one more reader */
			node->lock.nr_readers += 1;
		} else {
			/* write-locked: one more recursive writer (writers
			   are counted negatively) */
			node->lock.nr_readers -= 1;
		}
		if (signaled)
			atomic_inc(&owner->nr_signaled);
		if (owner->curpri)
			node->lock.nr_hipri_owners += 1;
		LOCK_CNT_INC(long_term_locked_znode);

		zref(node);
	}
	link_object(new, owner, node);
	new->signaled = signaled;

	spin_unlock_zlock(&node->lock);
}
  24558. +
  24559. +void move_lh(lock_handle * new, lock_handle * old)
  24560. +{
  24561. + move_lh_internal(new, old, /*unlink_old */ 1);
  24562. +}
  24563. +
  24564. +void copy_lh(lock_handle * new, lock_handle * old)
  24565. +{
  24566. + move_lh_internal(new, old, /*unlink_old */ 0);
  24567. +}
  24568. +
  24569. +/* after getting -E_DEADLOCK we unlock znodes until this function returns false
  24570. + */
  24571. +int reiser4_check_deadlock(void)
  24572. +{
  24573. + lock_stack *owner = get_current_lock_stack();
  24574. + return atomic_read(&owner->nr_signaled) != 0;
  24575. +}
  24576. +
  24577. +/* Before going to sleep we re-check "release lock" requests which might come
  24578. + from threads with hi-pri lock priorities. */
  24579. +int reiser4_prepare_to_sleep(lock_stack * owner)
  24580. +{
  24581. + assert("nikita-1847", owner == get_current_lock_stack());
  24582. +
  24583. + /* We return -E_DEADLOCK if one or more "give me the lock" messages are
  24584. + * counted in nr_signaled */
  24585. + if (unlikely(atomic_read(&owner->nr_signaled) != 0)) {
  24586. + assert("zam-959", !owner->curpri);
  24587. + return RETERR(-E_DEADLOCK);
  24588. + }
  24589. + return 0;
  24590. +}
  24591. +
/* Wakes up a single thread

   The wakeup flag is set before waking the queue so that the wait_event()
   condition in reiser4_go_to_sleep() observes it. */
void __reiser4_wake_up(lock_stack * owner)
{
	atomic_set(&owner->wakeup, 1);
	wake_up(&owner->wait);
}
  24598. +
/* Puts a thread to sleep

   Blocks until __reiser4_wake_up() sets ->wakeup; the flag is consumed
   (reset to 0) before returning so the next sleep starts clean. */
void reiser4_go_to_sleep(lock_stack * owner)
{
	/* Well, we might sleep here, so holding of any spinlocks is no-no */
	assert("nikita-3027", reiser4_schedulable());

	wait_event(owner->wait, atomic_read(&owner->wakeup));
	atomic_set(&owner->wakeup, 0);
}
  24608. +
  24609. +int lock_stack_isclean(lock_stack * owner)
  24610. +{
  24611. + if (list_empty_careful(&owner->locks)) {
  24612. + assert("zam-353", atomic_read(&owner->nr_signaled) == 0);
  24613. + return 1;
  24614. + }
  24615. +
  24616. + return 0;
  24617. +}
  24618. +
  24619. +#if REISER4_DEBUG
  24620. +
  24621. +/*
  24622. + * debugging functions
  24623. + */
  24624. +
  24625. +static void list_check(struct list_head *head)
  24626. +{
  24627. + struct list_head *pos;
  24628. +
  24629. + list_for_each(pos, head)
  24630. + assert("", (pos->prev != NULL && pos->next != NULL &&
  24631. + pos->prev->next == pos && pos->next->prev == pos));
  24632. +}
  24633. +
/* check consistency of locking data-structures hanging of the @stack

   Walks the ->locks list under the stack's own spin lock. */
static void check_lock_stack(lock_stack * stack)
{
	spin_lock_stack(stack);
	/* check that stack->locks is not corrupted */
	list_check(&stack->locks);
	spin_unlock_stack(stack);
}
  24642. +
/* check consistency of locking data structures for the current thread's
   lock stack */
void check_lock_data(void)
{
	check_lock_stack(&get_current_context()->stack);
}
  24648. +
/* check consistency of locking data structures for @node

   Validates both the owners and requestors lists under the zlock. */
void check_lock_node_data(znode * node)
{
	spin_lock_zlock(&node->lock);
	list_check(&node->lock.owners);
	list_check(&node->lock.requestors);
	spin_unlock_zlock(&node->lock);
}
  24657. +
  24658. +/* check that given lock request is dead lock safe. This check is, of course,
  24659. + * not exhaustive. */
  24660. +static int
  24661. +request_is_deadlock_safe(znode * node, znode_lock_mode mode,
  24662. + znode_lock_request request)
  24663. +{
  24664. + lock_stack *owner;
  24665. +
  24666. + owner = get_current_lock_stack();
  24667. + /*
  24668. + * check that hipri lock request is not issued when there are locked
  24669. + * nodes at the higher levels.
  24670. + */
  24671. + if (request & ZNODE_LOCK_HIPRI && !(request & ZNODE_LOCK_NONBLOCK) &&
  24672. + znode_get_level(node) != 0) {
  24673. + lock_handle *item;
  24674. +
  24675. + list_for_each_entry(item, &owner->locks, locks_link) {
  24676. + znode *other;
  24677. +
  24678. + other = item->node;
  24679. +
  24680. + if (znode_get_level(other) == 0)
  24681. + continue;
  24682. + if (znode_get_level(other) > znode_get_level(node))
  24683. + return 0;
  24684. + }
  24685. + }
  24686. + return 1;
  24687. +}
  24688. +
  24689. +#endif
  24690. +
  24691. +/* return pointer to static storage with name of lock_mode. For
  24692. + debugging */
  24693. +const char *lock_mode_name(znode_lock_mode lock/* lock mode to get name of */)
  24694. +{
  24695. + if (lock == ZNODE_READ_LOCK)
  24696. + return "read";
  24697. + else if (lock == ZNODE_WRITE_LOCK)
  24698. + return "write";
  24699. + else {
  24700. + static char buf[30];
  24701. +
  24702. + sprintf(buf, "unknown: %i", lock);
  24703. + return buf;
  24704. + }
  24705. +}
  24706. +
  24707. +/* Make Linus happy.
  24708. + Local variables:
  24709. + c-indentation-style: "K&R"
  24710. + mode-name: "LC"
  24711. + c-basic-offset: 8
  24712. + tab-width: 8
  24713. + fill-column: 79
  24714. + End:
  24715. +*/
  24716. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/lock.h linux-5.16.14/fs/reiser4/lock.h
  24717. --- linux-5.16.14.orig/fs/reiser4/lock.h 1970-01-01 01:00:00.000000000 +0100
  24718. +++ linux-5.16.14/fs/reiser4/lock.h 2022-03-12 13:26:19.660892748 +0100
  24719. @@ -0,0 +1,250 @@
  24720. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  24721. + * reiser4/README */
  24722. +
  24723. +/* Long term locking data structures. See lock.c for details. */
  24724. +
  24725. +#ifndef __LOCK_H__
  24726. +#define __LOCK_H__
  24727. +
  24728. +#include "forward.h"
  24729. +#include "debug.h"
  24730. +#include "dformat.h"
  24731. +#include "key.h"
  24732. +#include "coord.h"
  24733. +#include "plugin/node/node.h"
  24734. +#include "txnmgr.h"
  24735. +#include "readahead.h"
  24736. +
  24737. +#include <linux/types.h>
  24738. +#include <linux/spinlock.h>
  24739. +#include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */
  24740. +#include <asm/atomic.h>
  24741. +#include <linux/wait.h>
  24742. +
/* Per-znode lock object */
struct zlock {
	spinlock_t guard;
	/* The number of readers if positive; the number of recursively taken
	   write locks if negative. Protected by zlock spin lock. */
	int nr_readers;
	/* A number of processes (lock_stacks) that have this object
	   locked with high priority */
	unsigned nr_hipri_owners;
	/* A number of attempts to lock znode in high priority direction */
	unsigned nr_hipri_requests;
	/* A number of pending high priority WRITE lock requests (a subset
	   of nr_hipri_requests) */
	unsigned nr_hipri_write_requests;
	/* A linked list of lock_handle objects that contains pointers
	   for all lock_stacks which have this lock object locked */
	struct list_head owners;
	/* A linked list of lock_stacks that wait for this lock */
	struct list_head requestors;
};
  24761. +
/* acquire the spin lock guarding @lock, maintaining the per-thread lock
   counters used by the debugging checks */
static inline void spin_lock_zlock(zlock *lock)
{
	/* check that zlock is not locked */
	assert("", LOCK_CNT_NIL(spin_locked_zlock));
	/* check that spinlocks of lower priorities are not held */
	assert("", LOCK_CNT_NIL(spin_locked_stack));

	spin_lock(&lock->guard);

	LOCK_CNT_INC(spin_locked_zlock);
	LOCK_CNT_INC(spin_locked);
}
  24774. +
/* release the spin lock guarding @lock; counterpart of spin_lock_zlock() */
static inline void spin_unlock_zlock(zlock *lock)
{
	assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_zlock));
	assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));

	LOCK_CNT_DEC(spin_locked_zlock);
	LOCK_CNT_DEC(spin_locked);

	spin_unlock(&lock->guard);
}
  24785. +
/* predicates on the zlock ->nr_readers encoding: positive means that many
   readers, negative means that many recursive write locks, zero means the
   object is unlocked */
#define lock_is_locked(lock) ((lock)->nr_readers != 0)
#define lock_is_rlocked(lock) ((lock)->nr_readers > 0)
#define lock_is_wlocked(lock) ((lock)->nr_readers < 0)
#define lock_is_wlocked_once(lock) ((lock)->nr_readers == -1)
#define lock_can_be_rlocked(lock) ((lock)->nr_readers >= 0)
/* a write request needs the object fully free; a read request only needs
   the absence of writers */
#define lock_mode_compatible(lock, mode) \
	(((mode) == ZNODE_WRITE_LOCK && !lock_is_locked(lock)) || \
	 ((mode) == ZNODE_READ_LOCK && lock_can_be_rlocked(lock)))
  24794. +
/* Since we have R/W znode locks we need additional bidirectional `link'
   objects to implement n<->m relationship between lock owners and lock
   objects. We call them `lock handles'.

   Locking: see lock.c/"SHORT-TERM LOCKING"
*/
struct lock_handle {
	/* This flag indicates that a signal to yield a lock was passed to
	   lock owner and counted in owner->nr_signalled

	   Locking: this is accessed under spin lock on ->node.
	 */
	int signaled;
	/* A link to owner of a lock */
	lock_stack *owner;
	/* A link to znode locked */
	znode *node;
	/* A list of all locks for a process (anchored at
	   lock_stack::locks) */
	struct list_head locks_link;
	/* A list of all owners for a znode (anchored at zlock::owners) */
	struct list_head owners_link;
};
  24817. +
/* A pending long-term lock request of one thread (lock_stack) */
struct lock_request {
	/* A pointer to uninitialized link object */
	lock_handle *handle;
	/* A pointer to the object we want to lock */
	znode *node;
	/* Lock mode (ZNODE_READ_LOCK or ZNODE_WRITE_LOCK); reset to
	   ZNODE_NO_LOCK once the request has been dispatched */
	znode_lock_mode mode;
	/* how dispatch_lock_requests() returns lock request result code */
	int ret_code;
};
  24828. +
  24829. +/* A lock stack structure for accumulating locks owned by a process */
  24830. +struct lock_stack {
  24831. + /* A guard lock protecting a lock stack */
  24832. + spinlock_t sguard;
  24833. + /* number of znodes which were requested by high priority processes */
  24834. + atomic_t nr_signaled;
  24835. + /* Current priority of a process
  24836. +
  24837. + This is only accessed by the current thread and thus requires no
  24838. + locking.
  24839. + */
  24840. + int curpri;
  24841. + /* A list of all locks owned by this process. Elements can be added to
  24842. + * this list only by the current thread. ->node pointers in this list
  24843. + * can be only changed by the current thread. */
  24844. + struct list_head locks;
  24845. + /* When lock_stack waits for the lock, it puts itself on double-linked
  24846. + requestors list of that lock */
  24847. + struct list_head requestors_link;
  24848. + /* Current lock request info.
  24849. +
  24850. + This is only accessed by the current thread and thus requires no
  24851. + locking.
  24852. + */
  24853. + struct lock_request request;
  24854. + /* the following two fields are the lock stack's
  24855. + * synchronization object to use with the standard linux/wait.h
  24856. + * interface. See reiser4_go_to_sleep and __reiser4_wake_up for
  24857. + * usage details. */
  24858. + wait_queue_head_t wait;
  24859. + atomic_t wakeup;
  24860. +#if REISER4_DEBUG
  24861. + int nr_locks; /* number of lock handles in the above list */
  24862. +#endif
  24863. +};
  24864. +
  24865. +/*
  24866. + User-visible znode locking functions
  24867. +*/
  24868. +
  24869. +extern int longterm_lock_znode(lock_handle * handle,
  24870. + znode * node,
  24871. + znode_lock_mode mode,
  24872. + znode_lock_request request);
  24873. +
  24874. +extern void longterm_unlock_znode(lock_handle * handle);
  24875. +
  24876. +extern int reiser4_check_deadlock(void);
  24877. +
  24878. +extern lock_stack *get_current_lock_stack(void);
  24879. +
  24880. +extern void init_lock_stack(lock_stack * owner);
  24881. +extern void reiser4_init_lock(zlock * lock);
  24882. +
  24883. +static inline void init_lh(lock_handle *lh)
  24884. +{
  24885. +#if REISER4_DEBUG
  24886. + memset(lh, 0, sizeof *lh);
  24887. + INIT_LIST_HEAD(&lh->locks_link);
  24888. + INIT_LIST_HEAD(&lh->owners_link);
  24889. +#else
  24890. + lh->node = NULL;
  24891. +#endif
  24892. +}
  24893. +
  24894. +static inline void done_lh(lock_handle *lh)
  24895. +{
  24896. + assert("zam-342", lh != NULL);
  24897. + if (lh->node != NULL)
  24898. + longterm_unlock_znode(lh);
  24899. +}
  24900. +
  24901. +extern void move_lh(lock_handle * new, lock_handle * old);
  24902. +extern void copy_lh(lock_handle * new, lock_handle * old);
  24903. +
  24904. +extern int reiser4_prepare_to_sleep(lock_stack * owner);
  24905. +extern void reiser4_go_to_sleep(lock_stack * owner);
  24906. +extern void __reiser4_wake_up(lock_stack * owner);
  24907. +
  24908. +extern int lock_stack_isclean(lock_stack * owner);
  24909. +
  24910. +/* zlock object state check macros: only used in assertions. Both forms imply
  24911. + that the lock is held by the current thread. */
  24912. +extern int znode_is_write_locked(const znode *);
  24913. +extern void reiser4_invalidate_lock(lock_handle *);
  24914. +
  24915. +/* lock ordering is: first take zlock spin lock, then lock stack spin lock */
  24916. +#define spin_ordering_pred_stack(stack) \
  24917. + (LOCK_CNT_NIL(spin_locked_stack) && \
  24918. + LOCK_CNT_NIL(spin_locked_txnmgr) && \
  24919. + LOCK_CNT_NIL(spin_locked_inode) && \
  24920. + LOCK_CNT_NIL(rw_locked_cbk_cache) && \
  24921. + LOCK_CNT_NIL(spin_locked_super_eflush))
  24922. +
  24923. +static inline void spin_lock_stack(lock_stack *stack)
  24924. +{
  24925. + assert("", spin_ordering_pred_stack(stack));
  24926. + spin_lock(&(stack->sguard));
  24927. + LOCK_CNT_INC(spin_locked_stack);
  24928. + LOCK_CNT_INC(spin_locked);
  24929. +}
  24930. +
  24931. +static inline void spin_unlock_stack(lock_stack *stack)
  24932. +{
  24933. + assert_spin_locked(&(stack->sguard));
  24934. + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_stack));
  24935. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  24936. + LOCK_CNT_DEC(spin_locked_stack);
  24937. + LOCK_CNT_DEC(spin_locked);
  24938. + spin_unlock(&(stack->sguard));
  24939. +}
  24940. +
  24941. +static inline void reiser4_wake_up(lock_stack * owner)
  24942. +{
  24943. + spin_lock_stack(owner);
  24944. + __reiser4_wake_up(owner);
  24945. + spin_unlock_stack(owner);
  24946. +}
  24947. +
  24948. +const char *lock_mode_name(znode_lock_mode lock);
  24949. +
  24950. +#if REISER4_DEBUG
  24951. +extern void check_lock_data(void);
  24952. +extern void check_lock_node_data(znode * node);
  24953. +#else
  24954. +#define check_lock_data() noop
  24955. +#define check_lock_node_data(node) noop
  24956. +#endif
  24957. +
  24958. +/* __LOCK_H__ */
  24959. +#endif
  24960. +
  24961. +/* Make Linus happy.
  24962. + Local variables:
  24963. + c-indentation-style: "K&R"
  24964. + mode-name: "LC"
  24965. + c-basic-offset: 8
  24966. + tab-width: 8
  24967. + fill-column: 120
  24968. + End:
  24969. +*/
  24970. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/Makefile linux-5.16.14/fs/reiser4/Makefile
  24971. --- linux-5.16.14.orig/fs/reiser4/Makefile 1970-01-01 01:00:00.000000000 +0100
  24972. +++ linux-5.16.14/fs/reiser4/Makefile 2022-03-12 13:26:19.638892695 +0100
  24973. @@ -0,0 +1,116 @@
  24974. +#
  24975. +# reiser4/Makefile
  24976. +#
  24977. +
  24978. +MODULE := reiser4
  24979. +
  24980. +obj-$(CONFIG_REISER4_FS) := $(MODULE).o
  24981. +
  24982. +$(MODULE)-objs += \
  24983. + debug.o \
  24984. + jnode.o \
  24985. + znode.o \
  24986. + key.o \
  24987. + pool.o \
  24988. + tree_mod.o \
  24989. + estimate.o \
  24990. + carry.o \
  24991. + carry_ops.o \
  24992. + lock.o \
  24993. + tree.o \
  24994. + context.o \
  24995. + tap.o \
  24996. + coord.o \
  24997. + block_alloc.o \
  24998. + txnmgr.o \
  24999. + kassign.o \
  25000. + flush.o \
  25001. + wander.o \
  25002. + eottl.o \
  25003. + search.o \
  25004. + page_cache.o \
  25005. + seal.o \
  25006. + dscale.o \
  25007. + flush_queue.o \
  25008. + ktxnmgrd.o \
  25009. + blocknrset.o \
  25010. + super.o \
  25011. + super_ops.o \
  25012. + fsdata.o \
  25013. + export_ops.o \
  25014. + oid.o \
  25015. + tree_walk.o \
  25016. + inode.o \
  25017. + vfs_ops.o \
  25018. + as_ops.o \
  25019. + entd.o \
  25020. + readahead.o \
  25021. + status_flags.o \
  25022. + init_super.o \
  25023. + safe_link.o \
  25024. + blocknrlist.o \
  25025. + discard.o \
  25026. + checksum.o \
  25027. + \
  25028. + plugin/plugin.o \
  25029. + plugin/plugin_set.o \
  25030. + plugin/object.o \
  25031. + plugin/cluster.o \
  25032. + plugin/txmod.o \
  25033. + plugin/inode_ops.o \
  25034. + plugin/inode_ops_rename.o \
  25035. + plugin/file_ops.o \
  25036. + plugin/file_ops_readdir.o \
  25037. + plugin/dir_plugin_common.o \
  25038. + plugin/file_plugin_common.o \
  25039. + plugin/hash.o \
  25040. + plugin/fibration.o \
  25041. + plugin/tail_policy.o \
  25042. + \
  25043. + plugin/file/file.o \
  25044. + plugin/file/tail_conversion.o \
  25045. + plugin/file/file_conversion.o \
  25046. + plugin/file/symlink.o \
  25047. + plugin/file/cryptcompress.o \
  25048. + \
  25049. + plugin/dir/hashed_dir.o \
  25050. + plugin/dir/seekable_dir.o \
  25051. + \
  25052. + plugin/node/node.o \
  25053. + plugin/node/node40.o \
  25054. + plugin/node/node41.o \
  25055. + \
  25056. + plugin/crypto/cipher.o \
  25057. + plugin/crypto/digest.o \
  25058. + \
  25059. + plugin/compress/compress.o \
  25060. + plugin/compress/compress_mode.o \
  25061. + \
  25062. + plugin/item/static_stat.o \
  25063. + plugin/item/sde.o \
  25064. + plugin/item/cde.o \
  25065. + plugin/item/blackbox.o \
  25066. + plugin/item/internal.o \
  25067. + plugin/item/tail.o \
  25068. + plugin/item/ctail.o \
  25069. + plugin/item/extent.o \
  25070. + plugin/item/extent_item_ops.o \
  25071. + plugin/item/extent_file_ops.o \
  25072. + plugin/item/extent_flush_ops.o \
  25073. + plugin/item/item.o \
  25074. + \
  25075. + plugin/security/perm.o \
  25076. + \
  25077. + plugin/space/bitmap.o \
  25078. + \
  25079. + plugin/disk_format/disk_format40.o \
  25080. + plugin/disk_format/disk_format.o
  25081. +
  25082. +CFLAGS_REMOVE_carry_ops.o = -Wimplicit-fallthrough
  25083. +CFLAGS_REMOVE_tree.o = -Wimplicit-fallthrough
  25084. +CFLAGS_REMOVE_search.o = -Wimplicit-fallthrough
  25085. +CFLAGS_REMOVE_plugin/file/cryptcompress.o = -Wimplicit-fallthrough
  25086. +CFLAGS_REMOVE_plugin/node/node40.o = -Wimplicit-fallthrough
  25087. +CFLAGS_REMOVE_plugin/compress/compress.o = -Wimplicit-fallthrough
  25088. +CFLAGS_REMOVE_plugin/item/internal.o = -Wimplicit-fallthrough
  25089. +CFLAGS_REMOVE_plugin/disk_format/disk_format40.o = -Wimplicit-fallthrough
  25090. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/oid.c linux-5.16.14/fs/reiser4/oid.c
  25091. --- linux-5.16.14.orig/fs/reiser4/oid.c 1970-01-01 01:00:00.000000000 +0100
  25092. +++ linux-5.16.14/fs/reiser4/oid.c 2022-03-12 13:26:19.661892751 +0100
  25093. @@ -0,0 +1,141 @@
  25094. +/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */
  25095. +
  25096. +#include "debug.h"
  25097. +#include "super.h"
  25098. +#include "txnmgr.h"
  25099. +
  25100. +/* we used to have oid allocation plugin. It was removed because it
  25101. + was recognized as providing unneeded level of abstraction. If one
  25102. + ever will find it useful - look at yet_unneeded_abstractions/oid
  25103. +*/
  25104. +
  25105. +/*
  25106. + * initialize in-memory data for oid allocator at @super. @nr_files and @next
  25107. + * are provided by disk format plugin that reads them from the disk during
  25108. + * mount.
  25109. + */
  25110. +int oid_init_allocator(struct super_block *super, oid_t nr_files, oid_t next)
  25111. +{
  25112. + reiser4_super_info_data *sbinfo;
  25113. +
  25114. + sbinfo = get_super_private(super);
  25115. +
  25116. + sbinfo->next_to_use = next;
  25117. + sbinfo->oids_in_use = nr_files;
  25118. + return 0;
  25119. +}
  25120. +
  25121. +/*
  25122. + * allocate oid and return it. ABSOLUTE_MAX_OID is returned when allocator
  25123. + * runs out of oids.
  25124. + */
  25125. +oid_t oid_allocate(struct super_block *super)
  25126. +{
  25127. + reiser4_super_info_data *sbinfo;
  25128. + oid_t oid;
  25129. +
  25130. + sbinfo = get_super_private(super);
  25131. +
  25132. + spin_lock_reiser4_super(sbinfo);
  25133. + if (sbinfo->next_to_use != ABSOLUTE_MAX_OID) {
  25134. + oid = sbinfo->next_to_use++;
  25135. + sbinfo->oids_in_use++;
  25136. + } else
  25137. + oid = ABSOLUTE_MAX_OID;
  25138. + spin_unlock_reiser4_super(sbinfo);
  25139. + return oid;
  25140. +}
  25141. +
  25142. +/*
  25143. + * Tell oid allocator that @oid is now free.
  25144. + */
  25145. +int oid_release(struct super_block *super, oid_t oid UNUSED_ARG)
  25146. +{
  25147. + reiser4_super_info_data *sbinfo;
  25148. +
  25149. + sbinfo = get_super_private(super);
  25150. +
  25151. + spin_lock_reiser4_super(sbinfo);
  25152. + sbinfo->oids_in_use--;
  25153. + spin_unlock_reiser4_super(sbinfo);
  25154. + return 0;
  25155. +}
  25156. +
  25157. +/*
  25158. + * return next @oid that would be allocated (i.e., returned by oid_allocate())
  25159. + * without actually allocating it. This is used by disk format plugin to save
  25160. + * oid allocator state on the disk.
  25161. + */
  25162. +oid_t oid_next(const struct super_block *super)
  25163. +{
  25164. + reiser4_super_info_data *sbinfo;
  25165. + oid_t oid;
  25166. +
  25167. + sbinfo = get_super_private(super);
  25168. +
  25169. + spin_lock_reiser4_super(sbinfo);
  25170. + oid = sbinfo->next_to_use;
  25171. + spin_unlock_reiser4_super(sbinfo);
  25172. + return oid;
  25173. +}
  25174. +
  25175. +/*
  25176. + * returns number of currently used oids. This is used by statfs(2) to report
  25177. + * number of "inodes" and by disk format plugin to save oid allocator state on
  25178. + * the disk.
  25179. + */
  25180. +long oids_used(const struct super_block *super)
  25181. +{
  25182. + reiser4_super_info_data *sbinfo;
  25183. + oid_t used;
  25184. +
  25185. + sbinfo = get_super_private(super);
  25186. +
  25187. + spin_lock_reiser4_super(sbinfo);
  25188. + used = sbinfo->oids_in_use;
  25189. + spin_unlock_reiser4_super(sbinfo);
  25190. + if (used < (__u64) ((long)~0) >> 1)
  25191. + return (long)used;
  25192. + else
  25193. + return (long)-1;
  25194. +}
  25195. +
  25196. +/*
  25197. + * Count oid as allocated in atom. This is done after call to oid_allocate()
  25198. + * at the point when we are irrevocably committed to creation of the new file
  25199. + * (i.e., when oid allocation cannot be any longer rolled back due to some
  25200. + * error).
  25201. + */
  25202. +void oid_count_allocated(void)
  25203. +{
  25204. + txn_atom *atom;
  25205. +
  25206. + atom = get_current_atom_locked();
  25207. + atom->nr_objects_created++;
  25208. + spin_unlock_atom(atom);
  25209. +}
  25210. +
  25211. +/*
  25212. + * Count oid as free in atom. This is done after call to oid_release() at the
  25213. + * point when we are irrevocably committed to the deletion of the file (i.e.,
  25214. + * when oid release cannot be any longer rolled back due to some error).
  25215. + */
  25216. +void oid_count_released(void)
  25217. +{
  25218. + txn_atom *atom;
  25219. +
  25220. + atom = get_current_atom_locked();
  25221. + atom->nr_objects_deleted++;
  25222. + spin_unlock_atom(atom);
  25223. +}
  25224. +
  25225. +/*
  25226. + Local variables:
  25227. + c-indentation-style: "K&R"
  25228. + mode-name: "LC"
  25229. + c-basic-offset: 8
  25230. + tab-width: 8
  25231. + fill-column: 120
  25232. + scroll-step: 1
  25233. + End:
  25234. +*/
  25235. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/page_cache.c linux-5.16.14/fs/reiser4/page_cache.c
  25236. --- linux-5.16.14.orig/fs/reiser4/page_cache.c 1970-01-01 01:00:00.000000000 +0100
  25237. +++ linux-5.16.14/fs/reiser4/page_cache.c 2022-03-12 13:26:19.661892751 +0100
  25238. @@ -0,0 +1,692 @@
  25239. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  25240. + * reiser4/README */
  25241. +
  25242. +/* Memory pressure hooks. Fake inodes handling. */
  25243. +
  25244. +/* GLOSSARY
  25245. +
  25246. + . Formatted and unformatted nodes.
  25247. + Elements of reiser4 balanced tree to store data and metadata.
  25248. + Unformatted nodes are pointed to by extent pointers. Such nodes
  25249. + are used to store data of large objects. Unlike unformatted nodes,
  25250. + formatted ones have associated format described by node4X plugin.
  25251. +
  25252. + . Jnode (or journal node)
  25253. + The in-memory header which is used to track formatted and unformatted
  25254. + nodes, bitmap nodes, etc. In particular, jnodes are used to track
  25255. + transactional information associated with each block(see reiser4/jnode.c
  25256. + for details).
  25257. +
  25258. + . Znode
  25259. + The in-memory header which is used to track formatted nodes. Contains
  25260. + embedded jnode (see reiser4/znode.c for details).
  25261. +*/
  25262. +
  25263. +/* We store all file system meta data (and data, of course) in the page cache.
  25264. +
  25265. + What does this mean? Instead of using bread/brelse we create special
  25266. + "fake" inode (one per super block) and store content of formatted nodes
  25267. + into pages bound to this inode in the page cache. In newer kernels bread()
  25268. + already uses inode attached to block device (bd_inode). Advantage of having
  25269. + our own fake inode is that we can install appropriate methods in its
  25270. + address_space operations. Such methods are called by VM on memory pressure
  25271. + (or during background page flushing) and we can use them to react
  25272. + appropriately.
  25273. +
  25274. + In initial version we only support one block per page. Support for multiple
  25275. + blocks per page is complicated by relocation.
  25276. +
  25277. + To each page, used by reiser4, jnode is attached. jnode is analogous to
  25278. + buffer head. Difference is that jnode is bound to the page permanently:
  25279. + jnode cannot be removed from memory until its backing page is.
  25280. +
  25281. + jnode contain pointer to page (->pg field) and page contain pointer to
  25282. + jnode in ->private field. Pointer from jnode to page is protected by
  25283. + jnode's spinlock and pointer from page to jnode is protected by page lock
  25284. + (PG_locked bit). Lock ordering is: first take page lock, then jnode spin
  25285. + lock. To go into reverse direction use jnode_lock_page() function that uses
  25286. + standard try-lock-and-release device.
  25287. +
  25288. + Properties:
  25289. +
  25290. + 1. when jnode-to-page mapping is established (by jnode_attach_page()), page
  25291. + reference counter is increased.
  25292. +
  25293. + 2. when jnode-to-page mapping is destroyed (by page_clear_jnode(), page
  25294. + reference counter is decreased.
  25295. +
  25296. + 3. on jload() reference counter on jnode page is increased, page is
  25297. + kmapped and `referenced'.
  25298. +
  25299. + 4. on jrelse() inverse operations are performed.
  25300. +
  25301. + 5. kmapping/kunmapping of unformatted pages is done by read/write methods.
  25302. +
  25303. + DEADLOCKS RELATED TO MEMORY PRESSURE. [OUTDATED. Only interesting
  25304. + historically.]
  25305. +
  25306. + [In the following discussion, `lock' invariably means long term lock on
  25307. + znode.] (What about page locks?)
  25308. +
  25309. + There is some special class of deadlock possibilities related to memory
  25310. + pressure. Locks acquired by other reiser4 threads are accounted for in
  25311. + deadlock prevention mechanism (lock.c), but when ->vm_writeback() is
  25312. + invoked additional hidden arc is added to the locking graph: thread that
  25313. + tries to allocate memory waits for ->vm_writeback() to finish. If this
  25314. + thread keeps lock and ->vm_writeback() tries to acquire this lock, deadlock
  25315. + prevention is useless.
  25316. +
  25317. + Another related problem is possibility for ->vm_writeback() to run out of
  25318. + memory itself. This is not a problem for ext2 and friends, because their
  25319. + ->vm_writeback() don't allocate much memory, but reiser4 flush is
  25320. + definitely able to allocate huge amounts of memory.
  25321. +
  25322. + It seems that there is no reliable way to cope with the problems above.
  25323. + Instead it was decided that ->vm_writeback() (as invoked in the kswapd
  25324. + context) wouldn't perform any flushing itself, but rather should just wake
  25325. + up some auxiliary thread dedicated for this purpose (or, the same thread
  25326. + that does periodic commit of old atoms (ktxnmgrd.c)).
  25327. +
  25328. + Details:
  25329. +
  25330. + 1. Page is called `reclaimable' against particular reiser4 mount F if this
  25331. + page can be ultimately released by try_to_free_pages() under presumptions
  25332. + that:
  25333. +
  25334. + a. ->vm_writeback() for F is no-op, and
  25335. +
  25336. + b. none of the threads accessing F are making any progress, and
  25337. +
  25338. + c. other reiser4 mounts obey the same memory reservation protocol as F
  25339. + (described below).
  25340. +
  25341. + For example, clean un-pinned page, or page occupied by ext2 data are
  25342. + reclaimable against any reiser4 mount.
  25343. +
  25344. + When there is more than one reiser4 mount in a system, condition (c) makes
  25345. + reclaim-ability not easily verifiable beyond trivial cases mentioned above.
  25346. +
  25347. + THIS COMMENT IS VALID FOR "MANY BLOCKS ON PAGE" CASE
  25348. +
  25349. + Fake inode is used to bound formatted nodes and each node is indexed within
  25350. + fake inode by its block number. If block size of smaller than page size, it
  25351. + may so happen that block mapped to the page with formatted node is occupied
  25352. + by unformatted node or is unallocated. This lead to some complications,
  25353. + because flushing whole page can lead to an incorrect overwrite of
  25354. + unformatted node that is moreover, can be cached in some other place as
  25355. + part of the file body. To avoid this, buffers for unformatted nodes are
  25356. + never marked dirty. Also pages in the fake inode are never marked dirty.
  25357. + This rules out usage of ->writepage() as memory pressure hook. Instead
  25358. + ->releasepage() is used.
  25359. +
  25360. + Josh is concerned that page->buffer is going to die. This should not pose
  25361. + significant problem though, because we need to add some data structures to
  25362. + the page anyway (jnode) and all necessary book keeping can be put there.
  25363. +
  25364. +*/
  25365. +
  25366. +/* Life cycle of pages/nodes.
  25367. +
  25368. + jnode contains reference to page and page contains reference back to
  25369. + jnode. This reference is counted in page ->count. Thus, page bound to jnode
  25370. + cannot be released back into free pool.
  25371. +
  25372. + 1. Formatted nodes.
  25373. +
  25374. + 1. formatted node is represented by znode. When new znode is created its
  25375. + ->pg pointer is NULL initially.
  25376. +
  25377. + 2. when node content is loaded into znode (by call to zload()) for the
  25378. + first time following happens (in call to ->read_node() or
  25379. + ->allocate_node()):
  25380. +
  25381. + 1. new page is added to the page cache.
  25382. +
  25383. + 2. this page is attached to znode and its ->count is increased.
  25384. +
  25385. + 3. page is kmapped.
  25386. +
  25387. + 3. if more calls to zload() follow (without corresponding zrelses), page
  25388. + counter is left intact and in its stead ->d_count is increased in znode.
  25389. +
  25390. + 4. each call to zrelse decreases ->d_count. When ->d_count drops to zero
  25391. + ->release_node() is called and page is kunmapped as result.
  25392. +
  25393. + 5. at some moment node can be captured by a transaction. Its ->x_count
  25394. + is then increased by transaction manager.
  25395. +
  25396. + 6. if node is removed from the tree (empty node with JNODE_HEARD_BANSHEE
  25397. + bit set) following will happen (also see comment at the top of znode.c):
  25398. +
  25399. + 1. when last lock is released, node will be uncaptured from
  25400. + transaction. This released reference that transaction manager acquired
  25401. + at the step 5.
  25402. +
  25403. + 2. when last reference is released, zput() detects that node is
  25404. + actually deleted and calls ->delete_node()
  25405. + operation. page_cache_delete_node() implementation detaches jnode from
  25406. + page and releases page.
  25407. +
  25408. + 7. otherwise (node wasn't removed from the tree), last reference to
  25409. + znode will be released after transaction manager committed transaction
  25410. + node was in. This implies squallocing of this node (see
  25411. + flush.c). Nothing special happens at this point. Znode is still in the
  25412. + hash table and page is still attached to it.
  25413. +
  25414. + 8. znode is actually removed from the memory because of the memory
  25415. + pressure, or during umount (znodes_tree_done()). Anyway, znode is
  25416. + removed by the call to zdrop(). At this moment, page is detached from
  25417. + znode and removed from the inode address space.
  25418. +
  25419. +*/
  25420. +
  25421. +#include "debug.h"
  25422. +#include "dformat.h"
  25423. +#include "key.h"
  25424. +#include "txnmgr.h"
  25425. +#include "jnode.h"
  25426. +#include "znode.h"
  25427. +#include "block_alloc.h"
  25428. +#include "tree.h"
  25429. +#include "vfs_ops.h"
  25430. +#include "inode.h"
  25431. +#include "super.h"
  25432. +#include "entd.h"
  25433. +#include "page_cache.h"
  25434. +#include "ktxnmgrd.h"
  25435. +
  25436. +#include <linux/types.h>
  25437. +#include <linux/fs.h>
  25438. +#include <linux/mm.h> /* for struct page */
  25439. +#include <linux/swap.h> /* for struct page */
  25440. +#include <linux/pagemap.h>
  25441. +#include <linux/bio.h>
  25442. +#include <linux/writeback.h>
  25443. +#include <linux/blkdev.h>
  25444. +
  25445. +static struct bio *page_bio(struct page *, jnode * , int rw, gfp_t gfp);
  25446. +
  25447. +static struct address_space_operations formatted_fake_as_ops;
  25448. +
  25449. +static const oid_t fake_ino = 0x1;
  25450. +static const oid_t bitmap_ino = 0x2;
  25451. +static const oid_t cc_ino = 0x3;
  25452. +
  25453. +static void
  25454. +init_fake_inode(struct super_block *super, struct inode *fake,
  25455. + struct inode **pfake)
  25456. +{
  25457. + assert("nikita-2168", fake->i_state & I_NEW);
  25458. + fake->i_mapping->a_ops = &formatted_fake_as_ops;
  25459. + inode_attach_wb(fake, NULL);
  25460. + *pfake = fake;
  25461. + /* NOTE-NIKITA something else? */
  25462. + unlock_new_inode(fake);
  25463. +}
  25464. +
  25465. +/**
  25466. + * reiser4_init_formatted_fake - iget inodes for formatted nodes and bitmaps
  25467. + * @super: super block to init fake inode for
  25468. + *
  25469. + * Initializes fake inode to which formatted nodes are bound in the page cache
  25470. + * and inode for bitmaps.
  25471. + */
  25472. +int reiser4_init_formatted_fake(struct super_block *super)
  25473. +{
  25474. + struct inode *fake;
  25475. + struct inode *bitmap;
  25476. + struct inode *cc;
  25477. + reiser4_super_info_data *sinfo;
  25478. +
  25479. + assert("nikita-1703", super != NULL);
  25480. +
  25481. + sinfo = get_super_private_nocheck(super);
  25482. + fake = iget_locked(super, oid_to_ino(fake_ino));
  25483. +
  25484. + if (fake != NULL) {
  25485. + init_fake_inode(super, fake, &sinfo->fake);
  25486. +
  25487. + bitmap = iget_locked(super, oid_to_ino(bitmap_ino));
  25488. + if (bitmap != NULL) {
  25489. + init_fake_inode(super, bitmap, &sinfo->bitmap);
  25490. +
  25491. + cc = iget_locked(super, oid_to_ino(cc_ino));
  25492. + if (cc != NULL) {
  25493. + init_fake_inode(super, cc, &sinfo->cc);
  25494. + return 0;
  25495. + } else {
  25496. + iput(sinfo->fake);
  25497. + iput(sinfo->bitmap);
  25498. + sinfo->fake = NULL;
  25499. + sinfo->bitmap = NULL;
  25500. + }
  25501. + } else {
  25502. + iput(sinfo->fake);
  25503. + sinfo->fake = NULL;
  25504. + }
  25505. + }
  25506. + return RETERR(-ENOMEM);
  25507. +}
  25508. +
  25509. +/**
  25510. + * reiser4_done_formatted_fake - release inode used by formatted nodes and bitmaps
  25511. + * @super: super block to init fake inode for
  25512. + *
  25513. + * Releases inodes which were used as address spaces of bitmap and formatted
  25514. + * nodes.
  25515. + */
  25516. +void reiser4_done_formatted_fake(struct super_block *super)
  25517. +{
  25518. + reiser4_super_info_data *sinfo;
  25519. +
  25520. + sinfo = get_super_private_nocheck(super);
  25521. +
  25522. + if (sinfo->fake != NULL) {
  25523. + iput(sinfo->fake);
  25524. + sinfo->fake = NULL;
  25525. + }
  25526. +
  25527. + if (sinfo->bitmap != NULL) {
  25528. + iput(sinfo->bitmap);
  25529. + sinfo->bitmap = NULL;
  25530. + }
  25531. +
  25532. + if (sinfo->cc != NULL) {
  25533. + iput(sinfo->cc);
  25534. + sinfo->cc = NULL;
  25535. + }
  25536. + return;
  25537. +}
  25538. +
  25539. +void reiser4_wait_page_writeback(struct page *page)
  25540. +{
  25541. + assert("zam-783", PageLocked(page));
  25542. +
  25543. + do {
  25544. + unlock_page(page);
  25545. + wait_on_page_writeback(page);
  25546. + lock_page(page);
  25547. + } while (PageWriteback(page));
  25548. +}
  25549. +
  25550. +/* return tree @page is in */
  25551. +reiser4_tree *reiser4_tree_by_page(const struct page *page/* page to query */)
  25552. +{
  25553. + assert("nikita-2461", page != NULL);
  25554. + return &get_super_private(page->mapping->host->i_sb)->tree;
  25555. +}
  25556. +
  25557. +/* completion handler for single page bio-based read.
  25558. +
  25559. + mpage_end_io_read() would also do. But it's static.
  25560. +
  25561. +*/
  25562. +static void end_bio_single_page_read(struct bio *bio)
  25563. +{
  25564. + struct page *page;
  25565. +
  25566. + page = bio->bi_io_vec[0].bv_page;
  25567. +
  25568. + if (!bio->bi_status)
  25569. + SetPageUptodate(page);
  25570. + else {
  25571. + ClearPageUptodate(page);
  25572. + SetPageError(page);
  25573. + }
  25574. + unlock_page(page);
  25575. + bio_put(bio);
  25576. +}
  25577. +
  25578. +/* completion handler for single page bio-based write.
  25579. +
  25580. + mpage_end_io_write() would also do. But it's static.
  25581. +
  25582. +*/
  25583. +static void end_bio_single_page_write(struct bio *bio)
  25584. +{
  25585. + struct page *page;
  25586. +
  25587. + page = bio->bi_io_vec[0].bv_page;
  25588. +
  25589. + if (bio->bi_status)
  25590. + SetPageError(page);
  25591. + end_page_writeback(page);
  25592. + bio_put(bio);
  25593. +}
  25594. +
  25595. +/* ->readpage() method for formatted nodes */
  25596. +static int formatted_readpage(struct file *f UNUSED_ARG,
  25597. + struct page *page/* page to read */)
  25598. +{
  25599. + assert("nikita-2412", PagePrivate(page) && jprivate(page));
  25600. + return reiser4_page_io(page, jprivate(page), READ,
  25601. + reiser4_ctx_gfp_mask_get());
  25602. +}
  25603. +
  25604. +/**
  25605. + * reiser4_page_io - submit single-page bio request
  25606. + * @page: page to perform io for
  25607. + * @node: jnode of page
  25608. + * @rw: read or write
  25609. + * @gfp: gfp mask for bio allocation
  25610. + *
  25611. + * Submits single page read or write.
  25612. + */
  25613. +int reiser4_page_io(struct page *page, jnode *node, int rw, gfp_t gfp)
  25614. +{
  25615. + struct bio *bio;
  25616. + int result;
  25617. +
  25618. + assert("nikita-2094", page != NULL);
  25619. + assert("nikita-2226", PageLocked(page));
  25620. + assert("nikita-2634", node != NULL);
  25621. + assert("nikita-2893", rw == READ || rw == WRITE);
  25622. +
  25623. + if (rw) {
  25624. + if (unlikely(IS_RDONLY(page->mapping->host))) {
  25625. + unlock_page(page);
  25626. + return 0;
  25627. + }
  25628. + }
  25629. +
  25630. + bio = page_bio(page, node, rw, gfp);
  25631. + if (!IS_ERR(bio)) {
  25632. + if (rw == WRITE) {
  25633. + set_page_writeback(page);
  25634. + unlock_page(page);
  25635. + }
  25636. + bio_set_op_attrs(bio, rw, 0);
  25637. + submit_bio(bio);
  25638. + result = 0;
  25639. + } else {
  25640. + unlock_page(page);
  25641. + result = PTR_ERR(bio);
  25642. + }
  25643. +
  25644. + return result;
  25645. +}
  25646. +
  25647. +/* helper function to construct bio for page */
  25648. +static struct bio *page_bio(struct page *page, jnode * node, int rw, gfp_t gfp)
  25649. +{
  25650. + struct bio *bio;
  25651. + assert("nikita-2092", page != NULL);
  25652. + assert("nikita-2633", node != NULL);
  25653. +
  25654. + /* Simple implementation in the assumption that blocksize == pagesize.
  25655. +
  25656. + We only have to submit one block, but submit_bh() will allocate bio
  25657. + anyway, so lets use all the bells-and-whistles of bio code.
  25658. + */
  25659. +
  25660. + bio = bio_alloc(gfp, 1);
  25661. + if (bio != NULL) {
  25662. + int blksz;
  25663. + struct super_block *super;
  25664. + reiser4_block_nr blocknr;
  25665. +
  25666. + super = page->mapping->host->i_sb;
  25667. + assert("nikita-2029", super != NULL);
  25668. + blksz = super->s_blocksize;
  25669. + assert("nikita-2028", blksz == (int)PAGE_SIZE);
  25670. +
  25671. + spin_lock_jnode(node);
  25672. + blocknr = *jnode_get_io_block(node);
  25673. + spin_unlock_jnode(node);
  25674. +
  25675. + assert("nikita-2275", blocknr != (reiser4_block_nr) 0);
  25676. + assert("nikita-2276", !reiser4_blocknr_is_fake(&blocknr));
  25677. +
  25678. + bio_set_dev(bio, super->s_bdev);
  25679. + /* fill bio->bi_iter.bi_sector before calling bio_add_page(), because
  25680. + * q->merge_bvec_fn may want to inspect it (see
  25681. + * drivers/md/linear.c:linear_mergeable_bvec() for example). */
  25682. + bio->bi_iter.bi_sector = blocknr * (blksz >> 9);
  25683. +
  25684. + if (!bio_add_page(bio, page, blksz, 0)) {
  25685. + warning("nikita-3452",
  25686. + "Single page bio cannot be constructed");
  25687. + return ERR_PTR(RETERR(-EINVAL));
  25688. + }
  25689. +
  25690. + /* bio -> bi_idx is filled by bio_init() */
  25691. + bio->bi_end_io = (rw == READ) ?
  25692. + end_bio_single_page_read : end_bio_single_page_write;
  25693. +
  25694. + return bio;
  25695. + } else
  25696. + return ERR_PTR(RETERR(-ENOMEM));
  25697. +}
  25698. +
  25699. +#if 0
  25700. +static int can_hit_entd(reiser4_context *ctx, struct super_block *s)
  25701. +{
  25702. + if (ctx == NULL || ((unsigned long)ctx->magic) != context_magic)
  25703. + return 1;
  25704. + if (ctx->super != s)
  25705. + return 1;
  25706. + if (get_super_private(s)->entd.tsk == current)
  25707. + return 0;
  25708. + if (!lock_stack_isclean(&ctx->stack))
  25709. + return 0;
  25710. + if (ctx->trans->atom != NULL)
  25711. + return 0;
  25712. + return 1;
  25713. +}
  25714. +#endif
  25715. +
  25716. +/**
  25717. + * reiser4_writepage - writepage of struct address_space_operations
  25718. + * @page: page to write
  25719. + * @wbc: writeback control (forwarded to write_page_by_ent())
  25720. + *
  25721. + * Hands the locked page over to the entd thread via write_page_by_ent().
  25722. + */
  25723. +/* Common memory pressure notification. */
  25724. +int reiser4_writepage(struct page *page,
  25725. + struct writeback_control *wbc)
  25726. +{
  25727. + /*
  25728. + * assert("edward-1562",
  25729. + * can_hit_entd(get_current_context_check(), sb));
  25730. + */
  25731. + assert("vs-828", PageLocked(page));
  25732. +
  25733. + return write_page_by_ent(page, wbc);
  25734. +}
  25735. +
  25736. +/* ->set_page_dirty() method of formatted address_space */
  25737. +static int formatted_set_page_dirty(struct page *page)
  25738. +{
  25739. + assert("nikita-2173", page != NULL);
  25740. + BUG();
  25741. + return __set_page_dirty_nobuffers(page);
  25742. +}
  25743. +
  25744. +/* writepages method of address space operations in reiser4 is used to involve
  25745. + into transactions pages which are dirtied via mmap. Only regular files can
  25746. + have such pages. Fake inode is used to access formatted nodes via page
  25747. + cache. As formatted nodes can never be mmaped, fake inode's writepages has
  25748. + nothing to do */
  25749. +static int
  25750. +writepages_fake(struct address_space *mapping, struct writeback_control *wbc)
  25751. +{
  25752. + return 0;
  25753. +}
  25754. +
  25755. +/* address space operations for the fake inode */
  25756. +static struct address_space_operations formatted_fake_as_ops = {
  25757. + /* Perform a writeback of a single page as a memory-freeing
  25758. + * operation. */
  25759. + .writepage = reiser4_writepage,
  25760. + /* this is called to read formatted node */
  25761. + .readpage = formatted_readpage,
  25762. + /* ->sync_page() method of fake inode address space operations. Called
  25763. + from wait_on_page() and lock_page().
  25764. +
  25765. + This is most annoyingly misnomered method. Actually it is called
  25766. + from wait_on_page_bit() and lock_page() and its purpose is to
  25767. + actually start io by jabbing device drivers.
  25768. + .sync_page = block_sync_page,
  25769. + */
  25770. + /* Write back some dirty pages from this mapping. Called from sync.
  25771. + called during sync (pdflush) */
  25772. + .writepages = writepages_fake,
  25773. + /* Set a page dirty */
  25774. + .set_page_dirty = formatted_set_page_dirty,
  25775. + /* used for read-ahead. Not applicable */
  25776. + .readpages = NULL,
  25777. + .write_begin = NULL,
  25778. + .write_end = NULL,
  25779. + .bmap = NULL,
  25780. + /* called just before page is being detached from inode mapping and
  25781. + removed from memory. Called on truncate, cut/squeeze, and
  25782. + umount. */
  25783. + .invalidatepage = reiser4_invalidatepage,
  25784. + /* this is called by shrink_cache() so that file system can try to
  25785. + release objects (jnodes, buffers, journal heads) attached to page
  25786. + and, may be made page itself free-able.
  25787. + */
  25788. + .releasepage = reiser4_releasepage,
  25789. + .direct_IO = NULL,
  25790. + .migratepage = reiser4_migratepage,
  25791. + .batch_lock_tabu = 1
  25792. +};
  25793. +
  25794. +/* called just before page is released (no longer used by reiser4). Callers:
  25795. + jdelete() and extent2tail(). */
  25796. +void reiser4_drop_page(struct page *page)
  25797. +{
  25798. + assert("nikita-2181", PageLocked(page));
  25799. + clear_page_dirty_for_io(page);
  25800. + ClearPageUptodate(page);
  25801. +#if defined(PG_skipped)
  25802. + ClearPageSkipped(page);
  25803. +#endif
  25804. + unlock_page(page);
  25805. +}
  25806. +
  25807. +#define JNODE_GANG_SIZE (16)
  25808. +
  25809. +/* find all jnodes from range specified and invalidate them */
  25810. +static int
  25811. +truncate_jnodes_range(struct inode *inode, pgoff_t from, pgoff_t count)
  25812. +{
  25813. + reiser4_inode *info;
  25814. + int truncated_jnodes;
  25815. + reiser4_tree *tree;
  25816. + unsigned long index;
  25817. + unsigned long end;
  25818. +
  25819. + if (inode_file_plugin(inode) ==
  25820. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID))
  25821. + /*
  25822. + * No need to get rid of jnodes here: if the single jnode of
  25823. + * page cluster did not have page, then it was found and killed
  25824. + * before in
  25825. + * truncate_complete_page_cluster()->jput()->jput_final(),
  25826. + * otherwise it will be dropped by reiser4_invalidatepage()
  25827. + */
  25828. + return 0;
  25829. + truncated_jnodes = 0;
  25830. +
  25831. + info = reiser4_inode_data(inode);
  25832. + tree = reiser4_tree_by_inode(inode);
  25833. +
  25834. + index = from;
  25835. + end = from + count;
  25836. +
  25837. + while (1) {
  25838. + jnode *gang[JNODE_GANG_SIZE];
  25839. + int taken;
  25840. + int i;
  25841. + jnode *node;
  25842. +
  25843. + assert("nikita-3466", index <= end);
  25844. +
  25845. + read_lock_tree(tree);
  25846. + taken =
  25847. + radix_tree_gang_lookup(jnode_tree_by_reiser4_inode(info),
  25848. + (void **)gang, index,
  25849. + JNODE_GANG_SIZE);
  25850. + for (i = 0; i < taken; ++i) {
  25851. + node = gang[i];
  25852. + if (index_jnode(node) < end)
  25853. + jref(node);
  25854. + else
  25855. + gang[i] = NULL;
  25856. + }
  25857. + read_unlock_tree(tree);
  25858. +
  25859. + for (i = 0; i < taken; ++i) {
  25860. + node = gang[i];
  25861. + if (node != NULL) {
  25862. + index = max(index, index_jnode(node));
  25863. + spin_lock_jnode(node);
  25864. + assert("edward-1457", node->pg == NULL);
  25865. + /* this is always called after
  25866. + truncate_inode_pages_range(). Therefore, here
  25867. + jnode can not have page. New pages can not be
  25868. + created because truncate_jnodes_range goes
  25869. + under exclusive access on file obtained,
  25870. + where as new page creation requires
  25871. + non-exclusive access obtained */
  25872. + JF_SET(node, JNODE_HEARD_BANSHEE);
  25873. + reiser4_uncapture_jnode(node);
  25874. + unhash_unformatted_jnode(node);
  25875. + truncated_jnodes++;
  25876. + jput(node);
  25877. + } else
  25878. + break;
  25879. + }
  25880. + if (i != taken || taken == 0)
  25881. + break;
  25882. + }
  25883. + return truncated_jnodes;
  25884. +}
  25885. +
  25886. +/* Truncating files in reiser4: problems and solutions.
  25887. +
  25888. + VFS calls fs's truncate after it has called truncate_inode_pages()
  25889. + to get rid of pages corresponding to part of file being truncated.
  25890. + In reiser4 it may cause existence of unallocated extents which do
  25891. + not have jnodes. Flush code does not expect that. Solution of this
  25892. + problem is straightforward. As vfs's truncate is implemented using
  25893. + setattr operation, it seems reasonable to have ->setattr() that
  25894. + will cut file body. However, flush code also does not expect dirty
  25895. + pages without parent items, so it is impossible to cut all items,
  25896. + then truncate all pages in two steps. We resolve this problem by
  25897. + cutting items one-by-one. Each such fine-grained step performed
  25898. + under longterm znode lock calls at the end ->kill_hook() method of
  25899. + a killed item to remove its bound pages and jnodes.
  25900. +
  25901. + The following function is a common part of mentioned kill hooks.
  25902. + Also, this is called before tail-to-extent conversion (to not manage
  25903. + few copies of the data).
  25904. +*/
  25905. +void reiser4_invalidate_pages(struct address_space *mapping, pgoff_t from,
  25906. + unsigned long count, int even_cows)
  25907. +{
  25908. + loff_t from_bytes, count_bytes;
  25909. +
  25910. + if (count == 0)
  25911. + return;
  25912. + from_bytes = ((loff_t) from) << PAGE_SHIFT;
  25913. + count_bytes = ((loff_t) count) << PAGE_SHIFT;
  25914. +
  25915. + unmap_mapping_range(mapping, from_bytes, count_bytes, even_cows);
  25916. + truncate_inode_pages_range(mapping, from_bytes,
  25917. + from_bytes + count_bytes - 1);
  25918. + truncate_jnodes_range(mapping->host, from, count);
  25919. +}
  25920. +
  25921. +/*
  25922. + * Local variables:
  25923. + * c-indentation-style: "K&R"
  25924. + * mode-name: "LC"
  25925. + * c-basic-offset: 8
  25926. + * tab-width: 8
  25927. + * fill-column: 120
  25928. + * scroll-step: 1
  25929. + * End:
  25930. + */
  25931. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/page_cache.h linux-5.16.14/fs/reiser4/page_cache.h
  25932. --- linux-5.16.14.orig/fs/reiser4/page_cache.h 1970-01-01 01:00:00.000000000 +0100
  25933. +++ linux-5.16.14/fs/reiser4/page_cache.h 2022-03-12 13:26:19.662892753 +0100
  25934. @@ -0,0 +1,62 @@
  25935. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  25936. + * reiser4/README */
  25937. +/* Memory pressure hooks. Fake inodes handling. See page_cache.c. */
  25938. +
  25939. +#if !defined(__REISER4_PAGE_CACHE_H__)
  25940. +#define __REISER4_PAGE_CACHE_H__
  25941. +
  25942. +#include "forward.h"
  25943. +#include "context.h" /* for reiser4_ctx_gfp_mask_get() */
  25944. +
  25945. +#include <linux/fs.h> /* for struct super_block, address_space */
  25946. +#include <linux/mm.h> /* for struct page */
  25947. +#include <linux/pagemap.h> /* for lock_page() */
  25948. +#include <linux/vmalloc.h> /* for __vmalloc() */
  25949. +
  25950. +extern int reiser4_init_formatted_fake(struct super_block *);
  25951. +extern void reiser4_done_formatted_fake(struct super_block *);
  25952. +
  25953. +extern reiser4_tree *reiser4_tree_by_page(const struct page *);
  25954. +
  25955. +extern void reiser4_wait_page_writeback(struct page *);
  25956. +static inline void lock_and_wait_page_writeback(struct page *page)
  25957. +{
  25958. + lock_page(page);
  25959. + if (unlikely(PageWriteback(page)))
  25960. + reiser4_wait_page_writeback(page);
  25961. +}
  25962. +
  25963. +#define jprivate(page) ((jnode *)page_private(page))
  25964. +
  25965. +extern int reiser4_page_io(struct page *, jnode *, int rw, gfp_t);
  25966. +extern void reiser4_drop_page(struct page *);
  25967. +extern void reiser4_invalidate_pages(struct address_space *, pgoff_t from,
  25968. + unsigned long count, int even_cows);
  25969. +extern void capture_reiser4_inodes(struct super_block *,
  25970. + struct writeback_control *);
  25971. +static inline void *reiser4_vmalloc(unsigned long size)
  25972. +{
  25973. + return __vmalloc(size, reiser4_ctx_gfp_mask_get());
  25974. +}
  25975. +
  25976. +#define PAGECACHE_TAG_REISER4_MOVED PAGECACHE_TAG_DIRTY
  25977. +
  25978. +#if REISER4_DEBUG
  25979. +extern void print_page(const char *prefix, struct page *page);
  25980. +#else
  25981. +#define print_page(prf, p) noop
  25982. +#endif
  25983. +
  25984. +/* __REISER4_PAGE_CACHE_H__ */
  25985. +#endif
  25986. +
  25987. +/* Make Linus happy.
  25988. + Local variables:
  25989. + c-indentation-style: "K&R"
  25990. + mode-name: "LC"
  25991. + c-basic-offset: 8
  25992. + tab-width: 8
  25993. + fill-column: 120
  25994. + scroll-step: 1
  25995. + End:
  25996. +*/
  25997. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/cluster.c linux-5.16.14/fs/reiser4/plugin/cluster.c
  25998. --- linux-5.16.14.orig/fs/reiser4/plugin/cluster.c 1970-01-01 01:00:00.000000000 +0100
  25999. +++ linux-5.16.14/fs/reiser4/plugin/cluster.c 2022-03-12 13:26:19.662892753 +0100
  26000. @@ -0,0 +1,72 @@
  26001. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  26002. + * reiser4/README */
  26003. +
  26004. +/* Contains reiser4 cluster plugins (see
  26005. + http://www.namesys.com/cryptcompress_design.html
  26006. + "Concepts of clustering" for details). */
  26007. +
  26008. +#include "plugin_header.h"
  26009. +#include "plugin.h"
  26010. +#include "../inode.h"
  26011. +
  26012. +static int change_cluster(struct inode *inode,
  26013. + reiser4_plugin * plugin,
  26014. + pset_member memb)
  26015. +{
  26016. + assert("edward-1324", inode != NULL);
  26017. + assert("edward-1325", plugin != NULL);
  26018. + assert("edward-1326", is_reiser4_inode(inode));
  26019. + assert("edward-1327", plugin->h.type_id == REISER4_CLUSTER_PLUGIN_TYPE);
  26020. +
  26021. + /* Can't change the cluster plugin for already existent regular files */
  26022. + if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE))
  26023. + return RETERR(-EINVAL);
  26024. +
  26025. + /* If matches, nothing to change. */
  26026. + if (inode_hash_plugin(inode) != NULL &&
  26027. + inode_hash_plugin(inode)->h.id == plugin->h.id)
  26028. + return 0;
  26029. +
  26030. + return aset_set_unsafe(&reiser4_inode_data(inode)->pset,
  26031. + PSET_CLUSTER, plugin);
  26032. +}
  26033. +
  26034. +static reiser4_plugin_ops cluster_plugin_ops = {
  26035. + .init = NULL,
  26036. + .load = NULL,
  26037. + .save_len = NULL,
  26038. + .save = NULL,
  26039. + .change = &change_cluster
  26040. +};
  26041. +
  26042. +#define SUPPORT_CLUSTER(SHIFT, ID, LABEL, DESC) \
  26043. + [CLUSTER_ ## ID ## _ID] = { \
  26044. + .h = { \
  26045. + .type_id = REISER4_CLUSTER_PLUGIN_TYPE, \
  26046. + .id = CLUSTER_ ## ID ## _ID, \
  26047. + .pops = &cluster_plugin_ops, \
  26048. + .label = LABEL, \
  26049. + .desc = DESC, \
  26050. + .linkage = {NULL, NULL} \
  26051. + }, \
  26052. + .shift = SHIFT \
  26053. + }
  26054. +
  26055. +cluster_plugin cluster_plugins[LAST_CLUSTER_ID] = {
  26056. + SUPPORT_CLUSTER(16, 64K, "64K", "Large"),
  26057. + SUPPORT_CLUSTER(15, 32K, "32K", "Big"),
  26058. + SUPPORT_CLUSTER(14, 16K, "16K", "Average"),
  26059. + SUPPORT_CLUSTER(13, 8K, "8K", "Small"),
  26060. + SUPPORT_CLUSTER(12, 4K, "4K", "Minimal")
  26061. +};
  26062. +
  26063. +/*
  26064. + Local variables:
  26065. + c-indentation-style: "K&R"
  26066. + mode-name: "LC"
  26067. + c-basic-offset: 8
  26068. + tab-width: 8
  26069. + fill-column: 120
  26070. + scroll-step: 1
  26071. + End:
  26072. +*/
  26073. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/cluster.h linux-5.16.14/fs/reiser4/plugin/cluster.h
  26074. --- linux-5.16.14.orig/fs/reiser4/plugin/cluster.h 1970-01-01 01:00:00.000000000 +0100
  26075. +++ linux-5.16.14/fs/reiser4/plugin/cluster.h 2022-03-12 13:26:19.662892753 +0100
  26076. @@ -0,0 +1,410 @@
  26077. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  26078. +
  26079. +/* This file contains size/offset translators, modulators
  26080. + and other helper functions. */
  26081. +
  26082. +#if !defined(__FS_REISER4_CLUSTER_H__)
  26083. +#define __FS_REISER4_CLUSTER_H__
  26084. +
  26085. +#include "../inode.h"
  26086. +
  26087. +static inline int inode_cluster_shift(struct inode *inode)
  26088. +{
  26089. + assert("edward-92", inode != NULL);
  26090. + assert("edward-93", reiser4_inode_data(inode) != NULL);
  26091. +
  26092. + return inode_cluster_plugin(inode)->shift;
  26093. +}
  26094. +
  26095. +static inline unsigned cluster_nrpages_shift(struct inode *inode)
  26096. +{
  26097. + return inode_cluster_shift(inode) - PAGE_SHIFT;
  26098. +}
  26099. +
  26100. +/* cluster size in page units */
  26101. +static inline unsigned cluster_nrpages(struct inode *inode)
  26102. +{
  26103. + return 1U << cluster_nrpages_shift(inode);
  26104. +}
  26105. +
  26106. +static inline size_t inode_cluster_size(struct inode *inode)
  26107. +{
  26108. + assert("edward-96", inode != NULL);
  26109. +
  26110. + return 1U << inode_cluster_shift(inode);
  26111. +}
  26112. +
  26113. +static inline cloff_t pg_to_clust(pgoff_t idx, struct inode *inode)
  26114. +{
  26115. + return idx >> cluster_nrpages_shift(inode);
  26116. +}
  26117. +
  26118. +static inline pgoff_t clust_to_pg(cloff_t idx, struct inode *inode)
  26119. +{
  26120. + return idx << cluster_nrpages_shift(inode);
  26121. +}
  26122. +
  26123. +static inline pgoff_t pg_to_clust_to_pg(pgoff_t idx, struct inode *inode)
  26124. +{
  26125. + return clust_to_pg(pg_to_clust(idx, inode), inode);
  26126. +}
  26127. +
  26128. +static inline pgoff_t off_to_pg(loff_t off)
  26129. +{
  26130. + return (off >> PAGE_SHIFT);
  26131. +}
  26132. +
  26133. +static inline loff_t pg_to_off(pgoff_t idx)
  26134. +{
  26135. + return ((loff_t) (idx) << PAGE_SHIFT);
  26136. +}
  26137. +
  26138. +static inline cloff_t off_to_clust(loff_t off, struct inode *inode)
  26139. +{
  26140. + return off >> inode_cluster_shift(inode);
  26141. +}
  26142. +
  26143. +static inline loff_t clust_to_off(cloff_t idx, struct inode *inode)
  26144. +{
  26145. + return (loff_t) idx << inode_cluster_shift(inode);
  26146. +}
  26147. +
  26148. +static inline loff_t off_to_clust_to_off(loff_t off, struct inode *inode)
  26149. +{
  26150. + return clust_to_off(off_to_clust(off, inode), inode);
  26151. +}
  26152. +
  26153. +static inline pgoff_t off_to_clust_to_pg(loff_t off, struct inode *inode)
  26154. +{
  26155. + return clust_to_pg(off_to_clust(off, inode), inode);
  26156. +}
  26157. +
  26158. +static inline unsigned off_to_pgoff(loff_t off)
  26159. +{
  26160. + return off & (PAGE_SIZE - 1);
  26161. +}
  26162. +
  26163. +static inline unsigned off_to_cloff(loff_t off, struct inode *inode)
  26164. +{
  26165. + return off & ((loff_t) (inode_cluster_size(inode)) - 1);
  26166. +}
  26167. +
  26168. +static inline pgoff_t offset_in_clust(struct page *page)
  26169. +{
  26170. + assert("edward-1488", page != NULL);
  26171. + assert("edward-1489", page->mapping != NULL);
  26172. +
  26173. + return page_index(page) & ((cluster_nrpages(page->mapping->host)) - 1);
  26174. +}
  26175. +
  26176. +static inline int first_page_in_cluster(struct page *page)
  26177. +{
  26178. + return offset_in_clust(page) == 0;
  26179. +}
  26180. +
  26181. +static inline int last_page_in_cluster(struct page *page)
  26182. +{
  26183. + return offset_in_clust(page) ==
  26184. + cluster_nrpages(page->mapping->host) - 1;
  26185. +}
  26186. +
  26187. +static inline unsigned
  26188. +pg_to_off_to_cloff(unsigned long idx, struct inode *inode)
  26189. +{
  26190. + return off_to_cloff(pg_to_off(idx), inode);
  26191. +}
  26192. +
  26193. +/*********************** Size translators **************************/
  26194. +
  26195. +/* Translate linear size.
  26196. + * New units are (1 << @blkbits) times larger than old ones.
  26197. + * In other words, calculate number of logical blocks, occupied
  26198. + * by @count elements
  26199. + */
  26200. +static inline unsigned long size_in_blocks(loff_t count, unsigned blkbits)
  26201. +{
  26202. + return (count + (1UL << blkbits) - 1) >> blkbits;
  26203. +}
  26204. +
  26205. +/* size in pages */
  26206. +static inline pgoff_t size_in_pages(loff_t size)
  26207. +{
  26208. + return size_in_blocks(size, PAGE_SHIFT);
  26209. +}
  26210. +
  26211. +/* size in logical clusters */
  26212. +static inline cloff_t size_in_lc(loff_t size, struct inode *inode)
  26213. +{
  26214. + return size_in_blocks(size, inode_cluster_shift(inode));
  26215. +}
  26216. +
  26217. +/* size in pages to the size in page clusters */
  26218. +static inline cloff_t sp_to_spcl(pgoff_t size, struct inode *inode)
  26219. +{
  26220. + return size_in_blocks(size, cluster_nrpages_shift(inode));
  26221. +}
  26222. +
  26223. +/*********************** Size modulators ***************************/
  26224. +
  26225. +/*
  26226. + Modulate linear size by nominated block size and offset.
  26227. +
  26228. + The "finite" function (which is zero almost everywhere).
  26229. + How much is a height of the figure at a position @pos,
  26230. + when trying to construct rectangle of height (1 << @blkbits),
  26231. + and square @size.
  26232. +
  26233. + ******
  26234. + *******
  26235. + *******
  26236. + *******
  26237. + ----------> pos
  26238. +*/
  26239. +static inline unsigned __mbb(loff_t size, unsigned long pos, int blkbits)
  26240. +{
  26241. + unsigned end = size >> blkbits;
  26242. + if (pos < end)
  26243. + return 1U << blkbits;
  26244. + if (unlikely(pos > end))
  26245. + return 0;
  26246. + return size & ~(~0ull << blkbits);
  26247. +}
  26248. +
  26249. +/* the same as above, but block size is page size */
  26250. +static inline unsigned __mbp(loff_t size, pgoff_t pos)
  26251. +{
  26252. + return __mbb(size, pos, PAGE_SHIFT);
  26253. +}
  26254. +
  26255. +/* number of file's bytes in the nominated logical cluster */
  26256. +static inline unsigned lbytes(cloff_t index, struct inode *inode)
  26257. +{
  26258. + return __mbb(i_size_read(inode), index, inode_cluster_shift(inode));
  26259. +}
  26260. +
  26261. +/* number of file's bytes in the nominated page */
  26262. +static inline unsigned pbytes(pgoff_t index, struct inode *inode)
  26263. +{
  26264. + return __mbp(i_size_read(inode), index);
  26265. +}
  26266. +
  26267. +/**
  26268. + * number of pages occupied by @win->count bytes starting from
  26269. + * @win->off at logical cluster defined by @win. This is exactly
  26270. + * a number of pages to be modified and dirtied in any cluster operation.
  26271. + */
  26272. +static inline pgoff_t win_count_to_nrpages(struct reiser4_slide * win)
  26273. +{
  26274. + return ((win->off + win->count +
  26275. + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT) -
  26276. + off_to_pg(win->off);
  26277. +}
  26278. +
  26279. +/* return true, if logical cluster is not occupied by the file */
  26280. +static inline int new_logical_cluster(struct cluster_handle *clust,
  26281. + struct inode *inode)
  26282. +{
  26283. + return clust_to_off(clust->index, inode) >= i_size_read(inode);
  26284. +}
  26285. +
  26286. +/* return true, if pages @p1 and @p2 are of the same page cluster */
  26287. +static inline int same_page_cluster(struct page *p1, struct page *p2)
  26288. +{
  26289. + assert("edward-1490", p1 != NULL);
  26290. + assert("edward-1491", p2 != NULL);
  26291. + assert("edward-1492", p1->mapping != NULL);
  26292. + assert("edward-1493", p2->mapping != NULL);
  26293. +
  26294. + return (pg_to_clust(page_index(p1), p1->mapping->host) ==
  26295. + pg_to_clust(page_index(p2), p2->mapping->host));
  26296. +}
  26297. +
  26298. +static inline int cluster_is_complete(struct cluster_handle *clust,
  26299. + struct inode *inode)
  26300. +{
  26301. + return clust->tc.lsize == inode_cluster_size(inode);
  26302. +}
  26303. +
  26304. +static inline void reiser4_slide_init(struct reiser4_slide *win)
  26305. +{
  26306. + assert("edward-1084", win != NULL);
  26307. + memset(win, 0, sizeof *win);
  26308. +}
  26309. +
  26310. +static inline tfm_action
  26311. +cluster_get_tfm_act(struct tfm_cluster *tc)
  26312. +{
  26313. + assert("edward-1356", tc != NULL);
  26314. + return tc->act;
  26315. +}
  26316. +
  26317. +static inline void
  26318. +cluster_set_tfm_act(struct tfm_cluster *tc, tfm_action act)
  26319. +{
  26320. + assert("edward-1356", tc != NULL);
  26321. + tc->act = act;
  26322. +}
  26323. +
  26324. +static inline void cluster_init_act(struct cluster_handle *clust,
  26325. + tfm_action act,
  26326. + struct reiser4_slide *window)
  26327. +{
  26328. + assert("edward-84", clust != NULL);
  26329. + memset(clust, 0, sizeof *clust);
  26330. + cluster_set_tfm_act(&clust->tc, act);
  26331. + clust->dstat = INVAL_DISK_CLUSTER;
  26332. + clust->win = window;
  26333. +}
  26334. +
  26335. +static inline void cluster_init_read(struct cluster_handle *clust,
  26336. + struct reiser4_slide *window)
  26337. +{
  26338. + cluster_init_act(clust, TFMA_READ, window);
  26339. +}
  26340. +
  26341. +static inline void cluster_init_write(struct cluster_handle *clust,
  26342. + struct reiser4_slide *window)
  26343. +{
  26344. + cluster_init_act(clust, TFMA_WRITE, window);
  26345. +}
  26346. +
  26347. +/* true if @p1 and @p2 are items of the same disk cluster */
  26348. +static inline int same_disk_cluster(const coord_t *p1, const coord_t *p2)
  26349. +{
  26350. + /* drop this if you have other items to aggregate */
  26351. + assert("edward-1494", item_id_by_coord(p1) == CTAIL_ID);
  26352. +
  26353. + return item_plugin_by_coord(p1)->b.mergeable(p1, p2);
  26354. +}
  26355. +
  26356. +static inline int dclust_get_extension_dsize(hint_t *hint)
  26357. +{
  26358. + return hint->ext_coord.extension.ctail.dsize;
  26359. +}
  26360. +
  26361. +static inline void dclust_set_extension_dsize(hint_t *hint, int dsize)
  26362. +{
  26363. + hint->ext_coord.extension.ctail.dsize = dsize;
  26364. +}
  26365. +
  26366. +static inline int dclust_get_extension_shift(hint_t *hint)
  26367. +{
  26368. + return hint->ext_coord.extension.ctail.shift;
  26369. +}
  26370. +
  26371. +static inline int dclust_get_extension_ncount(hint_t *hint)
  26372. +{
  26373. + return hint->ext_coord.extension.ctail.ncount;
  26374. +}
  26375. +
  26376. +static inline void dclust_inc_extension_ncount(hint_t *hint)
  26377. +{
  26378. + hint->ext_coord.extension.ctail.ncount++;
  26379. +}
  26380. +
  26381. +static inline void dclust_init_extension(hint_t *hint)
  26382. +{
  26383. + memset(&hint->ext_coord.extension.ctail, 0,
  26384. + sizeof(hint->ext_coord.extension.ctail));
  26385. +}
  26386. +
  26387. +static inline int hint_is_unprepped_dclust(hint_t *hint)
  26388. +{
  26389. + assert("edward-1451", hint_is_valid(hint));
  26390. + return dclust_get_extension_shift(hint) == (int)UCTAIL_SHIFT;
  26391. +}
  26392. +
  26393. +static inline void coord_set_between_clusters(coord_t *coord)
  26394. +{
  26395. +#if REISER4_DEBUG
  26396. + int result;
  26397. + result = zload(coord->node);
  26398. + assert("edward-1296", !result);
  26399. +#endif
  26400. + if (!coord_is_between_items(coord)) {
  26401. + coord->between = AFTER_ITEM;
  26402. + coord->unit_pos = 0;
  26403. + }
  26404. +#if REISER4_DEBUG
  26405. + zrelse(coord->node);
  26406. +#endif
  26407. +}
  26408. +
  26409. +int reiser4_inflate_cluster(struct cluster_handle *, struct inode *);
  26410. +int find_disk_cluster(struct cluster_handle *, struct inode *, int read,
  26411. + znode_lock_mode mode);
  26412. +int checkout_logical_cluster(struct cluster_handle *, jnode * , struct inode *);
  26413. +int reiser4_deflate_cluster(struct cluster_handle *, struct inode *);
  26414. +void truncate_complete_page_cluster(struct inode *inode, cloff_t start,
  26415. + int even_cows);
  26416. +void invalidate_hint_cluster(struct cluster_handle *clust);
  26417. +int get_disk_cluster_locked(struct cluster_handle *clust, struct inode *inode,
  26418. + znode_lock_mode lock_mode);
  26419. +void reset_cluster_params(struct cluster_handle *clust);
  26420. +int set_cluster_by_page(struct cluster_handle *clust, struct page *page,
  26421. + int count);
  26422. +int prepare_page_cluster(struct inode *inode, struct cluster_handle *clust,
  26423. + rw_op rw);
  26424. +void __put_page_cluster(int from, int count, struct page **pages,
  26425. + struct inode *inode);
  26426. +void put_page_cluster(struct cluster_handle *clust,
  26427. + struct inode *inode, rw_op rw);
  26428. +void put_cluster_handle(struct cluster_handle *clust);
  26429. +int grab_tfm_stream(struct inode *inode, struct tfm_cluster *tc,
  26430. + tfm_stream_id id);
  26431. +int tfm_cluster_is_uptodate(struct tfm_cluster *tc);
  26432. +void tfm_cluster_set_uptodate(struct tfm_cluster *tc);
  26433. +void tfm_cluster_clr_uptodate(struct tfm_cluster *tc);
  26434. +
  26435. +/* move cluster handle to the target position
  26436. + specified by the page of index @pgidx */
  26437. +static inline void move_cluster_forward(struct cluster_handle *clust,
  26438. + struct inode *inode,
  26439. + pgoff_t pgidx)
  26440. +{
  26441. + assert("edward-1297", clust != NULL);
  26442. + assert("edward-1298", inode != NULL);
  26443. +
  26444. + reset_cluster_params(clust);
  26445. + if (clust->index_valid &&
  26446. + /* Hole in the indices. Hint became invalid and can not be
  26447. + used by find_cluster_item() even if seal/node versions
  26448. + will coincide */
  26449. + pg_to_clust(pgidx, inode) != clust->index + 1) {
  26450. + reiser4_unset_hint(clust->hint);
  26451. + invalidate_hint_cluster(clust);
  26452. + }
  26453. + clust->index = pg_to_clust(pgidx, inode);
  26454. + clust->index_valid = 1;
  26455. +}
  26456. +
  26457. +static inline int alloc_clust_pages(struct cluster_handle *clust,
  26458. + struct inode *inode)
  26459. +{
  26460. + assert("edward-791", clust != NULL);
  26461. + assert("edward-792", inode != NULL);
  26462. + clust->pages =
  26463. + kmalloc(sizeof(*clust->pages) << inode_cluster_shift(inode),
  26464. + reiser4_ctx_gfp_mask_get());
  26465. + if (!clust->pages)
  26466. + return -ENOMEM;
  26467. + return 0;
  26468. +}
  26469. +
  26470. +static inline void free_clust_pages(struct cluster_handle *clust)
  26471. +{
  26472. + kfree(clust->pages);
  26473. +}
  26474. +
  26475. +#endif /* __FS_REISER4_CLUSTER_H__ */
  26476. +
  26477. +/* Make Linus happy.
  26478. + Local variables:
  26479. + c-indentation-style: "K&R"
  26480. + mode-name: "LC"
  26481. + c-basic-offset: 8
  26482. + tab-width: 8
  26483. + fill-column: 120
  26484. + scroll-step: 1
  26485. + End:
  26486. +*/
  26487. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/compress/compress.c linux-5.16.14/fs/reiser4/plugin/compress/compress.c
  26488. --- linux-5.16.14.orig/fs/reiser4/plugin/compress/compress.c 1970-01-01 01:00:00.000000000 +0100
  26489. +++ linux-5.16.14/fs/reiser4/plugin/compress/compress.c 2022-03-12 13:26:19.662892753 +0100
  26490. @@ -0,0 +1,531 @@
  26491. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  26492. +/* reiser4 compression transform plugins */
  26493. +
  26494. +#include "../../debug.h"
  26495. +#include "../../inode.h"
  26496. +#include "../plugin.h"
  26497. +
  26498. +#include <linux/lzo.h>
  26499. +#include <linux/zstd.h>
  26500. +#include <linux/zlib.h>
  26501. +#include <linux/types.h>
  26502. +#include <linux/hardirq.h>
  26503. +
  26504. +static int change_compression(struct inode *inode,
  26505. + reiser4_plugin * plugin,
  26506. + pset_member memb)
  26507. +{
  26508. + assert("edward-1316", inode != NULL);
  26509. + assert("edward-1317", plugin != NULL);
  26510. + assert("edward-1318", is_reiser4_inode(inode));
  26511. + assert("edward-1319",
  26512. + plugin->h.type_id == REISER4_COMPRESSION_PLUGIN_TYPE);
  26513. +
  26514. + /* cannot change compression plugin of already existing regular object */
  26515. + if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE))
  26516. + return RETERR(-EINVAL);
  26517. +
  26518. + /* If matches, nothing to change. */
  26519. + if (inode_compression_plugin(inode) != NULL &&
  26520. + inode_compression_plugin(inode)->h.id == plugin->h.id)
  26521. + return 0;
  26522. +
  26523. + return aset_set_unsafe(&reiser4_inode_data(inode)->pset,
  26524. + PSET_COMPRESSION, plugin);
  26525. +}
  26526. +
  26527. +static reiser4_plugin_ops compression_plugin_ops = {
  26528. + .init = NULL,
  26529. + .load = NULL,
  26530. + .save_len = NULL,
  26531. + .save = NULL,
  26532. + .change = &change_compression
  26533. +};
  26534. +
  26535. +/******************************************************************************/
  26536. +/* gzip1 compression */
  26537. +/******************************************************************************/
  26538. +
  26539. +#define GZIP1_DEF_LEVEL Z_BEST_SPEED
  26540. +#define GZIP1_DEF_WINBITS 15
  26541. +#define GZIP1_DEF_MEMLEVEL MAX_MEM_LEVEL
  26542. +#define ZSTD_DEF_LEVEL 3
  26543. +
  26544. +static int gzip1_init(void)
  26545. +{
  26546. + return 0;
  26547. +}
  26548. +
  26549. +static int gzip1_overrun(unsigned src_len UNUSED_ARG)
  26550. +{
  26551. + return 0;
  26552. +}
  26553. +
  26554. +static coa_t gzip1_alloc(tfm_action act)
  26555. +{
  26556. + coa_t coa = NULL;
  26557. + int ret = 0;
  26558. + switch (act) {
  26559. + case TFMA_WRITE: /* compress */
  26560. + coa = reiser4_vmalloc(zlib_deflate_workspacesize(MAX_WBITS,
  26561. + MAX_MEM_LEVEL));
  26562. + if (!coa) {
  26563. + ret = -ENOMEM;
  26564. + break;
  26565. + }
  26566. + break;
  26567. + case TFMA_READ: /* decompress */
  26568. + coa = reiser4_vmalloc(zlib_inflate_workspacesize());
  26569. + if (!coa) {
  26570. + ret = -ENOMEM;
  26571. + break;
  26572. + }
  26573. + break;
  26574. + default:
  26575. + impossible("edward-767", "unknown tfm action");
  26576. + }
  26577. + if (ret)
  26578. + return ERR_PTR(ret);
  26579. + return coa;
  26580. +}
  26581. +
  26582. +static void gzip1_free(coa_t coa, tfm_action act)
  26583. +{
  26584. + assert("edward-769", coa != NULL);
  26585. +
  26586. + switch (act) {
  26587. + case TFMA_WRITE: /* compress */
  26588. + vfree(coa);
  26589. + break;
  26590. + case TFMA_READ: /* decompress */
  26591. + vfree(coa);
  26592. + break;
  26593. + default:
  26594. + impossible("edward-770", "unknown tfm action");
  26595. + }
  26596. + return;
  26597. +}
  26598. +
  26599. +static int gzip1_min_size_deflate(void)
  26600. +{
  26601. + return 64;
  26602. +}
  26603. +
  26604. +static void
  26605. +gzip1_compress(coa_t coa, __u8 * src_first, size_t src_len,
  26606. + __u8 * dst_first, size_t *dst_len)
  26607. +{
  26608. + int ret = 0;
  26609. + struct z_stream_s stream;
  26610. +
  26611. + assert("edward-842", coa != NULL);
  26612. + assert("edward-875", src_len != 0);
  26613. +
  26614. + stream.workspace = coa;
  26615. + ret = zlib_deflateInit2(&stream, GZIP1_DEF_LEVEL, Z_DEFLATED,
  26616. + -GZIP1_DEF_WINBITS, GZIP1_DEF_MEMLEVEL,
  26617. + Z_DEFAULT_STRATEGY);
  26618. + if (ret != Z_OK) {
  26619. + warning("edward-771", "zlib_deflateInit2 returned %d\n", ret);
  26620. + goto rollback;
  26621. + }
  26622. + ret = zlib_deflateReset(&stream);
  26623. + if (ret != Z_OK) {
  26624. + warning("edward-772", "zlib_deflateReset returned %d\n", ret);
  26625. + goto rollback;
  26626. + }
  26627. + stream.next_in = src_first;
  26628. + stream.avail_in = src_len;
  26629. + stream.next_out = dst_first;
  26630. + stream.avail_out = *dst_len;
  26631. +
  26632. + ret = zlib_deflate(&stream, Z_FINISH);
  26633. + if (ret != Z_STREAM_END) {
  26634. + if (ret != Z_OK)
  26635. + warning("edward-773",
  26636. + "zlib_deflate returned %d\n", ret);
  26637. + goto rollback;
  26638. + }
  26639. + *dst_len = stream.total_out;
  26640. + return;
  26641. + rollback:
  26642. + *dst_len = src_len;
  26643. + return;
  26644. +}
  26645. +
  26646. +static void
  26647. +gzip1_decompress(coa_t coa, __u8 * src_first, size_t src_len,
  26648. + __u8 * dst_first, size_t *dst_len)
  26649. +{
  26650. + int ret = 0;
  26651. + struct z_stream_s stream;
  26652. +
  26653. + assert("edward-843", coa != NULL);
  26654. + assert("edward-876", src_len != 0);
  26655. +
  26656. + stream.workspace = coa;
  26657. + ret = zlib_inflateInit2(&stream, -GZIP1_DEF_WINBITS);
  26658. + if (ret != Z_OK) {
  26659. + warning("edward-774", "zlib_inflateInit2 returned %d\n", ret);
  26660. + return;
  26661. + }
  26662. + ret = zlib_inflateReset(&stream);
  26663. + if (ret != Z_OK) {
  26664. + warning("edward-775", "zlib_inflateReset returned %d\n", ret);
  26665. + return;
  26666. + }
  26667. +
  26668. + stream.next_in = src_first;
  26669. + stream.avail_in = src_len;
  26670. + stream.next_out = dst_first;
  26671. + stream.avail_out = *dst_len;
  26672. +
  26673. + ret = zlib_inflate(&stream, Z_SYNC_FLUSH);
  26674. + /*
  26675. + * Work around a bug in zlib, which sometimes wants to taste an extra
  26676. + * byte when being used in the (undocumented) raw deflate mode.
  26677. + * (From USAGI).
  26678. + */
  26679. + if (ret == Z_OK && !stream.avail_in && stream.avail_out) {
  26680. + u8 zerostuff = 0;
  26681. + stream.next_in = &zerostuff;
  26682. + stream.avail_in = 1;
  26683. + ret = zlib_inflate(&stream, Z_FINISH);
  26684. + }
  26685. + if (ret != Z_STREAM_END) {
  26686. + warning("edward-776", "zlib_inflate returned %d\n", ret);
  26687. + return;
  26688. + }
  26689. + *dst_len = stream.total_out;
  26690. + return;
  26691. +}
  26692. +
  26693. +/******************************************************************************/
  26694. +/* lzo1 compression */
  26695. +/******************************************************************************/
  26696. +
  26697. +static int lzo1_init(void)
  26698. +{
  26699. + return 0;
  26700. +}
  26701. +
  26702. +static int lzo1_overrun(unsigned in_len)
  26703. +{
  26704. + return in_len / 16 + 64 + 3;
  26705. +}
  26706. +
  26707. +static coa_t lzo1_alloc(tfm_action act)
  26708. +{
  26709. + int ret = 0;
  26710. + coa_t coa = NULL;
  26711. +
  26712. + switch (act) {
  26713. + case TFMA_WRITE: /* compress */
  26714. + coa = reiser4_vmalloc(LZO1X_1_MEM_COMPRESS);
  26715. + if (!coa) {
  26716. + ret = -ENOMEM;
  26717. + break;
  26718. + } /* fall through */
  26719. + case TFMA_READ: /* decompress */
  26720. + break;
  26721. + default:
  26722. + impossible("edward-877", "unknown tfm action");
  26723. + }
  26724. + if (ret)
  26725. + return ERR_PTR(ret);
  26726. + return coa;
  26727. +}
  26728. +
  26729. +static void lzo1_free(coa_t coa, tfm_action act)
  26730. +{
  26731. + assert("edward-879", coa != NULL);
  26732. +
  26733. + switch (act) {
  26734. + case TFMA_WRITE: /* compress */
  26735. + vfree(coa);
  26736. + break;
  26737. + case TFMA_READ: /* decompress */
  26738. + impossible("edward-1304", "trying to free non-allocated workspace");
  26739. + break;
  26740. + default:
  26741. + impossible("edward-880", "unknown tfm action");
  26742. + }
  26743. + return;
  26744. +}
  26745. +
  26746. +static int lzo1_min_size_deflate(void)
  26747. +{
  26748. + return 256;
  26749. +}
  26750. +
  26751. +static void
  26752. +lzo1_compress(coa_t coa, __u8 * src_first, size_t src_len,
  26753. + __u8 * dst_first, size_t *dst_len)
  26754. +{
  26755. + int result;
  26756. +
  26757. + assert("edward-846", coa != NULL);
  26758. + assert("edward-847", src_len != 0);
  26759. +
  26760. + result = lzo1x_1_compress(src_first, src_len, dst_first, dst_len, coa);
  26761. + if (unlikely(result != LZO_E_OK)) {
  26762. + warning("edward-849", "lzo1x_1_compress failed\n");
  26763. + goto out;
  26764. + }
  26765. + if (*dst_len >= src_len) {
  26766. + //warning("edward-850", "lzo1x_1_compress: incompressible data\n");
  26767. + goto out;
  26768. + }
  26769. + return;
  26770. + out:
  26771. + *dst_len = src_len;
  26772. + return;
  26773. +}
  26774. +
  26775. +static void
  26776. +lzo1_decompress(coa_t coa, __u8 * src_first, size_t src_len,
  26777. + __u8 * dst_first, size_t *dst_len)
  26778. +{
  26779. + int result;
  26780. +
  26781. + assert("edward-851", coa == NULL);
  26782. + assert("edward-852", src_len != 0);
  26783. +
  26784. + result = lzo1x_decompress_safe(src_first, src_len, dst_first, dst_len);
  26785. + if (result != LZO_E_OK)
  26786. + warning("edward-853", "lzo1x_decompress_safe failed\n");
  26787. + return;
  26788. +}
  26789. +
  26790. +/******************************************************************************/
  26791. +/* zstd1 compression */
  26792. +/******************************************************************************/
  26793. +
  26794. +typedef struct {
  26795. + void *workspace;
  26796. + zstd_cctx *cctx;
  26797. +} zstd1_coa_c;
  26798. +
  26799. +typedef struct {
  26800. + void *workspace;
  26801. + zstd_dctx *dctx;
  26802. +} zstd1_coa_d;
  26803. +
  26804. +static int zstd1_init(void)
  26805. +{
  26806. + return 0;
  26807. +}
  26808. +
  26809. +static int zstd1_overrun(unsigned src_len UNUSED_ARG)
  26810. +{
  26811. + return zstd_compress_bound(src_len) - src_len;
  26812. +}
  26813. +
  26814. +static zstd_parameters zstd_params(void)
  26815. +{
  26816. + return zstd_get_params(ZSTD_DEF_LEVEL, 0);
  26817. +}
  26818. +
  26819. +static coa_t zstd1_alloc(tfm_action act)
  26820. +{
  26821. + int ret = 0;
  26822. + size_t workspace_size;
  26823. + coa_t coa = NULL;
  26824. + const zstd_parameters params = zstd_params();
  26825. +
  26826. + switch (act) {
  26827. + case TFMA_WRITE: /* compress */
  26828. + coa = reiser4_vmalloc(sizeof(zstd1_coa_c));
  26829. + if (!coa) {
  26830. + ret = -ENOMEM;
  26831. + break;
  26832. + }
  26833. + workspace_size = zstd_cctx_workspace_bound(&params.cParams);
  26834. + if (zstd_is_error(workspace_size)) {
  26835. + ret = -EINVAL;
  26836. + break;
  26837. + }
  26838. + ((zstd1_coa_c*)coa)->workspace = reiser4_vmalloc(workspace_size);
  26839. + if (!(((zstd1_coa_c*)coa)->workspace)) {
  26840. + ret = -ENOMEM;
  26841. + vfree(coa);
  26842. + break;
  26843. + }
  26844. + ((zstd1_coa_c*)coa)->cctx = zstd_init_cctx(((zstd1_coa_c*)coa)->workspace, workspace_size);
  26845. + if (!(((zstd1_coa_c*)coa)->cctx)) {
  26846. + ret = -ENOMEM;
  26847. + vfree(((zstd1_coa_c*)coa)->workspace);
  26848. + vfree(coa);
  26849. + break;
  26850. + }
  26851. + break;
  26852. + case TFMA_READ: /* decompress */
  26853. + coa = reiser4_vmalloc(sizeof(zstd1_coa_d));
  26854. + if (!coa) {
  26855. + ret = -ENOMEM;
  26856. + break;
  26857. + }
  26858. + workspace_size = zstd_dctx_workspace_bound();
  26859. + ((zstd1_coa_d*)coa)->workspace = reiser4_vmalloc(workspace_size);
  26860. + if (!(((zstd1_coa_d*)coa)->workspace)) {
  26861. + ret = -ENOMEM;
  26862. + vfree(coa);
  26863. + break;
  26864. + }
  26865. + ((zstd1_coa_d*)coa)->dctx = zstd_init_dctx(((zstd1_coa_d*)coa)->workspace, workspace_size);
  26866. + if (!(((zstd1_coa_d*)coa)->dctx)) {
  26867. + ret = -ENOMEM;
  26868. + vfree(((zstd1_coa_d*)coa)->workspace);
  26869. + vfree(coa);
  26870. + break;
  26871. + }
  26872. + break;
  26873. + default:
  26874. + impossible("bsinot-1",
  26875. + "trying to alloc workspace for unknown tfm action");
  26876. + }
  26877. + if (ret) {
  26878. + warning("bsinot-2",
  26879. + "alloc workspace for zstd (tfm action = %d) failed\n",
  26880. + act);
  26881. + return ERR_PTR(ret);
  26882. + }
  26883. + return coa;
  26884. +}
  26885. +
  26886. +static void zstd1_free(coa_t coa, tfm_action act)
  26887. +{
  26888. + assert("bsinot-3", coa != NULL);
  26889. +
  26890. + switch (act) {
  26891. + case TFMA_WRITE: /* compress */
  26892. + vfree(((zstd1_coa_c*)coa)->workspace);
  26893. + vfree(coa);
  26894. + //printk(KERN_WARNING "free comp memory -- %p\n", coa);
  26895. + break;
  26896. + case TFMA_READ: /* decompress */
  26897. + vfree(((zstd1_coa_d*)coa)->workspace);
  26898. + vfree(coa);
  26899. + //printk(KERN_WARNING "free decomp memory -- %p\n", coa);
  26900. + break;
  26901. + default:
  26902. + impossible("bsinot-4", "unknown tfm action");
  26903. + }
  26904. + return;
  26905. +}
  26906. +
  26907. +static int zstd1_min_size_deflate(void)
  26908. +{
  26909. + return 256; /* I'm not sure about the correct value, so took from LZO1 */
  26910. +}
  26911. +
  26912. +static void
  26913. +zstd1_compress(coa_t coa, __u8 * src_first, size_t src_len,
  26914. + __u8 * dst_first, size_t *dst_len)
  26915. +{
  26916. + unsigned int result;
  26917. + const zstd_parameters params = zstd_params();
  26918. +
  26919. + assert("bsinot-5", coa != NULL);
  26920. + assert("bsinot-6", src_len != 0);
  26921. + result = zstd_compress_cctx(((zstd1_coa_c*)coa)->cctx, dst_first, *dst_len, src_first, src_len, &params);
  26922. + if (zstd_is_error(result)) {
  26923. + warning("bsinot-7", "zstd1_compressCCtx failed\n");
  26924. + goto out;
  26925. + }
  26926. + *dst_len = result;
  26927. + if (*dst_len >= src_len) {
  26928. + //warning("bsinot-8", "zstd1_compressCCtx: incompressible data\n");
  26929. + goto out;
  26930. + }
  26931. + return;
  26932. + out:
  26933. + *dst_len = src_len;
  26934. + return;
  26935. +}
  26936. +
  26937. +static void
  26938. +zstd1_decompress(coa_t coa, __u8 * src_first, size_t src_len,
  26939. + __u8 * dst_first, size_t *dst_len)
  26940. +{
  26941. + unsigned int result;
  26942. +
  26943. + assert("bsinot-9", coa != NULL);
  26944. + assert("bsinot-10", src_len != 0);
  26945. +
  26946. + result = zstd_decompress_dctx(((zstd1_coa_d*)coa)->dctx, dst_first, *dst_len, src_first, src_len);
  26947. + /* on error, leave *dst_len untouched (don't report an error code as a length) */
  26948. + if (zstd_is_error(result)) {
  26949. + warning("bsinot-11", "zstd1_decompressDCtx failed\n"); return; }
  26950. + *dst_len = result;
  26951. + return;
  26952. +}
  26953. +
  26954. +
  26955. +compression_plugin compression_plugins[LAST_COMPRESSION_ID] = {
  26956. + [LZO1_COMPRESSION_ID] = {
  26957. + .h = {
  26958. + .type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
  26959. + .id = LZO1_COMPRESSION_ID,
  26960. + .pops = &compression_plugin_ops,
  26961. + .label = "lzo1",
  26962. + .desc = "lzo1 compression transform",
  26963. + .linkage = {NULL, NULL}
  26964. + },
  26965. + .init = lzo1_init,
  26966. + .overrun = lzo1_overrun,
  26967. + .alloc = lzo1_alloc,
  26968. + .free = lzo1_free,
  26969. + .min_size_deflate = lzo1_min_size_deflate,
  26970. + .checksum = reiser4_adler32,
  26971. + .compress = lzo1_compress,
  26972. + .decompress = lzo1_decompress
  26973. + },
  26974. + [GZIP1_COMPRESSION_ID] = {
  26975. + .h = {
  26976. + .type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
  26977. + .id = GZIP1_COMPRESSION_ID,
  26978. + .pops = &compression_plugin_ops,
  26979. + .label = "gzip1",
  26980. + .desc = "gzip1 compression transform",
  26981. + .linkage = {NULL, NULL}
  26982. + },
  26983. + .init = gzip1_init,
  26984. + .overrun = gzip1_overrun,
  26985. + .alloc = gzip1_alloc,
  26986. + .free = gzip1_free,
  26987. + .min_size_deflate = gzip1_min_size_deflate,
  26988. + .checksum = reiser4_adler32,
  26989. + .compress = gzip1_compress,
  26990. + .decompress = gzip1_decompress
  26991. + },
  26992. + [ZSTD1_COMPRESSION_ID] = {
  26993. + .h = {
  26994. + .type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
  26995. + .id = ZSTD1_COMPRESSION_ID,
  26996. + .pops = &compression_plugin_ops,
  26997. + .label = "zstd1",
  26998. + .desc = "zstd1 compression transform",
  26999. + .linkage = {NULL, NULL}
  27000. + },
  27001. + .init = zstd1_init,
  27002. + .overrun = zstd1_overrun,
  27003. + .alloc = zstd1_alloc,
  27004. + .free = zstd1_free,
  27005. + .min_size_deflate = zstd1_min_size_deflate,
  27006. + .checksum = reiser4_adler32,
  27007. + .compress = zstd1_compress,
  27008. + .decompress = zstd1_decompress
  27009. + }
  27010. +};
  27011. +
  27012. +/*
  27013. + Local variables:
  27014. + c-indentation-style: "K&R"
  27015. + mode-name: "LC"
  27016. + c-basic-offset: 8
  27017. + tab-width: 8
  27018. + fill-column: 120
  27019. + scroll-step: 1
  27020. + End:
  27021. +*/
  27022. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/compress/compress.h linux-5.16.14/fs/reiser4/plugin/compress/compress.h
  27023. --- linux-5.16.14.orig/fs/reiser4/plugin/compress/compress.h 1970-01-01 01:00:00.000000000 +0100
  27024. +++ linux-5.16.14/fs/reiser4/plugin/compress/compress.h 2022-03-12 13:26:19.663892755 +0100
  27025. @@ -0,0 +1,44 @@
  27026. +#if !defined( __FS_REISER4_COMPRESS_H__ )
  27027. +#define __FS_REISER4_COMPRESS_H__
  27028. +
  27029. +#include <linux/types.h>
  27030. +#include <linux/string.h>
  27031. +
  27032. +/* transform direction */
  27033. +typedef enum {
  27034. + TFMA_READ, /* decrypt, decompress */
  27035. + TFMA_WRITE, /* encrypt, compress */
  27036. + TFMA_LAST
  27037. +} tfm_action;
  27038. +
  27039. +/* supported compression algorithms */
  27040. +typedef enum {
  27041. + LZO1_COMPRESSION_ID,
  27042. + GZIP1_COMPRESSION_ID,
  27043. + ZSTD1_COMPRESSION_ID,
  27044. + LAST_COMPRESSION_ID,
  27045. +} reiser4_compression_id;
  27046. +
  27047. +/* the same as pgoff, but units are page clusters */
  27048. +typedef unsigned long cloff_t;
  27049. +
  27050. +/* working data of a (de)compression algorithm */
  27051. +typedef void *coa_t;
  27052. +
  27053. +/* table for all supported (de)compression algorithms */
  27054. +typedef coa_t coa_set[LAST_COMPRESSION_ID][TFMA_LAST];
  27055. +
  27056. +__u32 reiser4_adler32(char *data, __u32 len);
  27057. +
  27058. +#endif /* __FS_REISER4_COMPRESS_H__ */
  27059. +
  27060. +/* Make Linus happy.
  27061. + Local variables:
  27062. + c-indentation-style: "K&R"
  27063. + mode-name: "LC"
  27064. + c-basic-offset: 8
  27065. + tab-width: 8
  27066. + fill-column: 120
  27067. + scroll-step: 1
  27068. + End:
  27069. +*/
  27070. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/compress/compress_mode.c linux-5.16.14/fs/reiser4/plugin/compress/compress_mode.c
  27071. --- linux-5.16.14.orig/fs/reiser4/plugin/compress/compress_mode.c 1970-01-01 01:00:00.000000000 +0100
  27072. +++ linux-5.16.14/fs/reiser4/plugin/compress/compress_mode.c 2022-03-12 13:26:19.663892755 +0100
  27073. @@ -0,0 +1,162 @@
  27074. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  27075. +/* This file contains Reiser4 compression mode plugins.
  27076. +
  27077. + Compression mode plugin is a set of handlers called by compressor
  27078. + at flush time and represent some heuristics including the ones
  27079. + which are to avoid compression of incompressible data, see
  27080. + http://www.namesys.com/cryptcompress_design.html for more details.
  27081. +*/
  27082. +#include "../../inode.h"
  27083. +#include "../plugin.h"
  27084. +
  27085. +static int should_deflate_none(struct inode * inode, cloff_t index)
  27086. +{
  27087. + return 0;
  27088. +}
  27089. +
  27090. +static int should_deflate_common(struct inode * inode, cloff_t index)
  27091. +{
  27092. + return compression_is_on(cryptcompress_inode_data(inode));
  27093. +}
  27094. +
  27095. +static int discard_hook_ultim(struct inode *inode, cloff_t index)
  27096. +{
  27097. + turn_off_compression(cryptcompress_inode_data(inode));
  27098. + return 0;
  27099. +}
  27100. +
  27101. +static int discard_hook_lattd(struct inode *inode, cloff_t index)
  27102. +{
  27103. + struct cryptcompress_info * info = cryptcompress_inode_data(inode);
  27104. +
  27105. + assert("edward-1462",
  27106. + get_lattice_factor(info) >= MIN_LATTICE_FACTOR &&
  27107. + get_lattice_factor(info) <= MAX_LATTICE_FACTOR);
  27108. +
  27109. + turn_off_compression(info);
  27110. + if (get_lattice_factor(info) < MAX_LATTICE_FACTOR)
  27111. + set_lattice_factor(info, get_lattice_factor(info) << 1);
  27112. + return 0;
  27113. +}
  27114. +
  27115. +static int accept_hook_lattd(struct inode *inode, cloff_t index)
  27116. +{
  27117. + turn_on_compression(cryptcompress_inode_data(inode));
  27118. + set_lattice_factor(cryptcompress_inode_data(inode), MIN_LATTICE_FACTOR);
  27119. + return 0;
  27120. +}
  27121. +
  27122. +/* Check on dynamic lattice, the adaptive compression modes which
  27123. + defines the following behavior:
  27124. +
  27125. + Compression is on: try to compress everything and turn
  27126. + it off, whenever cluster is incompressible.
  27127. +
  27128. + Compression is off: try to compress clusters of indexes
  27129. + k * FACTOR (k = 0, 1, 2, ...) and turn it on, if some of
  27130. + them is compressible. If incompressible, then increase FACTOR */
  27131. +
  27132. +/* check if @index belongs to one-dimensional lattice
  27133. + of sparce factor @factor */
  27134. +static int is_on_lattice(cloff_t index, int factor)
  27135. +{
  27136. + return (factor ? index % factor == 0: index == 0);
  27137. +}
  27138. +
  27139. +static int should_deflate_lattd(struct inode * inode, cloff_t index)
  27140. +{
  27141. + return should_deflate_common(inode, index) ||
  27142. + is_on_lattice(index,
  27143. + get_lattice_factor
  27144. + (cryptcompress_inode_data(inode)));
  27145. +}
  27146. +
  27147. +/* compression mode_plugins */
  27148. +compression_mode_plugin compression_mode_plugins[LAST_COMPRESSION_MODE_ID] = {
  27149. + [NONE_COMPRESSION_MODE_ID] = {
  27150. + .h = {
  27151. + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  27152. + .id = NONE_COMPRESSION_MODE_ID,
  27153. + .pops = NULL,
  27154. + .label = "none",
  27155. + .desc = "Compress nothing",
  27156. + .linkage = {NULL, NULL}
  27157. + },
  27158. + .should_deflate = should_deflate_none,
  27159. + .accept_hook = NULL,
  27160. + .discard_hook = NULL
  27161. + },
  27162. + /* Check-on-dynamic-lattice adaptive compression mode */
  27163. + [LATTD_COMPRESSION_MODE_ID] = {
  27164. + .h = {
  27165. + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  27166. + .id = LATTD_COMPRESSION_MODE_ID,
  27167. + .pops = NULL,
  27168. + .label = "lattd",
  27169. + .desc = "Check on dynamic lattice",
  27170. + .linkage = {NULL, NULL}
  27171. + },
  27172. + .should_deflate = should_deflate_lattd,
  27173. + .accept_hook = accept_hook_lattd,
  27174. + .discard_hook = discard_hook_lattd
  27175. + },
  27176. + /* Check-ultimately compression mode:
  27177. + Turn off compression forever as soon as we meet
  27178. + incompressible data */
  27179. + [ULTIM_COMPRESSION_MODE_ID] = {
  27180. + .h = {
  27181. + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  27182. + .id = ULTIM_COMPRESSION_MODE_ID,
  27183. + .pops = NULL,
  27184. + .label = "ultim",
  27185. + .desc = "Check ultimately",
  27186. + .linkage = {NULL, NULL}
  27187. + },
  27188. + .should_deflate = should_deflate_common,
  27189. + .accept_hook = NULL,
  27190. + .discard_hook = discard_hook_ultim
  27191. + },
  27192. + /* Force-to-compress-everything compression mode */
  27193. + [FORCE_COMPRESSION_MODE_ID] = {
  27194. + .h = {
  27195. + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  27196. + .id = FORCE_COMPRESSION_MODE_ID,
  27197. + .pops = NULL,
  27198. + .label = "force",
  27199. + .desc = "Force to compress everything",
  27200. + .linkage = {NULL, NULL}
  27201. + },
  27202. + .should_deflate = NULL,
  27203. + .accept_hook = NULL,
  27204. + .discard_hook = NULL
  27205. + },
  27206. + /* Convert-to-extent compression mode.
  27207. + In this mode items will be converted to extents and management
  27208. + will be passed to (classic) unix file plugin as soon as ->write()
  27209. + detects that the first complete logical cluster (of index #0) is
  27210. + incompressible. */
  27211. + [CONVX_COMPRESSION_MODE_ID] = {
  27212. + .h = {
  27213. + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  27214. + .id = CONVX_COMPRESSION_MODE_ID,
  27215. + .pops = NULL,
  27216. + .label = "conv",
  27217. + .desc = "Convert to extent",
  27218. + .linkage = {NULL, NULL}
  27219. + },
  27220. + .should_deflate = should_deflate_common,
  27221. + .accept_hook = NULL,
  27222. + .discard_hook = NULL
  27223. + }
  27224. +};
  27225. +
  27226. +/*
  27227. + Local variables:
  27228. + c-indentation-style: "K&R"
  27229. + mode-name: "LC"
  27230. + c-basic-offset: 8
  27231. + tab-width: 8
  27232. + fill-column: 120
  27233. + scroll-step: 1
  27234. + End:
  27235. +*/
  27236. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/compress/Makefile linux-5.16.14/fs/reiser4/plugin/compress/Makefile
  27237. --- linux-5.16.14.orig/fs/reiser4/plugin/compress/Makefile 1970-01-01 01:00:00.000000000 +0100
  27238. +++ linux-5.16.14/fs/reiser4/plugin/compress/Makefile 2022-03-12 13:26:19.662892753 +0100
  27239. @@ -0,0 +1,8 @@
  27240. +
  27241. +MODULE := compress_plugins
  27242. +
  27243. +obj-$(CONFIG_REISER4_FS) := $(MODULE).o
  27244. +
  27245. +$(MODULE)-objs += \
  27246. + compress.o \
  27247. + compress_mode.o
  27248. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/crypto/cipher.c linux-5.16.14/fs/reiser4/plugin/crypto/cipher.c
  27249. --- linux-5.16.14.orig/fs/reiser4/plugin/crypto/cipher.c 1970-01-01 01:00:00.000000000 +0100
  27250. +++ linux-5.16.14/fs/reiser4/plugin/crypto/cipher.c 2022-03-12 13:26:19.663892755 +0100
  27251. @@ -0,0 +1,37 @@
  27252. +/* Copyright 2001, 2002, 2003 by Hans Reiser,
  27253. + licensing governed by reiser4/README */
  27254. +/* Reiser4 cipher transform plugins */
  27255. +
  27256. +#include "../../debug.h"
  27257. +#include "../plugin.h"
  27258. +
  27259. +cipher_plugin cipher_plugins[LAST_CIPHER_ID] = {
  27260. + [NONE_CIPHER_ID] = {
  27261. + .h = {
  27262. + .type_id = REISER4_CIPHER_PLUGIN_TYPE,
  27263. + .id = NONE_CIPHER_ID,
  27264. + .pops = NULL,
  27265. + .label = "none",
  27266. + .desc = "no cipher transform",
  27267. + .linkage = {NULL, NULL}
  27268. + },
  27269. + .alloc = NULL,
  27270. + .free = NULL,
  27271. + .scale = NULL,
  27272. + .align_stream = NULL,
  27273. + .setkey = NULL,
  27274. + .encrypt = NULL,
  27275. + .decrypt = NULL
  27276. + }
  27277. +};
  27278. +
  27279. +/* Make Linus happy.
  27280. + Local variables:
  27281. + c-indentation-style: "K&R"
  27282. + mode-name: "LC"
  27283. + c-basic-offset: 8
  27284. + tab-width: 8
  27285. + fill-column: 120
  27286. + scroll-step: 1
  27287. + End:
  27288. +*/
  27289. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/crypto/cipher.h linux-5.16.14/fs/reiser4/plugin/crypto/cipher.h
  27290. --- linux-5.16.14.orig/fs/reiser4/plugin/crypto/cipher.h 1970-01-01 01:00:00.000000000 +0100
  27291. +++ linux-5.16.14/fs/reiser4/plugin/crypto/cipher.h 2022-03-12 13:26:19.663892755 +0100
  27292. @@ -0,0 +1,55 @@
  27293. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  27294. +/* This file contains definitions for the objects operated
  27295. + by reiser4 key manager, which is something like keyring
  27296. + wrapped by appropriate reiser4 plugin */
  27297. +
  27298. +#if !defined( __FS_REISER4_CRYPT_H__ )
  27299. +#define __FS_REISER4_CRYPT_H__
  27300. +
  27301. +#include <linux/crypto.h>
  27302. +
  27303. +/* key info imported from user space */
  27304. +struct reiser4_crypto_data {
  27305. + int keysize; /* uninstantiated key size */
  27306. + __u8 * key; /* uninstantiated key */
  27307. + int keyid_size; /* size of passphrase */
  27308. + __u8 * keyid; /* passphrase */
  27309. +};
  27310. +
  27311. +/* This object contains all needed infrastructure to implement
  27312. + cipher transform. This is operated (allocating, inheriting,
  27313. + validating, binding to host inode, etc..) by reiser4 key manager.
  27314. +
  27315. + This info can be allocated in two cases:
  27316. + 1. importing a key from user space.
  27317. + 2. reading inode from disk */
  27318. +struct reiser4_crypto_info {
  27319. + struct inode * host;
  27320. + struct crypto_hash * digest;
  27321. + struct crypto_blkcipher * cipher;
  27322. +#if 0
  27323. + cipher_key_plugin * kplug; /* key manager */
  27324. +#endif
  27325. + __u8 * keyid; /* key fingerprint, created by digest plugin,
  27326. + using uninstantiated key and passphrase.
  27327. + supposed to be stored in disk stat-data */
  27328. + int inst; /* this indicates if the cipher key is
  27329. + instantiated (case 1 above) */
  27330. + int keysize; /* uninstantiated key size (bytes), supposed
  27331. + to be stored in disk stat-data */
  27332. + int keyload_count; /* number of the objects which has this
  27333. + crypto-stat attached */
  27334. +};
  27335. +
  27336. +#endif /* __FS_REISER4_CRYPT_H__ */
  27337. +
  27338. +/*
  27339. + Local variables:
  27340. + c-indentation-style: "K&R"
  27341. + mode-name: "LC"
  27342. + c-basic-offset: 8
  27343. + tab-width: 8
  27344. + fill-column: 120
  27345. + scroll-step: 1
  27346. + End:
  27347. +*/
  27348. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/crypto/digest.c linux-5.16.14/fs/reiser4/plugin/crypto/digest.c
  27349. --- linux-5.16.14.orig/fs/reiser4/plugin/crypto/digest.c 1970-01-01 01:00:00.000000000 +0100
  27350. +++ linux-5.16.14/fs/reiser4/plugin/crypto/digest.c 2022-03-12 13:26:19.663892755 +0100
  27351. @@ -0,0 +1,58 @@
  27352. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  27353. +
  27354. +/* reiser4 digest transform plugin (is used by cryptcompress object plugin) */
  27355. +/* EDWARD-FIXME-HANS: and it does what? a digest is a what? */
  27356. +#include "../../debug.h"
  27357. +#include "../plugin_header.h"
  27358. +#include "../plugin.h"
  27359. +#include "../file/cryptcompress.h"
  27360. +
  27361. +#include <linux/types.h>
  27362. +
  27363. +extern digest_plugin digest_plugins[LAST_DIGEST_ID];
  27364. +
  27365. +static struct crypto_hash * alloc_sha256 (void)
  27366. +{
  27367. +#if REISER4_SHA256
  27368. + return crypto_alloc_hash ("sha256", 0, CRYPTO_ALG_ASYNC);
  27369. +#else
  27370. + warning("edward-1418", "sha256 unsupported");
  27371. + return ERR_PTR(-EINVAL);
  27372. +#endif
  27373. +}
  27374. +
  27375. +static void free_sha256 (struct crypto_hash * tfm)
  27376. +{
  27377. +#if REISER4_SHA256
  27378. + crypto_free_hash(tfm);
  27379. +#endif
  27380. + return;
  27381. +}
  27382. +
  27383. +/* digest plugins */
  27384. +digest_plugin digest_plugins[LAST_DIGEST_ID] = {
  27385. + [SHA256_32_DIGEST_ID] = {
  27386. + .h = {
  27387. + .type_id = REISER4_DIGEST_PLUGIN_TYPE,
  27388. + .id = SHA256_32_DIGEST_ID,
  27389. + .pops = NULL,
  27390. + .label = "sha256_32",
  27391. + .desc = "sha256_32 digest transform",
  27392. + .linkage = {NULL, NULL}
  27393. + },
  27394. + .fipsize = sizeof(__u32),
  27395. + .alloc = alloc_sha256,
  27396. + .free = free_sha256
  27397. + }
  27398. +};
  27399. +
  27400. +/*
  27401. + Local variables:
  27402. + c-indentation-style: "K&R"
  27403. + mode-name: "LC"
  27404. + c-basic-offset: 8
  27405. + tab-width: 8
  27406. + fill-column: 120
  27407. + scroll-step: 1
  27408. + End:
  27409. +*/
  27410. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/dir/dir.h linux-5.16.14/fs/reiser4/plugin/dir/dir.h
  27411. --- linux-5.16.14.orig/fs/reiser4/plugin/dir/dir.h 1970-01-01 01:00:00.000000000 +0100
  27412. +++ linux-5.16.14/fs/reiser4/plugin/dir/dir.h 2022-03-12 13:26:19.664892758 +0100
  27413. @@ -0,0 +1,36 @@
  27414. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  27415. + * reiser4/README */
  27416. +
  27417. +/* this file contains declarations of methods implementing directory plugins */
  27418. +
  27419. +#if !defined( __REISER4_DIR_H__ )
  27420. +#define __REISER4_DIR_H__
  27421. +
  27422. +/*#include "../../key.h"
  27423. +
  27424. +#include <linux/fs.h>*/
  27425. +
  27426. +/* declarations of functions implementing HASHED_DIR_PLUGIN_ID dir plugin */
  27427. +
  27428. +/* "hashed" directory methods of dir plugin */
  27429. +void build_entry_key_hashed(const struct inode *, const struct qstr *,
  27430. + reiser4_key *);
  27431. +
  27432. +/* declarations of functions implementing SEEKABLE_HASHED_DIR_PLUGIN_ID dir plugin */
  27433. +
  27434. +/* "seekable" directory methods of dir plugin */
  27435. +void build_entry_key_seekable(const struct inode *, const struct qstr *,
  27436. + reiser4_key *);
  27437. +
  27438. +/* __REISER4_DIR_H__ */
  27439. +#endif
  27440. +
  27441. +/*
  27442. + Local variables:
  27443. + c-indentation-style: "K&R"
  27444. + mode-name: "LC"
  27445. + c-basic-offset: 8
  27446. + tab-width: 8
  27447. + fill-column: 120
  27448. + End:
  27449. +*/
  27450. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/dir/hashed_dir.c linux-5.16.14/fs/reiser4/plugin/dir/hashed_dir.c
  27451. --- linux-5.16.14.orig/fs/reiser4/plugin/dir/hashed_dir.c 1970-01-01 01:00:00.000000000 +0100
  27452. +++ linux-5.16.14/fs/reiser4/plugin/dir/hashed_dir.c 2022-03-12 13:26:19.664892758 +0100
  27453. @@ -0,0 +1,81 @@
  27454. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  27455. + * reiser4/README */
  27456. +
  27457. +/* Directory plugin using hashes (see fs/reiser4/plugin/hash.c) to map file
  27458. + names to the files. */
  27459. +
  27460. +/*
  27461. + * Hashed directory logically consists of persistent directory
  27462. + * entries. Directory entry is a pair of a file name and a key of stat-data of
  27463. + * a file that has this name in the given directory.
  27464. + *
  27465. + * Directory entries are stored in the tree in the form of directory
  27466. + * items. Directory item should implement dir_entry_ops portion of item plugin
  27467. + * interface (see plugin/item/item.h). Hashed directory interacts with
  27468. + * directory item plugin exclusively through dir_entry_ops operations.
  27469. + *
  27470. + * Currently there are two implementations of directory items: "simple
  27471. + * directory item" (plugin/item/sde.[ch]), and "compound directory item"
  27472. + * (plugin/item/cde.[ch]) with the latter being the default.
  27473. + *
  27474. + * There is, however some delicate way through which directory code interferes
  27475. + * with item plugin: key assignment policy. A key for a directory item is
  27476. + * chosen by directory code, and as described in kassign.c, this key contains
  27477. + * a portion of file name. Directory item uses this knowledge to avoid storing
  27478. + * this portion of file name twice: in the key and in the directory item body.
  27479. + *
  27480. + */
  27481. +
  27482. +#include "../../inode.h"
  27483. +
  27484. +void complete_entry_key(const struct inode *, const char *name,
  27485. + int len, reiser4_key * result);
  27486. +
  27487. +/* this is implementation of build_entry_key method of dir
  27488. + plugin for HASHED_DIR_PLUGIN_ID
  27489. + */
  27490. +void build_entry_key_hashed(const struct inode *dir, /* directory where entry is
  27491. + * (or will be) in.*/
  27492. + const struct qstr *qname, /* name of file referenced
  27493. + * by this entry */
  27494. + reiser4_key * result /* resulting key of directory
  27495. + * entry */ )
  27496. +{
  27497. + const char *name;
  27498. + int len;
  27499. +
  27500. + assert("nikita-1139", dir != NULL);
  27501. + assert("nikita-1140", qname != NULL);
  27502. + assert("nikita-1141", qname->name != NULL);
  27503. + assert("nikita-1142", result != NULL);
  27504. +
  27505. + name = qname->name;
  27506. + len = qname->len;
  27507. +
  27508. + assert("nikita-2867", strlen(name) == len);
  27509. +
  27510. + reiser4_key_init(result);
  27511. + /* locality of directory entry's key is objectid of parent
  27512. + directory */
  27513. + set_key_locality(result, get_inode_oid(dir));
  27514. + /* minor packing locality is constant */
  27515. + set_key_type(result, KEY_FILE_NAME_MINOR);
  27516. + /* dot is special case---we always want it to be first entry in
  27517. + a directory. Actually, we just want to have smallest
  27518. + directory entry.
  27519. + */
  27520. + if (len == 1 && name[0] == '.')
  27521. + return;
  27522. +
  27523. + /* initialize part of entry key which depends on file name */
  27524. + complete_entry_key(dir, name, len, result);
  27525. +}
  27526. +
  27527. +/* Local variables:
  27528. + c-indentation-style: "K&R"
  27529. + mode-name: "LC"
  27530. + c-basic-offset: 8
  27531. + tab-width: 8
  27532. + fill-column: 120
  27533. + End:
  27534. +*/
  27535. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/dir/Makefile linux-5.16.14/fs/reiser4/plugin/dir/Makefile
  27536. --- linux-5.16.14.orig/fs/reiser4/plugin/dir/Makefile 1970-01-01 01:00:00.000000000 +0100
  27537. +++ linux-5.16.14/fs/reiser4/plugin/dir/Makefile 2022-03-12 13:26:19.663892755 +0100
  27538. @@ -0,0 +1,8 @@
  27539. +
  27540. +MODULE := dir_plugins
  27541. +
  27542. +obj-$(CONFIG_REISER4_FS) := $(MODULE).o
  27543. +
  27544. +$(MODULE)-objs += \
  27545. + hashed_dir.o \
  27546. + seekable_dir.o
  27547. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/dir/seekable_dir.c linux-5.16.14/fs/reiser4/plugin/dir/seekable_dir.c
  27548. --- linux-5.16.14.orig/fs/reiser4/plugin/dir/seekable_dir.c 1970-01-01 01:00:00.000000000 +0100
  27549. +++ linux-5.16.14/fs/reiser4/plugin/dir/seekable_dir.c 2022-03-12 13:26:19.664892758 +0100
  27550. @@ -0,0 +1,46 @@
  27551. +/* Copyright 2005 by Hans Reiser, licensing governed by
  27552. + * reiser4/README */
  27553. +
  27554. +#include "../../inode.h"
  27555. +
  27556. +/* this is implementation of build_entry_key method of dir
  27557. + plugin for SEEKABLE_HASHED_DIR_PLUGIN_ID
  27558. + This is for directories where we want repeatable and restartable readdir()
  27559. + even in case 32bit user level struct dirent (readdir(3)).
  27560. +*/
  27561. +void
  27562. +build_entry_key_seekable(const struct inode *dir, const struct qstr *name,
  27563. + reiser4_key * result)
  27564. +{
  27565. + oid_t objectid;
  27566. +
  27567. + assert("nikita-2283", dir != NULL);
  27568. + assert("nikita-2284", name != NULL);
  27569. + assert("nikita-2285", name->name != NULL);
  27570. + assert("nikita-2286", result != NULL);
  27571. +
  27572. + reiser4_key_init(result);
  27573. + /* locality of directory entry's key is objectid of parent
  27574. + directory */
  27575. + set_key_locality(result, get_inode_oid(dir));
  27576. + /* minor packing locality is constant */
  27577. + set_key_type(result, KEY_FILE_NAME_MINOR);
  27578. + /* dot is special case---we always want it to be first entry in
  27579. + a directory. Actually, we just want to have smallest
  27580. + directory entry.
  27581. + */
  27582. + if ((name->len == 1) && (name->name[0] == '.'))
  27583. + return;
  27584. +
  27585. + /* objectid of key is 31 lowest bits of hash. */
  27586. + objectid =
  27587. + inode_hash_plugin(dir)->hash(name->name,
  27588. + (int)name->len) & 0x7fffffff;
  27589. +
  27590. + assert("nikita-2303", !(objectid & ~KEY_OBJECTID_MASK));
  27591. + set_key_objectid(result, objectid);
  27592. +
  27593. + /* offset is always 0. */
  27594. + set_key_offset(result, (__u64) 0);
  27595. + return;
  27596. +}
  27597. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/dir_plugin_common.c linux-5.16.14/fs/reiser4/plugin/dir_plugin_common.c
  27598. --- linux-5.16.14.orig/fs/reiser4/plugin/dir_plugin_common.c 1970-01-01 01:00:00.000000000 +0100
  27599. +++ linux-5.16.14/fs/reiser4/plugin/dir_plugin_common.c 2022-03-12 13:26:19.664892758 +0100
  27600. @@ -0,0 +1,865 @@
  27601. +/* Copyright 2005 by Hans Reiser, licensing governed by
  27602. + reiser4/README */
  27603. +
  27604. +/* this file contains typical implementations for most of methods of
  27605. + directory plugin
  27606. +*/
  27607. +
  27608. +#include "../inode.h"
  27609. +
  27610. +int reiser4_find_entry(struct inode *dir, struct dentry *name,
  27611. + lock_handle * , znode_lock_mode, reiser4_dir_entry_desc *);
  27612. +int reiser4_lookup_name(struct inode *parent, struct dentry *dentry,
  27613. + reiser4_key * key);
  27614. +void check_light_weight(struct inode *inode, struct inode *parent);
  27615. +
  27616. +/* this is common implementation of get_parent method of dir plugin
  27617. + this is used by NFS kernel server to "climb" up directory tree to
  27618. + check permissions
  27619. + */
  27620. +struct dentry *get_parent_common(struct inode *child)
  27621. +{
  27622. + struct super_block *s;
  27623. + struct inode *parent;
  27624. + struct dentry dotdot;
  27625. + struct dentry *dentry;
  27626. + reiser4_key key;
  27627. + int result;
  27628. +
  27629. + /*
  27630. + * lookup dotdot entry.
  27631. + */
  27632. +
  27633. + s = child->i_sb;
  27634. + memset(&dotdot, 0, sizeof(dotdot));
  27635. + dotdot.d_name.name = "..";
  27636. + dotdot.d_name.len = 2;
  27637. + dotdot.d_op = &get_super_private(s)->ops.dentry;
  27638. +
  27639. + result = reiser4_lookup_name(child, &dotdot, &key);
  27640. + if (result != 0)
  27641. + return ERR_PTR(result);
  27642. +
  27643. + parent = reiser4_iget(s, &key, 1);
  27644. + if (!IS_ERR(parent)) {
  27645. + /*
  27646. + * FIXME-NIKITA dubious: attributes are inherited from @child
  27647. + * to @parent. But:
  27648. + *
  27649. + * (*) this is the only thing we can do
  27650. + *
  27651. + * (*) attributes of light-weight object are inherited
  27652. + * from a parent through which object was looked up first,
  27653. + * so it is ambiguous anyway.
  27654. + *
  27655. + */
  27656. + check_light_weight(parent, child);
  27657. + reiser4_iget_complete(parent);
  27658. + dentry = d_obtain_alias(parent);
  27659. + if (!IS_ERR(dentry))
  27660. + dentry->d_op = &get_super_private(s)->ops.dentry;
  27661. + } else if (PTR_ERR(parent) == -ENOENT)
  27662. + dentry = ERR_PTR(RETERR(-ESTALE));
  27663. + else
  27664. + dentry = (void *)parent;
  27665. + return dentry;
  27666. +}
  27667. +
  27668. +/* this is common implementation of is_name_acceptable method of dir
  27669. + plugin
  27670. + */
  27671. +int is_name_acceptable_common(const struct inode *inode, /* directory to check*/
  27672. + const char *name UNUSED_ARG, /* name to check */
  27673. + int len/* @name's length */)
  27674. +{
  27675. + assert("nikita-733", inode != NULL);
  27676. + assert("nikita-734", name != NULL);
  27677. + assert("nikita-735", len > 0);
  27678. +
  27679. + return len <= reiser4_max_filename_len(inode);
  27680. +}
  27681. +
  27682. +/* there is no common implementation of build_entry_key method of dir
  27683. + plugin. See plugin/dir/hashed_dir.c:build_entry_key_hashed() or
  27684. +   plugin/dir/seekable_dir.c:build_entry_key_seekable() for example
  27685. +*/
  27686. +
  27687. +/* this is common implementation of build_readdir_key method of dir
  27688. + plugin
  27689. + see reiser4_readdir_common for more details
  27690. +*/
  27691. +int build_readdir_key_common(struct file *dir /* directory being read */ ,
  27692. + reiser4_key * result/* where to store key */)
  27693. +{
  27694. + reiser4_file_fsdata *fdata;
  27695. + struct inode *inode;
  27696. +
  27697. + assert("nikita-1361", dir != NULL);
  27698. + assert("nikita-1362", result != NULL);
  27699. + assert("nikita-1363", dir->f_path.dentry != NULL);
  27700. + inode = file_inode(dir);
  27701. + assert("nikita-1373", inode != NULL);
  27702. +
  27703. + fdata = reiser4_get_file_fsdata(dir);
  27704. + if (IS_ERR(fdata))
  27705. + return PTR_ERR(fdata);
  27706. + assert("nikita-1364", fdata != NULL);
  27707. + return extract_key_from_de_id(get_inode_oid(inode),
  27708. + &fdata->dir.readdir.position.
  27709. + dir_entry_key, result);
  27710. +
  27711. +}
  27712. +
  27713. +void reiser4_adjust_dir_file(struct inode *, const struct dentry *, int offset,
  27714. + int adj);
  27715. +
  27716. +/* this is common implementation of add_entry method of dir plugin
  27717. +*/
  27718. +int reiser4_add_entry_common(struct inode *object, /* directory to add new name
  27719. + * in */
  27720. + struct dentry *where, /* new name */
  27721. + reiser4_object_create_data * data, /* parameters of
  27722. + * new object */
  27723. + reiser4_dir_entry_desc * entry /* parameters of
  27724. + * new directory
  27725. + * entry */)
  27726. +{
  27727. + int result;
  27728. + coord_t *coord;
  27729. + lock_handle lh;
  27730. + struct reiser4_dentry_fsdata *fsdata;
  27731. + reiser4_block_nr reserve;
  27732. +
  27733. + assert("nikita-1114", object != NULL);
  27734. + assert("nikita-1250", where != NULL);
  27735. +
  27736. + fsdata = reiser4_get_dentry_fsdata(where);
  27737. + if (unlikely(IS_ERR(fsdata)))
  27738. + return PTR_ERR(fsdata);
  27739. +
  27740. + reserve = inode_dir_plugin(object)->estimate.add_entry(object);
  27741. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT))
  27742. + return RETERR(-ENOSPC);
  27743. +
  27744. + init_lh(&lh);
  27745. + coord = &fsdata->dec.entry_coord;
  27746. + coord_clear_iplug(coord);
  27747. +
  27748. + /* check for this entry in a directory. This is plugin method. */
  27749. + result = reiser4_find_entry(object, where, &lh, ZNODE_WRITE_LOCK,
  27750. + entry);
  27751. + if (likely(result == -ENOENT)) {
  27752. + /* add new entry. Just pass control to the directory
  27753. + item plugin. */
  27754. + assert("nikita-1709", inode_dir_item_plugin(object));
  27755. + assert("nikita-2230", coord->node == lh.node);
  27756. + reiser4_seal_done(&fsdata->dec.entry_seal);
  27757. + result =
  27758. + inode_dir_item_plugin(object)->s.dir.add_entry(object,
  27759. + coord, &lh,
  27760. + where,
  27761. + entry);
  27762. + if (result == 0) {
  27763. + reiser4_adjust_dir_file(object, where,
  27764. + fsdata->dec.pos + 1, +1);
  27765. + INODE_INC_FIELD(object, i_size);
  27766. + }
  27767. + } else if (result == 0) {
  27768. + assert("nikita-2232", coord->node == lh.node);
  27769. + result = RETERR(-EEXIST);
  27770. + }
  27771. + done_lh(&lh);
  27772. +
  27773. + return result;
  27774. +}
  27775. +
  27776. +/**
  27777. + * rem_entry - remove entry from directory item
  27778. + * @dir:
  27779. + * @dentry:
  27780. + * @entry:
  27781. + * @coord:
  27782. + * @lh:
  27783. + *
  27784. + * Checks that coordinate @coord is set properly and calls item plugin
  27785. + * method to cut entry.
  27786. + */
  27787. +static int
  27788. +rem_entry(struct inode *dir, struct dentry *dentry,
  27789. + reiser4_dir_entry_desc * entry, coord_t *coord, lock_handle * lh)
  27790. +{
  27791. + item_plugin *iplug;
  27792. + struct inode *child;
  27793. +
  27794. + iplug = inode_dir_item_plugin(dir);
  27795. + child = dentry->d_inode;
  27796. + assert("nikita-3399", child != NULL);
  27797. +
  27798. + /* check that we are really destroying an entry for @child */
  27799. + if (REISER4_DEBUG) {
  27800. + int result;
  27801. + reiser4_key key;
  27802. +
  27803. + result = iplug->s.dir.extract_key(coord, &key);
  27804. + if (result != 0)
  27805. + return result;
  27806. + if (get_key_objectid(&key) != get_inode_oid(child)) {
  27807. + warning("nikita-3397",
  27808. + "rem_entry: %#llx != %#llx\n",
  27809. + get_key_objectid(&key),
  27810. + (unsigned long long)get_inode_oid(child));
  27811. + return RETERR(-EIO);
  27812. + }
  27813. + }
  27814. + return iplug->s.dir.rem_entry(dir, &dentry->d_name, coord, lh, entry);
  27815. +}
  27816. +
  27817. +/**
  27818. + * reiser4_rem_entry_common - remove entry from a directory
  27819. + * @dir: directory to remove entry from
  27820. + * @where: name that is being removed
  27821. + * @entry: description of entry being removed
  27822. + *
  27823. + * This is common implementation of rem_entry method of dir plugin.
  27824. + */
  27825. +int reiser4_rem_entry_common(struct inode *dir,
  27826. + struct dentry *dentry,
  27827. + reiser4_dir_entry_desc * entry)
  27828. +{
  27829. + int result;
  27830. + coord_t *coord;
  27831. + lock_handle lh;
  27832. + struct reiser4_dentry_fsdata *fsdata;
  27833. + __u64 tograb;
  27834. +
  27835. + assert("nikita-1124", dir != NULL);
  27836. + assert("nikita-1125", dentry != NULL);
  27837. +
  27838. + tograb = inode_dir_plugin(dir)->estimate.rem_entry(dir);
  27839. + result = reiser4_grab_space(tograb, BA_CAN_COMMIT | BA_RESERVED);
  27840. + if (result != 0)
  27841. + return RETERR(-ENOSPC);
  27842. +
  27843. + init_lh(&lh);
  27844. +
  27845. + /* check for this entry in a directory. This is plugin method. */
  27846. + result = reiser4_find_entry(dir, dentry, &lh, ZNODE_WRITE_LOCK, entry);
  27847. + fsdata = reiser4_get_dentry_fsdata(dentry);
  27848. + if (IS_ERR(fsdata)) {
  27849. + done_lh(&lh);
  27850. + return PTR_ERR(fsdata);
  27851. + }
  27852. +
  27853. + coord = &fsdata->dec.entry_coord;
  27854. +
  27855. + assert("nikita-3404",
  27856. + get_inode_oid(dentry->d_inode) != get_inode_oid(dir) ||
  27857. + dir->i_size <= 1);
  27858. +
  27859. + coord_clear_iplug(coord);
  27860. + if (result == 0) {
  27861. + /* remove entry. Just pass control to the directory item
  27862. + plugin. */
  27863. + assert("vs-542", inode_dir_item_plugin(dir));
  27864. + reiser4_seal_done(&fsdata->dec.entry_seal);
  27865. + reiser4_adjust_dir_file(dir, dentry, fsdata->dec.pos, -1);
  27866. + result =
  27867. + WITH_COORD(coord,
  27868. + rem_entry(dir, dentry, entry, coord, &lh));
  27869. + if (result == 0) {
  27870. + if (dir->i_size >= 1)
  27871. + INODE_DEC_FIELD(dir, i_size);
  27872. + else {
  27873. + warning("nikita-2509", "Dir %llu is runt",
  27874. + (unsigned long long)
  27875. + get_inode_oid(dir));
  27876. + result = RETERR(-EIO);
  27877. + }
  27878. +
  27879. + assert("nikita-3405", dentry->d_inode->i_nlink != 1 ||
  27880. + dentry->d_inode->i_size != 2 ||
  27881. + inode_dir_plugin(dentry->d_inode) == NULL);
  27882. + }
  27883. + }
  27884. + done_lh(&lh);
  27885. +
  27886. + return result;
  27887. +}
  27888. +
  27889. +static reiser4_block_nr estimate_init(struct inode *parent,
  27890. + struct inode *object);
  27891. +static int create_dot_dotdot(struct inode *object, struct inode *parent);
  27892. +
  27893. +/* this is common implementation of init method of dir plugin
  27894. + create "." and ".." entries
  27895. +*/
  27896. +int reiser4_dir_init_common(struct inode *object, /* new directory */
  27897. + struct inode *parent, /* parent directory */
  27898. + reiser4_object_create_data * data /* info passed
  27899. + * to us, this
  27900. + * is filled by
  27901. + * reiser4()
  27902. + * syscall in
  27903. + * particular */)
  27904. +{
  27905. + reiser4_block_nr reserve;
  27906. +
  27907. + assert("nikita-680", object != NULL);
  27908. + assert("nikita-681", S_ISDIR(object->i_mode));
  27909. + assert("nikita-682", parent != NULL);
  27910. + assert("nikita-684", data != NULL);
  27911. + assert("nikita-686", data->id == DIRECTORY_FILE_PLUGIN_ID);
  27912. + assert("nikita-687", object->i_mode & S_IFDIR);
  27913. +
  27914. + reserve = estimate_init(parent, object);
  27915. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT))
  27916. + return RETERR(-ENOSPC);
  27917. +
  27918. + return create_dot_dotdot(object, parent);
  27919. +}
  27920. +
  27921. +/* this is common implementation of done method of dir plugin
  27922. + remove "." entry
  27923. +*/
  27924. +int reiser4_dir_done_common(struct inode *object/* object being deleted */)
  27925. +{
  27926. + int result;
  27927. + reiser4_block_nr reserve;
  27928. + struct dentry goodby_dots;
  27929. + reiser4_dir_entry_desc entry;
  27930. +
  27931. + assert("nikita-1449", object != NULL);
  27932. +
  27933. + if (reiser4_inode_get_flag(object, REISER4_NO_SD))
  27934. + return 0;
  27935. +
  27936. + /* of course, this can be rewritten to sweep everything in one
  27937. + reiser4_cut_tree(). */
  27938. + memset(&entry, 0, sizeof entry);
  27939. +
  27940. + /* FIXME: this done method is called from reiser4_delete_dir_common
  27941. + * which reserved space already */
  27942. + reserve = inode_dir_plugin(object)->estimate.rem_entry(object);
  27943. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT | BA_RESERVED))
  27944. + return RETERR(-ENOSPC);
  27945. +
  27946. + memset(&goodby_dots, 0, sizeof goodby_dots);
  27947. + entry.obj = goodby_dots.d_inode = object;
  27948. + goodby_dots.d_name.name = ".";
  27949. + goodby_dots.d_name.len = 1;
  27950. + result = reiser4_rem_entry_common(object, &goodby_dots, &entry);
  27951. + reiser4_free_dentry_fsdata(&goodby_dots);
  27952. + if (unlikely(result != 0 && result != -ENOMEM && result != -ENOENT))
  27953. + warning("nikita-2252", "Cannot remove dot of %lli: %i",
  27954. + (unsigned long long)get_inode_oid(object), result);
  27955. + return 0;
  27956. +}
  27957. +
  27958. +/* this is common implementation of attach method of dir plugin
  27959. +*/
  27960. +int reiser4_attach_common(struct inode *child UNUSED_ARG,
  27961. + struct inode *parent UNUSED_ARG)
  27962. +{
  27963. + assert("nikita-2647", child != NULL);
  27964. + assert("nikita-2648", parent != NULL);
  27965. +
  27966. + return 0;
  27967. +}
  27968. +
  27969. +/* this is common implementation of detach method of dir plugin
  27970. + remove "..", decrease nlink on parent
  27971. +*/
  27972. +int reiser4_detach_common(struct inode *object, struct inode *parent)
  27973. +{
  27974. + int result;
  27975. + struct dentry goodby_dots;
  27976. + reiser4_dir_entry_desc entry;
  27977. +
  27978. + assert("nikita-2885", object != NULL);
  27979. + assert("nikita-2886", !reiser4_inode_get_flag(object, REISER4_NO_SD));
  27980. +
  27981. + memset(&entry, 0, sizeof entry);
  27982. +
  27983. + /* NOTE-NIKITA this only works if @parent is -the- parent of
  27984. + @object, viz. object whose key is stored in dotdot
  27985. + entry. Wouldn't work with hard-links on directories. */
  27986. + memset(&goodby_dots, 0, sizeof goodby_dots);
  27987. + entry.obj = goodby_dots.d_inode = parent;
  27988. + goodby_dots.d_name.name = "..";
  27989. + goodby_dots.d_name.len = 2;
  27990. + result = reiser4_rem_entry_common(object, &goodby_dots, &entry);
  27991. + reiser4_free_dentry_fsdata(&goodby_dots);
  27992. + if (result == 0) {
  27993. + /* the dot should be the only entry remaining at this time... */
  27994. + assert("nikita-3400",
  27995. + object->i_size == 1 && object->i_nlink <= 2);
  27996. +#if 0
  27997. + /* and, together with the only name directory can have, they
  27998. + * provide for the last 2 remaining references. If we get
  27999. + * here as part of error handling during mkdir, @object
  28000. + * possibly has no name yet, so its nlink == 1. If we get here
  28001. + * from rename (targeting empty directory), it has no name
  28002. + * already, so its nlink == 1. */
  28003. + assert("nikita-3401",
  28004. + object->i_nlink == 2 || object->i_nlink == 1);
  28005. +#endif
  28006. +
  28007. + /* decrement nlink of directory removed ".." pointed
  28008. + to */
  28009. + reiser4_del_nlink(parent, NULL, 0);
  28010. + }
  28011. + return result;
  28012. +}
  28013. +
  28014. +/* this is common implementation of estimate.add_entry method of
  28015. + dir plugin
  28016. + estimation of adding entry which supposes that entry is inserting a
  28017. + unit into item
  28018. +*/
  28019. +reiser4_block_nr estimate_add_entry_common(const struct inode *inode)
  28020. +{
  28021. + return estimate_one_insert_into_item(reiser4_tree_by_inode(inode));
  28022. +}
  28023. +
  28024. +/* this is common implementation of estimate.rem_entry method of dir
  28025. + plugin
  28026. +*/
  28027. +reiser4_block_nr estimate_rem_entry_common(const struct inode *inode)
  28028. +{
  28029. + return estimate_one_item_removal(reiser4_tree_by_inode(inode));
  28030. +}
  28031. +
  28032. +/* this is common implementation of estimate.unlink method of dir
  28033. + plugin
  28034. +*/
  28035. +reiser4_block_nr
  28036. +dir_estimate_unlink_common(const struct inode *parent,
  28037. + const struct inode *object)
  28038. +{
  28039. + reiser4_block_nr res;
  28040. +
  28041. + /* hashed_rem_entry(object) */
  28042. + res = inode_dir_plugin(object)->estimate.rem_entry(object);
  28043. + /* del_nlink(parent) */
  28044. + res += 2 * inode_file_plugin(parent)->estimate.update(parent);
  28045. +
  28046. + return res;
  28047. +}
  28048. +
  28049. +/*
  28050. + * helper for inode_ops ->lookup() and dir plugin's ->get_parent()
  28051. + * methods: if @inode is a light-weight file, setup its credentials
  28052. + * that are not stored in the stat-data in this case
  28053. + */
  28054. +void check_light_weight(struct inode *inode, struct inode *parent)
  28055. +{
  28056. + if (reiser4_inode_get_flag(inode, REISER4_LIGHT_WEIGHT)) {
  28057. + inode->i_uid = parent->i_uid;
  28058. + inode->i_gid = parent->i_gid;
  28059. + /* clear light-weight flag. If inode would be read by any
  28060. + other name, [ug]id wouldn't change. */
  28061. + reiser4_inode_clr_flag(inode, REISER4_LIGHT_WEIGHT);
  28062. + }
  28063. +}
  28064. +
  28065. +/* looks for name specified in @dentry in directory @parent and if name is
  28066. + found - key of object found entry points to is stored in @entry->key */
  28067. +int reiser4_lookup_name(struct inode *parent, /* inode of directory to lookup
  28068. + * for name in */
  28069. + struct dentry *dentry, /* name to look for */
  28070. + reiser4_key * key/* place to store key */)
  28071. +{
  28072. + int result;
  28073. + coord_t *coord;
  28074. + lock_handle lh;
  28075. + const char *name;
  28076. + int len;
  28077. + reiser4_dir_entry_desc entry;
  28078. + struct reiser4_dentry_fsdata *fsdata;
  28079. +
  28080. + assert("nikita-1247", parent != NULL);
  28081. + assert("nikita-1248", dentry != NULL);
  28082. + assert("nikita-1123", dentry->d_name.name != NULL);
  28083. + assert("vs-1486",
  28084. + dentry->d_op == &get_super_private(parent->i_sb)->ops.dentry);
  28085. +
  28086. + name = dentry->d_name.name;
  28087. + len = dentry->d_name.len;
  28088. +
  28089. + if (!inode_dir_plugin(parent)->is_name_acceptable(parent, name, len))
  28090. + /* some arbitrary error code to return */
  28091. + return RETERR(-ENAMETOOLONG);
  28092. +
  28093. + fsdata = reiser4_get_dentry_fsdata(dentry);
  28094. + if (IS_ERR(fsdata))
  28095. + return PTR_ERR(fsdata);
  28096. +
  28097. + coord = &fsdata->dec.entry_coord;
  28098. + coord_clear_iplug(coord);
  28099. + init_lh(&lh);
  28100. +
  28101. + /* find entry in a directory. This is plugin method. */
  28102. + result = reiser4_find_entry(parent, dentry, &lh, ZNODE_READ_LOCK,
  28103. + &entry);
  28104. + if (result == 0) {
  28105. + /* entry was found, extract object key from it. */
  28106. + result =
  28107. + WITH_COORD(coord,
  28108. + item_plugin_by_coord(coord)->s.dir.
  28109. + extract_key(coord, key));
  28110. + }
  28111. + done_lh(&lh);
  28112. + return result;
  28113. +
  28114. +}
  28115. +
  28116. +/* helper for reiser4_dir_init_common(): estimate number of blocks to reserve */
  28117. +static reiser4_block_nr
  28118. +estimate_init(struct inode *parent, struct inode *object)
  28119. +{
  28120. + reiser4_block_nr res = 0;
  28121. +
  28122. + assert("vpf-321", parent != NULL);
  28123. + assert("vpf-322", object != NULL);
  28124. +
  28125. + /* hashed_add_entry(object) */
  28126. + res += inode_dir_plugin(object)->estimate.add_entry(object);
  28127. + /* reiser4_add_nlink(object) */
  28128. + res += inode_file_plugin(object)->estimate.update(object);
  28129. + /* hashed_add_entry(object) */
  28130. + res += inode_dir_plugin(object)->estimate.add_entry(object);
  28131. + /* reiser4_add_nlink(parent) */
  28132. + res += inode_file_plugin(parent)->estimate.update(parent);
  28133. +
  28134. + return res; /* was "return 0": discarded the computed reserve, so caller grabbed no space */
  28135. +}
  28136. +
  28137. +/* helper function for reiser4_dir_init_common(). Create "." and ".." */
  28138. +static int create_dot_dotdot(struct inode *object/* object to create dot and
  28139. + * dotdot for */ ,
  28140. + struct inode *parent/* parent of @object */)
  28141. +{
  28142. + int result;
  28143. + struct dentry dots_entry;
  28144. + reiser4_dir_entry_desc entry;
  28145. +
  28146. + assert("nikita-688", object != NULL);
  28147. + assert("nikita-689", S_ISDIR(object->i_mode));
  28148. + assert("nikita-691", parent != NULL);
  28149. +
  28150. + /* We store dot and dotdot as normal directory entries. This is
  28151. + not necessary, because almost all information stored in them
  28152. + is already in the stat-data of directory, the only thing
  28153. + being missed is objectid of grand-parent directory that can
  28154. + easily be added there as extension.
  28155. +
  28156. + But it is done the way it is done, because not storing dot
  28157. + and dotdot will lead to the following complications:
  28158. +
  28159. + . special case handling in ->lookup().
  28160. + . addition of another extension to the sd.
  28161. + . dependency on key allocation policy for stat data.
  28162. +
  28163. + */
  28164. +
  28165. + memset(&entry, 0, sizeof entry);
  28166. + memset(&dots_entry, 0, sizeof dots_entry);
  28167. + entry.obj = dots_entry.d_inode = object;
  28168. + dots_entry.d_name.name = ".";
  28169. + dots_entry.d_name.len = 1;
  28170. + result = reiser4_add_entry_common(object, &dots_entry, NULL, &entry);
  28171. + reiser4_free_dentry_fsdata(&dots_entry);
  28172. +
  28173. + if (result == 0) {
  28174. + result = reiser4_add_nlink(object, object, 0);
  28175. + if (result == 0) {
  28176. + entry.obj = dots_entry.d_inode = parent;
  28177. + dots_entry.d_name.name = "..";
  28178. + dots_entry.d_name.len = 2;
  28179. + result = reiser4_add_entry_common(object,
  28180. + &dots_entry, NULL, &entry);
  28181. + reiser4_free_dentry_fsdata(&dots_entry);
  28182. + /* if creation of ".." failed, iput() will delete
  28183. + object with ".". */
  28184. + if (result == 0) {
  28185. + result = reiser4_add_nlink(parent, object, 0);
  28186. + if (result != 0)
  28187. + /*
  28188. + * if we failed to bump i_nlink, try
  28189. + * to remove ".."
  28190. + */
  28191. + reiser4_detach_common(object, parent);
  28192. + }
  28193. + }
  28194. + }
  28195. +
  28196. + if (result != 0) {
  28197. + /*
  28198. + * in the case of error, at least update stat-data so that,
  28199. + * ->i_nlink updates are not lingering.
  28200. + */
  28201. + reiser4_update_sd(object);
  28202. + reiser4_update_sd(parent);
  28203. + }
  28204. +
  28205. + return result;
  28206. +}
  28207. +
  28208. +/*
  28209. + * return 0 iff @coord contains a directory entry for the file with the name
  28210. + * @name.
  28211. + */
  28212. +static int
  28213. +check_item(const struct inode *dir, const coord_t *coord, const char *name)
  28214. +{
  28215. + item_plugin *iplug;
  28216. + char buf[DE_NAME_BUF_LEN];
  28217. +
  28218. + iplug = item_plugin_by_coord(coord);
  28219. + if (iplug == NULL) {
  28220. + warning("nikita-1135", "Cannot get item plugin");
  28221. + print_coord("coord", coord, 1);
  28222. + return RETERR(-EIO);
  28223. + } else if (item_id_by_coord(coord) !=
  28224. + item_id_by_plugin(inode_dir_item_plugin(dir))) {
  28225. + /* item id of current item does not match to id of items a
  28226. + directory is built of */
  28227. + warning("nikita-1136", "Wrong item plugin");
  28228. + print_coord("coord", coord, 1);
  28229. + return RETERR(-EIO);
  28230. + }
  28231. + assert("nikita-1137", iplug->s.dir.extract_name);
  28232. +
  28233. + /* Compare name stored in this entry with name we are looking for.
  28234. +
  28235. + NOTE-NIKITA Here should go code for support of something like
  28236. + unicode, code tables, etc.
  28237. + */
  28238. + return !!strcmp(name, iplug->s.dir.extract_name(coord, buf));
  28239. +}
  28240. +
  28241. +static int
  28242. +check_entry(const struct inode *dir, coord_t *coord, const struct qstr *name)
  28243. +{
  28244. + return WITH_COORD(coord, check_item(dir, coord, name->name));
  28245. +}
  28246. +
  28247. +/*
  28248. + * argument package used by entry_actor to scan entries with identical keys.
  28249. + */
  28250. +struct entry_actor_args {
  28251. + /* name we are looking for */
  28252. + const char *name;
  28253. + /* key of directory entry. entry_actor() scans through sequence of
  28254. + * items/units having the same key */
  28255. + reiser4_key *key;
  28256. + /* how many entries with duplicate keys were scanned so far. */
  28257. + int non_uniq;
  28258. +#if REISER4_USE_COLLISION_LIMIT
  28259. + /* scan limit */
  28260. + int max_non_uniq;
  28261. +#endif
  28262. + /* return parameter: set to true, if ->name wasn't found */
  28263. + int not_found;
  28264. + /* what type of lock to take when moving to the next node during
  28265. + * scan */
  28266. + znode_lock_mode mode;
  28267. +
  28268. + /* last coord that was visited during scan */
  28269. + coord_t last_coord;
  28270. + /* last node locked during scan */
  28271. + lock_handle last_lh;
  28272. + /* inode of directory */
  28273. + const struct inode *inode;
  28274. +};
  28275. +
  28276. +/* Function called by reiser4_find_entry() to look for given name
  28277. + in the directory. */
  28278. +static int entry_actor(reiser4_tree * tree UNUSED_ARG /* tree being scanned */ ,
  28279. + coord_t *coord /* current coord */ ,
  28280. + lock_handle * lh /* current lock handle */ ,
  28281. + void *entry_actor_arg/* argument to scan */)
  28282. +{
  28283. + reiser4_key unit_key;
  28284. + struct entry_actor_args *args;
  28285. +
  28286. + assert("nikita-1131", tree != NULL);
  28287. + assert("nikita-1132", coord != NULL);
  28288. + assert("nikita-1133", entry_actor_arg != NULL);
  28289. +
  28290. + args = entry_actor_arg;
  28291. + ++args->non_uniq;
  28292. +#if REISER4_USE_COLLISION_LIMIT
  28293. + if (args->non_uniq > args->max_non_uniq) {
  28294. + args->not_found = 1;
  28295. + /* hash collision overflow. */
  28296. + return RETERR(-EBUSY);
  28297. + }
  28298. +#endif
  28299. +
  28300. + /*
  28301. + * did we just reach the end of the sequence of items/units with
  28302. + * identical keys?
  28303. + */
  28304. + if (!keyeq(args->key, unit_key_by_coord(coord, &unit_key))) {
  28305. + assert("nikita-1791",
  28306. + keylt(args->key, unit_key_by_coord(coord, &unit_key)));
  28307. + args->not_found = 1;
  28308. + args->last_coord.between = AFTER_UNIT;
  28309. + return 0;
  28310. + }
  28311. +
  28312. + coord_dup(&args->last_coord, coord);
  28313. + /*
  28314. + * did the scan just move to the next node?
  28315. + */
  28316. + if (args->last_lh.node != lh->node) {
  28317. + int lock_result;
  28318. +
  28319. + /*
  28320. + * if so, lock new node with the mode requested by the caller
  28321. + */
  28322. + done_lh(&args->last_lh);
  28323. + assert("nikita-1896", znode_is_any_locked(lh->node));
  28324. + lock_result = longterm_lock_znode(&args->last_lh, lh->node,
  28325. + args->mode, ZNODE_LOCK_HIPRI);
  28326. + if (lock_result != 0)
  28327. + return lock_result;
  28328. + }
  28329. + return check_item(args->inode, coord, args->name);
  28330. +}
  28331. +
  28332. +/* Look for given @name within directory @dir.
  28333. +
  28334. + This is called during lookup, creation and removal of directory
  28335. + entries and on reiser4_rename_common
  28336. +
  28337. + First calculate key that directory entry for @name would have. Search
  28338. + for this key in the tree. If such key is found, scan all items with
  28339. + the same key, checking name in each directory entry along the way.
  28340. +*/
  28341. +int reiser4_find_entry(struct inode *dir, /* directory to scan */
  28342. + struct dentry *de, /* name to search for */
  28343. + lock_handle * lh, /* resulting lock handle */
  28344. + znode_lock_mode mode, /* required lock mode */
  28345. + reiser4_dir_entry_desc * entry /* parameters of found
  28346. + directory entry */)
  28347. +{
  28348. + const struct qstr *name;
  28349. + seal_t *seal;
  28350. + coord_t *coord;
  28351. + int result;
  28352. + __u32 flags;
  28353. + struct de_location *dec;
  28354. + struct reiser4_dentry_fsdata *fsdata;
  28355. +
  28356. + assert("nikita-1130", lh != NULL);
  28357. + assert("nikita-1128", dir != NULL);
  28358. +
  28359. + name = &de->d_name;
  28360. + assert("nikita-1129", name != NULL);
  28361. +
  28362. + /* dentry private data don't require a lock, because dentry
  28363. + manipulations are protected by i_mutex on parent.
  28364. +
  28365. + This is not so for inodes, because there is no single parent in
  28366. + the inode case.
  28367. + */
  28368. + fsdata = reiser4_get_dentry_fsdata(de);
  28369. + if (IS_ERR(fsdata))
  28370. + return PTR_ERR(fsdata);
  28371. + dec = &fsdata->dec;
  28372. +
  28373. + coord = &dec->entry_coord;
  28374. + coord_clear_iplug(coord);
  28375. + seal = &dec->entry_seal;
  28376. + /* compose key of directory entry for @name */
  28377. + inode_dir_plugin(dir)->build_entry_key(dir, name, &entry->key);
  28378. +
  28379. + if (reiser4_seal_is_set(seal)) {
  28380. + /* check seal */
  28381. + result = reiser4_seal_validate(seal, coord, &entry->key,
  28382. + lh, mode, ZNODE_LOCK_LOPRI);
  28383. + if (result == 0) {
  28384. + /* key was found. Check that it is really item we are
  28385. + looking for. */
  28386. + result = check_entry(dir, coord, name);
  28387. + if (result == 0)
  28388. + return 0;
  28389. + }
  28390. + }
  28391. + flags = (mode == ZNODE_WRITE_LOCK) ? CBK_FOR_INSERT : 0;
  28392. + /*
  28393. + * find place in the tree where directory item should be located.
  28394. + */
  28395. + result = reiser4_object_lookup(dir, &entry->key, coord, lh, mode,
  28396. + FIND_EXACT, LEAF_LEVEL, LEAF_LEVEL,
  28397. + flags, NULL/*ra_info */);
  28398. + if (result == CBK_COORD_FOUND) {
  28399. + struct entry_actor_args arg;
  28400. +
  28401. + /* fast path: no hash collisions */
  28402. + result = check_entry(dir, coord, name);
  28403. + if (result == 0) {
  28404. + reiser4_seal_init(seal, coord, &entry->key);
  28405. + dec->pos = 0;
  28406. + } else if (result > 0) {
  28407. + /* Iterate through all units with the same keys. */
  28408. + arg.name = name->name;
  28409. + arg.key = &entry->key;
  28410. + arg.not_found = 0;
  28411. + arg.non_uniq = 0;
  28412. +#if REISER4_USE_COLLISION_LIMIT
  28413. + arg.max_non_uniq = max_hash_collisions(dir);
  28414. + assert("nikita-2851", arg.max_non_uniq > 1);
  28415. +#endif
  28416. + arg.mode = mode;
  28417. + arg.inode = dir;
  28418. + coord_init_zero(&arg.last_coord);
  28419. + init_lh(&arg.last_lh);
  28420. +
  28421. + result = reiser4_iterate_tree
  28422. + (reiser4_tree_by_inode(dir),
  28423. + coord, lh,
  28424. + entry_actor, &arg, mode, 1);
  28425. + /* if end of the tree or extent was reached during
  28426. + scanning. */
  28427. + if (arg.not_found || (result == -E_NO_NEIGHBOR)) {
  28428. + /* step back */
  28429. + done_lh(lh);
  28430. +
  28431. + result = zload(arg.last_coord.node);
  28432. + if (result == 0) {
  28433. + coord_clear_iplug(&arg.last_coord);
  28434. + coord_dup(coord, &arg.last_coord);
  28435. + move_lh(lh, &arg.last_lh);
  28436. + result = RETERR(-ENOENT);
  28437. + zrelse(arg.last_coord.node);
  28438. + --arg.non_uniq;
  28439. + }
  28440. + }
  28441. +
  28442. + done_lh(&arg.last_lh);
  28443. + if (result == 0)
  28444. + reiser4_seal_init(seal, coord, &entry->key);
  28445. +
  28446. + if (result == 0 || result == -ENOENT) {
  28447. + assert("nikita-2580", arg.non_uniq > 0);
  28448. + dec->pos = arg.non_uniq - 1;
  28449. + }
  28450. + }
  28451. + } else
  28452. + dec->pos = -1;
  28453. + return result;
  28454. +}
  28455. +
  28456. +/*
  28457. + Local variables:
  28458. + c-indentation-style: "K&R"
  28459. + mode-name: "LC"
  28460. + c-basic-offset: 8
  28461. + tab-width: 8
  28462. + fill-column: 120
  28463. + scroll-step: 1
  28464. + End:
  28465. +*/
  28466. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/disk_format/disk_format40.c linux-5.16.14/fs/reiser4/plugin/disk_format/disk_format40.c
  28467. --- linux-5.16.14.orig/fs/reiser4/plugin/disk_format/disk_format40.c 1970-01-01 01:00:00.000000000 +0100
  28468. +++ linux-5.16.14/fs/reiser4/plugin/disk_format/disk_format40.c 2022-03-12 13:26:19.665892760 +0100
  28469. @@ -0,0 +1,668 @@
  28470. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  28471. +
  28472. +#include "../../debug.h"
  28473. +#include "../../dformat.h"
  28474. +#include "../../key.h"
  28475. +#include "../node/node.h"
  28476. +#include "../space/space_allocator.h"
  28477. +#include "disk_format40.h"
  28478. +#include "../plugin.h"
  28479. +#include "../../txnmgr.h"
  28480. +#include "../../jnode.h"
  28481. +#include "../../tree.h"
  28482. +#include "../../super.h"
  28483. +#include "../../wander.h"
  28484. +#include "../../inode.h"
  28485. +#include "../../ktxnmgrd.h"
  28486. +#include "../../status_flags.h"
  28487. +
  28488. +#include <linux/types.h> /* for __u?? */
  28489. +#include <linux/fs.h> /* for struct super_block */
  28490. +#include <linux/buffer_head.h>
  28491. +
  28492. +/* reiser 4.0 default disk layout */
  28493. +
  28494. +/* Number of free blocks needed to perform release_format40 when fs gets
  28495. + mounted RW: 1 for SB, 1 for non-leaves in overwrite set, 2 for tx header
  28496. + & tx record. */
  28497. +#define RELEASE_RESERVED 4
  28498. +
  28499. +/* This flag indicates that backup should be updated
  28500. + (the update is performed by fsck) */
  28501. +#define FORMAT40_UPDATE_BACKUP (1 << 31)
  28502. +
  28503. +/* functions to access fields of format40_disk_super_block */
  28504. +static __u64 get_format40_block_count(const format40_disk_super_block * sb)
  28505. +{
  28506. + return le64_to_cpu(get_unaligned(&sb->block_count));
  28507. +}
  28508. +
  28509. +static __u64 get_format40_free_blocks(const format40_disk_super_block * sb)
  28510. +{
  28511. + return le64_to_cpu(get_unaligned(&sb->free_blocks));
  28512. +}
  28513. +
  28514. +static __u64 get_format40_root_block(const format40_disk_super_block * sb)
  28515. +{
  28516. + return le64_to_cpu(get_unaligned(&sb->root_block));
  28517. +}
  28518. +
  28519. +static __u16 get_format40_tree_height(const format40_disk_super_block * sb)
  28520. +{
  28521. + return le16_to_cpu(get_unaligned(&sb->tree_height));
  28522. +}
  28523. +
  28524. +static __u64 get_format40_file_count(const format40_disk_super_block * sb)
  28525. +{
  28526. + return le64_to_cpu(get_unaligned(&sb->file_count));
  28527. +}
  28528. +
  28529. +static __u64 get_format40_oid(const format40_disk_super_block * sb)
  28530. +{
  28531. + return le64_to_cpu(get_unaligned(&sb->oid));
  28532. +}
  28533. +
  28534. +static __u32 get_format40_mkfs_id(const format40_disk_super_block * sb)
  28535. +{
  28536. + return le32_to_cpu(get_unaligned(&sb->mkfs_id));
  28537. +}
  28538. +
  28539. +static __u32 get_format40_node_plugin_id(const format40_disk_super_block * sb)
  28540. +{
  28541. + return le32_to_cpu(get_unaligned(&sb->node_pid));
  28542. +}
  28543. +
  28544. +static __u64 get_format40_flags(const format40_disk_super_block * sb)
  28545. +{
  28546. + return le64_to_cpu(get_unaligned(&sb->flags));
  28547. +}
  28548. +
  28549. +static __u32 get_format40_version(const format40_disk_super_block * sb)
  28550. +{
  28551. + return le32_to_cpu(get_unaligned(&sb->version)) &
  28552. + ~FORMAT40_UPDATE_BACKUP;
  28553. +}
  28554. +
  28555. +static int update_backup_version(const format40_disk_super_block * sb)
  28556. +{
  28557. + return (le32_to_cpu(get_unaligned(&sb->version)) &
  28558. + FORMAT40_UPDATE_BACKUP);
  28559. +}
  28560. +
  28561. +static int update_disk_version_minor(const format40_disk_super_block * sb)
  28562. +{
  28563. + return (get_format40_version(sb) < get_release_number_minor());
  28564. +}
  28565. +
  28566. +static int incomplete_compatibility(const format40_disk_super_block * sb)
  28567. +{
  28568. + return (get_format40_version(sb) > get_release_number_minor());
  28569. +}
  28570. +
  28571. +static format40_super_info *get_sb_info(struct super_block *super)
  28572. +{
  28573. + return &get_super_private(super)->u.format40;
  28574. +}
  28575. +
  28576. +static int consult_diskmap(struct super_block *s)
  28577. +{
  28578. + format40_super_info *info;
  28579. + journal_location *jloc;
  28580. +
  28581. + info = get_sb_info(s);
  28582. + jloc = &get_super_private(s)->jloc;
  28583. + /* Default format-specific locations, if there is nothing in
  28584. + * diskmap */
  28585. + jloc->footer = FORMAT40_JOURNAL_FOOTER_BLOCKNR;
  28586. + jloc->header = FORMAT40_JOURNAL_HEADER_BLOCKNR;
  28587. + info->loc.super = FORMAT40_OFFSET / s->s_blocksize;
  28588. +#ifdef CONFIG_REISER4_BADBLOCKS
  28589. + reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_JF,
  28590. + &jloc->footer);
  28591. + reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_JH,
  28592. + &jloc->header);
  28593. + reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_SUPER,
  28594. + &info->loc.super);
  28595. +#endif
  28596. + return 0;
  28597. +}
  28598. +
  28599. +/* find any valid super block of disk_format40 (even if the first
  28600. + super block is destroyed); this will change block numbers of the actual journal header/footer (jh/jf)
  28601. + if needed */
  28602. +static struct buffer_head *find_a_disk_format40_super_block(struct super_block
  28603. + *s)
  28604. +{
  28605. + struct buffer_head *super_bh;
  28606. + format40_disk_super_block *disk_sb;
  28607. + format40_super_info *info;
  28608. +
  28609. + assert("umka-487", s != NULL);
  28610. +
  28611. + info = get_sb_info(s);
  28612. +
  28613. + super_bh = sb_bread(s, info->loc.super);
  28614. + if (super_bh == NULL)
  28615. + return ERR_PTR(RETERR(-EIO));
  28616. +
  28617. + disk_sb = (format40_disk_super_block *) super_bh->b_data;
  28618. + if (strncmp(disk_sb->magic, FORMAT40_MAGIC, sizeof(FORMAT40_MAGIC))) {
  28619. + brelse(super_bh);
  28620. + return ERR_PTR(RETERR(-EINVAL));
  28621. + }
  28622. +
  28623. + reiser4_set_block_count(s, le64_to_cpu(get_unaligned(&disk_sb->block_count)));
  28624. + reiser4_set_data_blocks(s, le64_to_cpu(get_unaligned(&disk_sb->block_count)) -
  28625. + le64_to_cpu(get_unaligned(&disk_sb->free_blocks)));
  28626. + reiser4_set_free_blocks(s, le64_to_cpu(get_unaligned(&disk_sb->free_blocks)));
  28627. +
  28628. + return super_bh;
  28629. +}
  28630. +
  28631. +/* find the most recent version of super block. This is called after journal is
  28632. + replayed */
  28633. +static struct buffer_head *read_super_block(struct super_block *s UNUSED_ARG)
  28634. +{
  28635. + /* Here the most recent superblock copy has to be read. However, as
  28636. + journal replay isn't complete, we are using
  28637. + find_a_disk_format40_super_block() function. */
  28638. + return find_a_disk_format40_super_block(s);
  28639. +}
  28640. +
  28641. +static int get_super_jnode(struct super_block *s)
  28642. +{
  28643. + reiser4_super_info_data *sbinfo = get_super_private(s);
  28644. + jnode *sb_jnode;
  28645. + int ret;
  28646. +
  28647. + sb_jnode = reiser4_alloc_io_head(&get_sb_info(s)->loc.super);
  28648. +
  28649. + ret = jload(sb_jnode);
  28650. +
  28651. + if (ret) {
  28652. + reiser4_drop_io_head(sb_jnode);
  28653. + return ret;
  28654. + }
  28655. +
  28656. + pin_jnode_data(sb_jnode);
  28657. + jrelse(sb_jnode);
  28658. +
  28659. + sbinfo->u.format40.sb_jnode = sb_jnode;
  28660. +
  28661. + return 0;
  28662. +}
  28663. +
  28664. +static void done_super_jnode(struct super_block *s)
  28665. +{
  28666. + jnode *sb_jnode = get_super_private(s)->u.format40.sb_jnode;
  28667. +
  28668. + if (sb_jnode) {
  28669. + unpin_jnode_data(sb_jnode);
  28670. + reiser4_drop_io_head(sb_jnode);
  28671. + }
  28672. +}
  28673. +
  28674. +typedef enum format40_init_stage {
  28675. + NONE_DONE = 0,
  28676. + CONSULT_DISKMAP,
  28677. + FIND_A_SUPER,
  28678. + INIT_JOURNAL_INFO,
  28679. + INIT_STATUS,
  28680. + JOURNAL_REPLAY,
  28681. + READ_SUPER,
  28682. + KEY_CHECK,
  28683. + INIT_OID,
  28684. + INIT_TREE,
  28685. + JOURNAL_RECOVER,
  28686. + INIT_SA,
  28687. + INIT_JNODE,
  28688. + ALL_DONE
  28689. +} format40_init_stage;
  28690. +
  28691. +static format40_disk_super_block *copy_sb(const struct buffer_head *super_bh)
  28692. +{
  28693. + format40_disk_super_block *sb_copy;
  28694. +
  28695. + sb_copy = kmalloc(sizeof(format40_disk_super_block),
  28696. + reiser4_ctx_gfp_mask_get());
  28697. + if (sb_copy == NULL)
  28698. + return ERR_PTR(RETERR(-ENOMEM));
  28699. + memcpy(sb_copy, ((format40_disk_super_block *) super_bh->b_data),
  28700. + sizeof(format40_disk_super_block));
  28701. + return sb_copy;
  28702. +}
  28703. +
  28704. +static int check_key_format(const format40_disk_super_block *sb_copy)
  28705. +{
  28706. + if (!equi(REISER4_LARGE_KEY,
  28707. + get_format40_flags(sb_copy) & (1 << FORMAT40_LARGE_KEYS))) {
  28708. + warning("nikita-3228", "Key format mismatch. "
  28709. + "Only %s keys are supported.",
  28710. + REISER4_LARGE_KEY ? "large" : "small");
  28711. + return RETERR(-EINVAL);
  28712. + }
  28713. + return 0;
  28714. +}
  28715. +
  28716. +/**
  28717. + * try_init_format40
  28718. + * @super:
  28719. + * @stage:
  28720. + *
  28721. + */
  28722. +static int try_init_format40(struct super_block *super,
  28723. + format40_init_stage *stage)
  28724. +{
  28725. + int result;
  28726. + struct buffer_head *super_bh;
  28727. + reiser4_super_info_data *sbinfo;
  28728. + format40_disk_super_block *sb_copy;
  28729. + tree_level height;
  28730. + reiser4_block_nr root_block;
  28731. + node_plugin *nplug;
  28732. +
  28733. + assert("vs-475", super != NULL);
  28734. + assert("vs-474", get_super_private(super));
  28735. +
  28736. + *stage = NONE_DONE;
  28737. +
  28738. + result = consult_diskmap(super);
  28739. + if (result)
  28740. + return result;
  28741. + *stage = CONSULT_DISKMAP;
  28742. +
  28743. + super_bh = find_a_disk_format40_super_block(super);
  28744. + if (IS_ERR(super_bh))
  28745. + return PTR_ERR(super_bh);
  28746. + brelse(super_bh);
  28747. + *stage = FIND_A_SUPER;
  28748. +
  28749. + /* ok, we are sure that filesystem format is a format40 format */
  28750. +
  28751. + /* map jnodes for journal control blocks (header, footer) to disk */
  28752. + result = reiser4_init_journal_info(super);
  28753. + if (result)
  28754. + return result;
  28755. + *stage = INIT_JOURNAL_INFO;
  28756. +
  28757. + /* ok, we are sure that filesystem format is a format40 format */
  28758. + /* Now check its state */
  28759. + result = reiser4_status_init(FORMAT40_STATUS_BLOCKNR);
  28760. + if (result != 0 && result != -EINVAL)
  28761. + /* -EINVAL means there is no magic, so probably just old
  28762. + * fs. */
  28763. + return result;
  28764. + *stage = INIT_STATUS;
  28765. +
  28766. + result = reiser4_status_query(NULL, NULL);
  28767. + if (result == REISER4_STATUS_MOUNT_WARN)
  28768. + notice("vpf-1363", "Warning: mounting %s with errors.",
  28769. + super->s_id);
  28770. + if (result == REISER4_STATUS_MOUNT_RO) {
  28771. + notice("vpf-1364", "Warning: mounting %s with fatal errors,"
  28772. + " forcing read-only mount.", super->s_id);
  28773. + super->s_flags |= SB_RDONLY;
  28774. + }
  28775. + result = reiser4_journal_replay(super);
  28776. + if (result)
  28777. + return result;
  28778. + *stage = JOURNAL_REPLAY;
  28779. +
  28780. + super_bh = read_super_block(super);
  28781. + if (IS_ERR(super_bh))
  28782. + return PTR_ERR(super_bh);
  28783. + *stage = READ_SUPER;
  28784. +
  28785. + /* allocate and make a copy of format40_disk_super_block */
  28786. + sb_copy = copy_sb(super_bh);
  28787. + brelse(super_bh);
  28788. +
  28789. + if (IS_ERR(sb_copy))
  28790. + return PTR_ERR(sb_copy);
  28791. + printk("reiser4: %s: found disk format 4.0.%u.\n",
  28792. + super->s_id,
  28793. + get_format40_version(sb_copy));
  28794. + if (incomplete_compatibility(sb_copy))
  28795. + printk("reiser4: %s: format version number (4.0.%u) is "
  28796. + "greater than release number (4.%u.%u) of reiser4 "
  28797. + "kernel module. Some objects of the volume can be "
  28798. + "inaccessible.\n",
  28799. + super->s_id,
  28800. + get_format40_version(sb_copy),
  28801. + get_release_number_major(),
  28802. + get_release_number_minor());
  28803. + /* make sure that key format of kernel and filesystem match */
  28804. + result = check_key_format(sb_copy);
  28805. + if (result) {
  28806. + kfree(sb_copy);
  28807. + return result;
  28808. + }
  28809. + *stage = KEY_CHECK;
  28810. +
  28811. + result = oid_init_allocator(super, get_format40_file_count(sb_copy),
  28812. + get_format40_oid(sb_copy));
  28813. + if (result) {
  28814. + kfree(sb_copy);
  28815. + return result;
  28816. + }
  28817. + *stage = INIT_OID;
  28818. +
  28819. + /* get things necessary to init reiser4_tree */
  28820. + root_block = get_format40_root_block(sb_copy);
  28821. + height = get_format40_tree_height(sb_copy);
  28822. + nplug = node_plugin_by_id(get_format40_node_plugin_id(sb_copy));
  28823. +
  28824. + /* initialize reiser4_super_info_data */
  28825. + sbinfo = get_super_private(super);
  28826. + assert("", sbinfo->tree.super == super);
  28827. + /* init reiser4_tree for the filesystem */
  28828. + result = reiser4_init_tree(&sbinfo->tree, &root_block, height, nplug);
  28829. + if (result) {
  28830. + kfree(sb_copy);
  28831. + return result;
  28832. + }
  28833. + *stage = INIT_TREE;
  28834. +
  28835. + /*
  28836. + * initialize reiser4_super_info_data with data from format40 super
  28837. + * block
  28838. + */
  28839. + sbinfo->default_uid = 0;
  28840. + sbinfo->default_gid = 0;
  28841. + sbinfo->mkfs_id = get_format40_mkfs_id(sb_copy);
  28842. + /* number of blocks in filesystem and reserved space */
  28843. + reiser4_set_block_count(super, get_format40_block_count(sb_copy));
  28844. + sbinfo->blocks_free = get_format40_free_blocks(sb_copy);
  28845. + sbinfo->version = get_format40_version(sb_copy);
  28846. +
  28847. + if (update_backup_version(sb_copy))
  28848. + printk("reiser4: %s: use 'fsck.reiser4 --fix' "
  28849. + "to complete disk format upgrade.\n", super->s_id);
  28850. + kfree(sb_copy);
  28851. +
  28852. + sbinfo->fsuid = 0;
  28853. + sbinfo->fs_flags |= (1 << REISER4_ADG); /* hard links for directories
  28854. + * are not supported */
  28855. + sbinfo->fs_flags |= (1 << REISER4_ONE_NODE_PLUGIN); /* all nodes in
  28856. + * layout 40 are
  28857. + * of one
  28858. + * plugin */
  28859. + /* sbinfo->tmgr is initialized already */
  28860. +
  28861. + /* recover sb data which were logged separately from sb block */
  28862. +
  28863. + /* NOTE-NIKITA: reiser4_journal_recover_sb_data() calls
  28864. + * oid_init_allocator() and reiser4_set_free_blocks() with new
  28865. + * data. What's the reason to call them above? */
  28866. + result = reiser4_journal_recover_sb_data(super);
  28867. + if (result != 0)
  28868. + return result;
  28869. + *stage = JOURNAL_RECOVER;
  28870. +
  28871. + /*
  28872. + * Set number of used blocks. The number of used blocks is not stored
  28873. + * neither in on-disk super block nor in the journal footer blocks. At
  28874. + * this moment actual values of total blocks and free block counters
  28875. + * are set in the reiser4 super block (in-memory structure) and we can
  28876. + * calculate number of used blocks from them.
  28877. + */
  28878. + reiser4_set_data_blocks(super,
  28879. + reiser4_block_count(super) -
  28880. + reiser4_free_blocks(super));
  28881. +
  28882. +#if REISER4_DEBUG
  28883. + sbinfo->min_blocks_used = 16 /* reserved area */ +
  28884. + 2 /* super blocks */ +
  28885. + 2 /* journal footer and header */ ;
  28886. +#endif
  28887. +
  28888. + /* init disk space allocator */
  28889. + result = sa_init_allocator(reiser4_get_space_allocator(super),
  28890. + super, NULL);
  28891. + if (result)
  28892. + return result;
  28893. + *stage = INIT_SA;
  28894. +
  28895. + result = get_super_jnode(super);
  28896. + if (result == 0)
  28897. + *stage = ALL_DONE;
  28898. + return result;
  28899. +}
  28900. +
  28901. +/* plugin->u.format.get_ready */
  28902. +int init_format_format40(struct super_block *s, void *data UNUSED_ARG)
  28903. +{
  28904. + int result;
  28905. + format40_init_stage stage;
  28906. +
  28907. + result = try_init_format40(s, &stage);
  28908. + switch (stage) {
  28909. + case ALL_DONE:
  28910. + assert("nikita-3458", result == 0);
  28911. + break;
  28912. + case INIT_JNODE:
  28913. + done_super_jnode(s);
  28914. + fallthrough;
  28915. + case INIT_SA:
  28916. + sa_destroy_allocator(reiser4_get_space_allocator(s), s);
  28917. + fallthrough;
  28918. + case JOURNAL_RECOVER:
  28919. + case INIT_TREE:
  28920. + reiser4_done_tree(&get_super_private(s)->tree);
  28921. + fallthrough;
  28922. + case INIT_OID:
  28923. + case KEY_CHECK:
  28924. + case READ_SUPER:
  28925. + case JOURNAL_REPLAY:
  28926. + case INIT_STATUS:
  28927. + reiser4_status_finish();
  28928. + fallthrough;
  28929. + case INIT_JOURNAL_INFO:
  28930. + reiser4_done_journal_info(s);
  28931. + case FIND_A_SUPER:
  28932. + case CONSULT_DISKMAP:
  28933. + case NONE_DONE:
  28934. + break;
  28935. + default:
  28936. + impossible("nikita-3457", "init stage: %i", stage);
  28937. + }
  28938. +
  28939. + if (!sb_rdonly(s) && reiser4_free_blocks(s) < RELEASE_RESERVED)
  28940. + return RETERR(-ENOSPC);
  28941. +
  28942. + return result;
  28943. +}
  28944. +
  28945. +static void pack_format40_super(const struct super_block *s, char *data)
  28946. +{
  28947. + format40_disk_super_block *super_data =
  28948. + (format40_disk_super_block *) data;
  28949. +
  28950. + reiser4_super_info_data *sbinfo = get_super_private(s);
  28951. +
  28952. + assert("zam-591", data != NULL);
  28953. +
  28954. + put_unaligned(cpu_to_le64(reiser4_free_committed_blocks(s)),
  28955. + &super_data->free_blocks);
  28956. +
  28957. + put_unaligned(cpu_to_le64(sbinfo->tree.root_block),
  28958. + &super_data->root_block);
  28959. +
  28960. + put_unaligned(cpu_to_le64(oid_next(s)),
  28961. + &super_data->oid);
  28962. +
  28963. + put_unaligned(cpu_to_le64(oids_used(s)),
  28964. + &super_data->file_count);
  28965. +
  28966. + put_unaligned(cpu_to_le16(sbinfo->tree.height),
  28967. + &super_data->tree_height);
  28968. +
  28969. + if (update_disk_version_minor(super_data)) {
  28970. + __u32 version = PLUGIN_LIBRARY_VERSION | FORMAT40_UPDATE_BACKUP;
  28971. +
  28972. + put_unaligned(cpu_to_le32(version), &super_data->version);
  28973. + }
  28974. +}
  28975. +
  28976. +/* plugin->u.format.log_super
  28977. + return a jnode which should be added to transaction when the super block
  28978. + gets logged */
  28979. +jnode *log_super_format40(struct super_block *s)
  28980. +{
  28981. + jnode *sb_jnode;
  28982. +
  28983. + sb_jnode = get_super_private(s)->u.format40.sb_jnode;
  28984. +
  28985. + jload(sb_jnode);
  28986. +
  28987. + pack_format40_super(s, jdata(sb_jnode));
  28988. +
  28989. + jrelse(sb_jnode);
  28990. +
  28991. + return sb_jnode;
  28992. +}
  28993. +
  28994. +/* plugin->u.format.release */
  28995. +int release_format40(struct super_block *s)
  28996. +{
  28997. + int ret;
  28998. + reiser4_super_info_data *sbinfo;
  28999. +
  29000. + sbinfo = get_super_private(s);
  29001. + assert("zam-579", sbinfo != NULL);
  29002. +
  29003. + if (!sb_rdonly(s)) {
  29004. + ret = reiser4_capture_super_block(s);
  29005. + if (ret != 0)
  29006. + warning("vs-898",
  29007. + "reiser4_capture_super_block failed: %d",
  29008. + ret);
  29009. +
  29010. + ret = txnmgr_force_commit_all(s, 1);
  29011. + if (ret != 0)
  29012. + warning("jmacd-74438", "txn_force failed: %d", ret);
  29013. +
  29014. + all_grabbed2free();
  29015. + }
  29016. +
  29017. + sa_destroy_allocator(&sbinfo->space_allocator, s);
  29018. + reiser4_done_journal_info(s);
  29019. + done_super_jnode(s);
  29020. +
  29021. + rcu_barrier();
  29022. + reiser4_done_tree(&sbinfo->tree);
  29023. + /* call finish_rcu(), because some znode were "released" in
  29024. + * reiser4_done_tree(). */
  29025. + rcu_barrier();
  29026. +
  29027. + return 0;
  29028. +}
  29029. +
  29030. +#define FORMAT40_ROOT_LOCALITY 41
  29031. +#define FORMAT40_ROOT_OBJECTID 42
  29032. +
  29033. +/* plugin->u.format.root_dir_key */
  29034. +const reiser4_key *root_dir_key_format40(const struct super_block *super
  29035. + UNUSED_ARG)
  29036. +{
  29037. + static const reiser4_key FORMAT40_ROOT_DIR_KEY = {
  29038. + .el = {
  29039. + __constant_cpu_to_le64((FORMAT40_ROOT_LOCALITY << 4) | KEY_SD_MINOR),
  29040. +#if REISER4_LARGE_KEY
  29041. + ON_LARGE_KEY(0ull,)
  29042. +#endif
  29043. + __constant_cpu_to_le64(FORMAT40_ROOT_OBJECTID),
  29044. + 0ull
  29045. + }
  29046. + };
  29047. +
  29048. + return &FORMAT40_ROOT_DIR_KEY;
  29049. +}
  29050. +
  29051. +/* plugin->u.format.check_open.
  29052. + Check the opened object for validity. For now it checks only for a valid oid &
  29053. + locality; this can be improved later, and its work may depend on the mount
  29054. + options. */
  29055. +int check_open_format40(const struct inode *object)
  29056. +{
  29057. + oid_t max, oid;
  29058. +
  29059. + max = oid_next(object->i_sb) - 1;
  29060. +
  29061. + /* Check the oid. */
  29062. + oid = get_inode_oid(object);
  29063. + if (oid > max) {
  29064. + warning("vpf-1360", "The object with the oid %llu "
  29065. + "greater then the max used oid %llu found.",
  29066. + (unsigned long long)oid, (unsigned long long)max);
  29067. +
  29068. + return RETERR(-EIO);
  29069. + }
  29070. +
  29071. + /* Check the locality. */
  29072. + oid = reiser4_inode_data(object)->locality_id;
  29073. + if (oid > max) {
  29074. + warning("vpf-1361", "The object with the locality %llu "
  29075. + "greater then the max used oid %llu found.",
  29076. + (unsigned long long)oid, (unsigned long long)max);
  29077. +
  29078. + return RETERR(-EIO);
  29079. + }
  29080. +
  29081. + return 0;
  29082. +}
  29083. +
  29084. +/*
  29085. + * plugin->u.format.version_update
  29086. + * Upgrade minor disk format version number
  29087. + */
  29088. +int version_update_format40(struct super_block *super) {
  29089. + txn_handle * trans;
  29090. + lock_handle lh;
  29091. + txn_atom *atom;
  29092. + int ret;
  29093. +
  29094. + /* Nothing to do if RO mount or the on-disk version is not less. */
  29095. + if (sb_rdonly(super))
  29096. + return 0;
  29097. +
  29098. + if (get_super_private(super)->version >= get_release_number_minor())
  29099. + return 0;
  29100. +
  29101. + printk("reiser4: %s: upgrading disk format to 4.0.%u.\n",
  29102. + super->s_id,
  29103. + get_release_number_minor());
  29104. + printk("reiser4: %s: use 'fsck.reiser4 --fix' "
  29105. + "to complete disk format upgrade.\n", super->s_id);
  29106. +
  29107. + /* Mark the uber znode dirty to call log_super on write_logs. */
  29108. + init_lh(&lh);
  29109. + ret = get_uber_znode(reiser4_get_tree(super), ZNODE_WRITE_LOCK,
  29110. + ZNODE_LOCK_HIPRI, &lh);
  29111. + if (ret != 0)
  29112. + return ret;
  29113. +
  29114. + znode_make_dirty(lh.node);
  29115. + done_lh(&lh);
  29116. +
  29117. + /* Update the backup blocks. */
  29118. +
  29119. + /* Force write_logs immediately. */
  29120. + trans = get_current_context()->trans;
  29121. + atom = get_current_atom_locked();
  29122. + assert("vpf-1906", atom != NULL);
  29123. +
  29124. + spin_lock_txnh(trans);
  29125. + return force_commit_atom(trans);
  29126. +}
  29127. +
  29128. +/* Make Linus happy.
  29129. + Local variables:
  29130. + c-indentation-style: "K&R"
  29131. + mode-name: "LC"
  29132. + c-basic-offset: 8
  29133. + tab-width: 8
  29134. + fill-column: 120
  29135. + scroll-step: 1
  29136. + End:
  29137. +*/
  29138. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/disk_format/disk_format40.h linux-5.16.14/fs/reiser4/plugin/disk_format/disk_format40.h
  29139. --- linux-5.16.14.orig/fs/reiser4/plugin/disk_format/disk_format40.h 1970-01-01 01:00:00.000000000 +0100
  29140. +++ linux-5.16.14/fs/reiser4/plugin/disk_format/disk_format40.h 2022-03-12 13:26:19.666892763 +0100
  29141. @@ -0,0 +1,111 @@
  29142. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  29143. +
  29144. +/* this file contains:
  29145. + - definition of ondisk super block of standard disk layout for
  29146. + reiser 4.0 (layout 40)
  29147. + - definition of layout 40 specific portion of in-core super block
  29148. + - declarations of functions implementing methods of layout plugin
  29149. + for layout 40
  29150. + - declarations of functions used to get/set fields in layout 40 super block
  29151. +*/
  29152. +
  29153. +#ifndef __DISK_FORMAT40_H__
  29154. +#define __DISK_FORMAT40_H__
  29155. +
  29156. +/* magic for default reiser4 layout */
  29157. +#define FORMAT40_MAGIC "ReIsEr40FoRmAt"
  29158. +#define FORMAT40_OFFSET (REISER4_MASTER_OFFSET + PAGE_SIZE)
  29159. +
  29160. +#include "../../dformat.h"
  29161. +
  29162. +#include <linux/fs.h> /* for struct super_block */
  29163. +
  29164. +typedef enum {
  29165. + FORMAT40_LARGE_KEYS
  29166. +} format40_flags;
  29167. +
  29168. +/* ondisk super block for format 40. It is 512 bytes long */
  29169. +typedef struct format40_disk_super_block {
  29170. + /* 0 */ d64 block_count;
  29171. + /* number of blocks in a filesystem */
  29172. + /* 8 */ d64 free_blocks;
  29173. + /* number of free blocks */
  29174. + /* 16 */ d64 root_block;
  29175. + /* filesystem tree root block */
  29176. + /* 24 */ d64 oid;
  29177. + /* smallest free objectid */
  29178. + /* 32 */ d64 file_count;
  29179. + /* number of files in a filesystem */
  29180. + /* 40 */ d64 flushes;
  29181. + /* number of times super block was
  29182. + flushed. Needed if format 40
  29183. + will have few super blocks */
  29184. + /* 48 */ d32 mkfs_id;
  29185. + /* unique identifier of fs */
  29186. + /* 52 */ char magic[16];
  29187. + /* magic string ReIsEr40FoRmAt */
  29188. + /* 68 */ d16 tree_height;
  29189. + /* height of filesystem tree */
  29190. + /* 70 */ d16 formatting_policy;
  29191. + /* not used anymore */
  29192. + /* 72 */ d64 flags;
  29193. + /* 80 */ d32 version;
  29194. + /* on-disk format version number
  29195. + initially assigned by mkfs as the greatest format40
  29196. + version number supported by reiser4progs and updated
  29197. + in mount time in accordance with the greatest format40
  29198. + version number supported by kernel.
  29199. + Is used by fsck to catch possible corruption and
  29200. + for various compatibility issues */
  29201. + /* 84 */ d32 node_pid;
  29202. + /* node plugin id */
  29203. + /* 88 */ char not_used[424];
  29204. +} format40_disk_super_block;
  29205. +
  29206. +/* format 40 specific part of reiser4_super_info_data */
  29207. +typedef struct format40_super_info {
  29208. +/* format40_disk_super_block actual_sb; */
  29209. + jnode *sb_jnode;
  29210. + struct {
  29211. + reiser4_block_nr super;
  29212. + } loc;
  29213. +} format40_super_info;
  29214. +
  29215. +/* Defines for journal header and footer respectively. */
  29216. +#define FORMAT40_JOURNAL_HEADER_BLOCKNR \
  29217. + ((REISER4_MASTER_OFFSET / PAGE_SIZE) + 3)
  29218. +
  29219. +#define FORMAT40_JOURNAL_FOOTER_BLOCKNR \
  29220. + ((REISER4_MASTER_OFFSET / PAGE_SIZE) + 4)
  29221. +
  29222. +#define FORMAT40_STATUS_BLOCKNR \
  29223. + ((REISER4_MASTER_OFFSET / PAGE_SIZE) + 5)
  29224. +
  29225. +/* Diskmap declarations */
  29226. +#define FORMAT40_PLUGIN_DISKMAP_ID ((REISER4_FORMAT_PLUGIN_TYPE<<16) | (FORMAT40_ID))
  29227. +#define FORMAT40_SUPER 1
  29228. +#define FORMAT40_JH 2
  29229. +#define FORMAT40_JF 3
  29230. +
  29231. +/* declarations of functions implementing methods of layout plugin for
  29232. + format 40. The functions themselves are in disk_format40.c */
  29233. +extern int init_format_format40(struct super_block *, void *data);
  29234. +extern const reiser4_key *root_dir_key_format40(const struct super_block *);
  29235. +extern int release_format40(struct super_block *s);
  29236. +extern jnode *log_super_format40(struct super_block *s);
  29237. +extern int check_open_format40(const struct inode *object);
  29238. +extern int version_update_format40(struct super_block *super);
  29239. +
  29240. +/* __DISK_FORMAT40_H__ */
  29241. +#endif
  29242. +
  29243. +/* Make Linus happy.
  29244. + Local variables:
  29245. + c-indentation-style: "K&R"
  29246. + mode-name: "LC"
  29247. + c-basic-offset: 8
  29248. + tab-width: 8
  29249. + fill-column: 120
  29250. + scroll-step: 1
  29251. + End:
  29252. +*/
  29253. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/disk_format/disk_format.c linux-5.16.14/fs/reiser4/plugin/disk_format/disk_format.c
  29254. --- linux-5.16.14.orig/fs/reiser4/plugin/disk_format/disk_format.c 1970-01-01 01:00:00.000000000 +0100
  29255. +++ linux-5.16.14/fs/reiser4/plugin/disk_format/disk_format.c 2022-03-12 13:26:19.665892760 +0100
  29256. @@ -0,0 +1,38 @@
  29257. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  29258. +
  29259. +#include "../../debug.h"
  29260. +#include "../plugin_header.h"
  29261. +#include "disk_format40.h"
  29262. +#include "disk_format.h"
  29263. +#include "../plugin.h"
  29264. +
  29265. +/* initialization of disk layout plugins */
  29266. +disk_format_plugin format_plugins[LAST_FORMAT_ID] = {
  29267. + [FORMAT40_ID] = {
  29268. + .h = {
  29269. + .type_id = REISER4_FORMAT_PLUGIN_TYPE,
  29270. + .id = FORMAT40_ID,
  29271. + .pops = NULL,
  29272. + .label = "reiser40",
  29273. + .desc = "standard disk layout for reiser40",
  29274. + .linkage = {NULL, NULL}
  29275. + },
  29276. + .init_format = init_format_format40,
  29277. + .root_dir_key = root_dir_key_format40,
  29278. + .release = release_format40,
  29279. + .log_super = log_super_format40,
  29280. + .check_open = check_open_format40,
  29281. + .version_update = version_update_format40
  29282. + }
  29283. +};
  29284. +
  29285. +/* Make Linus happy.
  29286. + Local variables:
  29287. + c-indentation-style: "K&R"
  29288. + mode-name: "LC"
  29289. + c-basic-offset: 8
  29290. + tab-width: 8
  29291. + fill-column: 120
  29292. + scroll-step: 1
  29293. + End:
  29294. +*/
  29295. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/disk_format/disk_format.h linux-5.16.14/fs/reiser4/plugin/disk_format/disk_format.h
  29296. --- linux-5.16.14.orig/fs/reiser4/plugin/disk_format/disk_format.h 1970-01-01 01:00:00.000000000 +0100
  29297. +++ linux-5.16.14/fs/reiser4/plugin/disk_format/disk_format.h 2022-03-12 13:26:19.665892760 +0100
  29298. @@ -0,0 +1,27 @@
  29299. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  29300. +
  29301. +/* identifiers for disk layouts, they are also used as indexes in array of disk
  29302. + plugins */
  29303. +
  29304. +#if !defined( __REISER4_DISK_FORMAT_H__ )
  29305. +#define __REISER4_DISK_FORMAT_H__
  29306. +
  29307. +typedef enum {
  29308. + /* standard reiser4 disk layout plugin id */
  29309. + FORMAT40_ID,
  29310. + LAST_FORMAT_ID
  29311. +} disk_format_id;
  29312. +
  29313. +/* __REISER4_DISK_FORMAT_H__ */
  29314. +#endif
  29315. +
  29316. +/* Make Linus happy.
  29317. + Local variables:
  29318. + c-indentation-style: "K&R"
  29319. + mode-name: "LC"
  29320. + c-basic-offset: 8
  29321. + tab-width: 8
  29322. + fill-column: 120
  29323. + scroll-step: 1
  29324. + End:
  29325. +*/
  29326. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/disk_format/Makefile linux-5.16.14/fs/reiser4/plugin/disk_format/Makefile
  29327. --- linux-5.16.14.orig/fs/reiser4/plugin/disk_format/Makefile 1970-01-01 01:00:00.000000000 +0100
  29328. +++ linux-5.16.14/fs/reiser4/plugin/disk_format/Makefile 2022-03-12 13:26:19.664892758 +0100
  29329. @@ -0,0 +1,8 @@
  29330. +
  29331. +MODULE := df_plugins
  29332. +
  29333. +obj-$(CONFIG_REISER4_FS) := $(MODULE).o
  29334. +
  29335. +$(MODULE)-objs += \
  29336. + disk_format40.o \
  29337. + disk_format.o
  29338. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/fibration.c linux-5.16.14/fs/reiser4/plugin/fibration.c
  29339. --- linux-5.16.14.orig/fs/reiser4/plugin/fibration.c 1970-01-01 01:00:00.000000000 +0100
  29340. +++ linux-5.16.14/fs/reiser4/plugin/fibration.c 2022-03-12 13:26:19.666892763 +0100
  29341. @@ -0,0 +1,175 @@
  29342. +/* Copyright 2004 by Hans Reiser, licensing governed by
  29343. + * reiser4/README */
  29344. +
  29345. +/* Directory fibrations */
  29346. +
  29347. +/*
  29348. + * Suppose we have a directory tree with sources of some project. During
  29349. + * compilation .o files are created within this tree. This makes access
  29350. + * to the original source files less efficient, because source files are
  29351. + * now "diluted" by object files: default directory plugin uses prefix
  29352. + * of a file name as a part of the key for directory entry (and this
  29353. + * part is also inherited by the key of file body). This means that
  29354. + * foo.o will be located close to foo.c and foo.h in the tree.
  29355. + *
  29356. + * To avoid this effect directory plugin fill highest 7 (unused
  29357. + * originally) bits of the second component of the directory entry key
  29358. + * by bit-pattern depending on the file name (see
  29359. + * fs/reiser4/kassign.c:build_entry_key_common()). These bits are called
  29360. + * "fibre". Fibre of the file name key is inherited by key of stat data
  29361. + * and keys of file body (in the case of REISER4_LARGE_KEY).
  29362. + *
  29363. + * Fibre for a given file is chosen by per-directory fibration
  29364. + * plugin. Names within given fibre are ordered lexicographically.
  29365. + */
  29366. +
  29367. +#include "../debug.h"
  29368. +#include "plugin_header.h"
  29369. +#include "plugin.h"
  29370. +#include "../super.h"
  29371. +#include "../inode.h"
  29372. +
  29373. +#include <linux/types.h>
  29374. +
  29375. +static const int fibre_shift = 57;
  29376. +
  29377. +#define FIBRE_NO(n) (((__u64)(n)) << fibre_shift)
  29378. +
  29379. +/*
  29380. + * Trivial fibration: all files of directory are just ordered
  29381. + * lexicographically.
  29382. + */
  29383. +static __u64 fibre_trivial(const struct inode *dir, const char *name, int len)
  29384. +{
  29385. + return FIBRE_NO(0);
  29386. +}
  29387. +
  29388. +/*
  29389. + * dot-o fibration: place .o files after all others.
  29390. + */
  29391. +static __u64 fibre_dot_o(const struct inode *dir, const char *name, int len)
  29392. +{
  29393. + /* special treatment for .*\.o */
  29394. + if (len > 2 && name[len - 1] == 'o' && name[len - 2] == '.')
  29395. + return FIBRE_NO(1);
  29396. + else
  29397. + return FIBRE_NO(0);
  29398. +}
  29399. +
  29400. +/*
  29401. + * ext.1 fibration: subdivide directory into 128 fibrations one for each
  29402. + * 7bit extension character (file "foo.h" goes into fibre "h"), plus
  29403. + * default fibre for the rest.
  29404. + */
  29405. +static __u64 fibre_ext_1(const struct inode *dir, const char *name, int len)
  29406. +{
  29407. + if (len > 2 && name[len - 2] == '.')
  29408. + return FIBRE_NO(name[len - 1]);
  29409. + else
  29410. + return FIBRE_NO(0);
  29411. +}
  29412. +
  29413. +/*
  29414. + * ext.3 fibration: try to separate files with different 3-character
  29415. + * extensions from each other.
  29416. + */
  29417. +static __u64 fibre_ext_3(const struct inode *dir, const char *name, int len)
  29418. +{
  29419. + if (len > 4 && name[len - 4] == '.')
  29420. + return FIBRE_NO(name[len - 3] + name[len - 2] + name[len - 1]);
  29421. + else
  29422. + return FIBRE_NO(0);
  29423. +}
  29424. +
  29425. +static int change_fibration(struct inode *inode,
  29426. + reiser4_plugin * plugin,
  29427. + pset_member memb)
  29428. +{
  29429. + int result;
  29430. +
  29431. + assert("nikita-3503", inode != NULL);
  29432. + assert("nikita-3504", plugin != NULL);
  29433. +
  29434. + assert("nikita-3505", is_reiser4_inode(inode));
  29435. + assert("nikita-3506", inode_dir_plugin(inode) != NULL);
  29436. + assert("nikita-3507",
  29437. + plugin->h.type_id == REISER4_FIBRATION_PLUGIN_TYPE);
  29438. +
  29439. + result = 0;
  29440. + if (inode_fibration_plugin(inode) == NULL ||
  29441. + inode_fibration_plugin(inode)->h.id != plugin->h.id) {
  29442. + if (is_dir_empty(inode) == 0)
  29443. + result = aset_set_unsafe(&reiser4_inode_data(inode)->pset,
  29444. + PSET_FIBRATION, plugin);
  29445. + else
  29446. + result = RETERR(-ENOTEMPTY);
  29447. +
  29448. + }
  29449. + return result;
  29450. +}
  29451. +
  29452. +static reiser4_plugin_ops fibration_plugin_ops = {
  29453. + .init = NULL,
  29454. + .load = NULL,
  29455. + .save_len = NULL,
  29456. + .save = NULL,
  29457. + .change = change_fibration
  29458. +};
  29459. +
  29460. +/* fibration plugins */
  29461. +fibration_plugin fibration_plugins[LAST_FIBRATION_ID] = {
  29462. + [FIBRATION_LEXICOGRAPHIC] = {
  29463. + .h = {
  29464. + .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
  29465. + .id = FIBRATION_LEXICOGRAPHIC,
  29466. + .pops = &fibration_plugin_ops,
  29467. + .label = "lexicographic",
  29468. + .desc = "no fibration",
  29469. + .linkage = {NULL, NULL}
  29470. + },
  29471. + .fibre = fibre_trivial
  29472. + },
  29473. + [FIBRATION_DOT_O] = {
  29474. + .h = {
  29475. + .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
  29476. + .id = FIBRATION_DOT_O,
  29477. + .pops = &fibration_plugin_ops,
  29478. + .label = "dot-o",
  29479. + .desc = "fibrate .o files separately",
  29480. + .linkage = {NULL, NULL}
  29481. + },
  29482. + .fibre = fibre_dot_o
  29483. + },
  29484. + [FIBRATION_EXT_1] = {
  29485. + .h = {
  29486. + .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
  29487. + .id = FIBRATION_EXT_1,
  29488. + .pops = &fibration_plugin_ops,
  29489. + .label = "ext-1",
  29490. + .desc = "fibrate file by single character extension",
  29491. + .linkage = {NULL, NULL}
  29492. + },
  29493. + .fibre = fibre_ext_1
  29494. + },
  29495. + [FIBRATION_EXT_3] = {
  29496. + .h = {
  29497. + .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
  29498. + .id = FIBRATION_EXT_3,
  29499. + .pops = &fibration_plugin_ops,
  29500. + .label = "ext-3",
  29501. + .desc = "fibrate file by three character extension",
  29502. + .linkage = {NULL, NULL}
  29503. + },
  29504. + .fibre = fibre_ext_3
  29505. + }
  29506. +};
  29507. +
  29508. +/*
  29509. + * Local variables:
  29510. + * c-indentation-style: "K&R"
  29511. + * mode-name: "LC"
  29512. + * c-basic-offset: 8
  29513. + * tab-width: 8
  29514. + * fill-column: 79
  29515. + * End:
  29516. + */
  29517. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/fibration.h linux-5.16.14/fs/reiser4/plugin/fibration.h
  29518. --- linux-5.16.14.orig/fs/reiser4/plugin/fibration.h 1970-01-01 01:00:00.000000000 +0100
  29519. +++ linux-5.16.14/fs/reiser4/plugin/fibration.h 2022-03-12 13:26:19.666892763 +0100
  29520. @@ -0,0 +1,37 @@
  29521. +/* Copyright 2004 by Hans Reiser, licensing governed by reiser4/README */
  29522. +
  29523. +/* Fibration plugin used by hashed directory plugin to segment content
  29524. + * of directory. See fs/reiser4/plugin/fibration.c for more on this. */
  29525. +
  29526. +#if !defined(__FS_REISER4_PLUGIN_FIBRATION_H__)
  29527. +#define __FS_REISER4_PLUGIN_FIBRATION_H__
  29528. +
  29529. +#include "plugin_header.h"
  29530. +
  29531. +typedef struct fibration_plugin {
  29532. + /* generic fields */
  29533. + plugin_header h;
  29534. +
  29535. + __u64(*fibre) (const struct inode *dir, const char *name, int len);
  29536. +} fibration_plugin;
  29537. +
  29538. +typedef enum {
  29539. + FIBRATION_LEXICOGRAPHIC,
  29540. + FIBRATION_DOT_O,
  29541. + FIBRATION_EXT_1,
  29542. + FIBRATION_EXT_3,
  29543. + LAST_FIBRATION_ID
  29544. +} reiser4_fibration_id;
  29545. +
  29546. +/* __FS_REISER4_PLUGIN_FIBRATION_H__ */
  29547. +#endif
  29548. +
  29549. +/* Make Linus happy.
  29550. + Local variables:
  29551. + c-indentation-style: "K&R"
  29552. + mode-name: "LC"
  29553. + c-basic-offset: 8
  29554. + tab-width: 8
  29555. + fill-column: 120
  29556. + End:
  29557. +*/
  29558. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/file/cryptcompress.c linux-5.16.14/fs/reiser4/plugin/file/cryptcompress.c
  29559. --- linux-5.16.14.orig/fs/reiser4/plugin/file/cryptcompress.c 1970-01-01 01:00:00.000000000 +0100
  29560. +++ linux-5.16.14/fs/reiser4/plugin/file/cryptcompress.c 2022-03-12 13:26:19.668892768 +0100
  29561. @@ -0,0 +1,3805 @@
  29562. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  29563. + reiser4/README */
  29564. +/*
  29565. + * Written by Edward Shishkin.
  29566. + *
  29567. + * Implementations of inode/file/address_space operations
  29568. + * specific for cryptcompress file plugin which manages
  29569. + * regular files built of compressed and(or) encrypted bodies.
  29570. + * See http://dev.namesys.com/CryptcompressPlugin for details.
  29571. + */
  29572. +
  29573. +#include "../../inode.h"
  29574. +#include "../cluster.h"
  29575. +#include "../object.h"
  29576. +#include "../../tree_walk.h"
  29577. +#include "cryptcompress.h"
  29578. +
  29579. +#include <linux/pagevec.h>
  29580. +#include <linux/swap.h>
  29581. +#include <linux/writeback.h>
  29582. +#include <linux/random.h>
  29583. +#include <linux/scatterlist.h>
  29584. +
  29585. +/*
  29586. + Managing primary and secondary caches by Reiser4
  29587. + cryptcompress file plugin. Synchronization scheme.
  29588. +
  29589. +
  29590. + +------------------+
  29591. + +------------------->| tfm stream |
  29592. + | | (compressed data)|
  29593. + flush | +------------------+
  29594. + +-----------------+ |
  29595. + |(->)longterm lock| V
  29596. +--+ writepages() | | +-***-+ reiser4 +---+
  29597. + | | +--+ | *** | storage tree | |
  29598. + | | | +-***-+ (primary cache)| |
  29599. +u | write() (secondary| cache) V / | \ | |
  29600. +s | ----> +----+ +----+ +----+ +----+ +-***** ******* **----+ ----> | d |
  29601. +e | | | |page cluster | | | **disk cluster** | | i |
  29602. +r | <---- +----+ +----+ +----+ +----+ +-***** **********----+ <---- | s |
  29603. + | read() ^ ^ | | k |
  29604. + | | (->)longterm lock| | page_io()| |
  29605. + | | +------+ | |
  29606. +--+ readpages() | | +---+
  29607. + | V
  29608. + | +------------------+
  29609. + +--------------------| tfm stream |
  29610. + | (plain text) |
  29611. + +------------------+
  29612. +*/
  29613. +
  29614. +/* get cryptcompress specific portion of inode */
  29615. +struct cryptcompress_info *cryptcompress_inode_data(const struct inode *inode)
  29616. +{
  29617. + return &reiser4_inode_data(inode)->file_plugin_data.cryptcompress_info;
  29618. +}
  29619. +
  29620. +/* plugin->u.file.init_inode_data */
  29621. +void init_inode_data_cryptcompress(struct inode *inode,
  29622. + reiser4_object_create_data * crd,
  29623. + int create)
  29624. +{
  29625. + struct cryptcompress_info *data;
  29626. +
  29627. + data = cryptcompress_inode_data(inode);
  29628. + assert("edward-685", data != NULL);
  29629. +
  29630. + memset(data, 0, sizeof(*data));
  29631. +
  29632. + mutex_init(&data->checkin_mutex);
  29633. + data->trunc_index = ULONG_MAX;
  29634. + turn_on_compression(data);
  29635. + set_lattice_factor(data, MIN_LATTICE_FACTOR);
  29636. + init_inode_ordering(inode, crd, create);
  29637. +}
  29638. +
  29639. +/* The following is a part of reiser4 cipher key manager
  29640. + which is called when opening/creating a cryptcompress file */
  29641. +
  29642. +/* get/set cipher key info */
  29643. +struct reiser4_crypto_info * inode_crypto_info (struct inode * inode)
  29644. +{
  29645. + assert("edward-90", inode != NULL);
  29646. + assert("edward-91", reiser4_inode_data(inode) != NULL);
  29647. + return cryptcompress_inode_data(inode)->crypt;
  29648. +}
  29649. +
  29650. +static void set_inode_crypto_info (struct inode * inode,
  29651. + struct reiser4_crypto_info * info)
  29652. +{
  29653. + cryptcompress_inode_data(inode)->crypt = info;
  29654. +}
  29655. +
  29656. +/* allocate a cipher key info */
  29657. +struct reiser4_crypto_info * reiser4_alloc_crypto_info (struct inode * inode)
  29658. +{
  29659. + struct reiser4_crypto_info *info;
  29660. + int fipsize;
  29661. +
  29662. + info = kzalloc(sizeof(*info), reiser4_ctx_gfp_mask_get());
  29663. + if (!info)
  29664. + return ERR_PTR(-ENOMEM);
  29665. +
  29666. + fipsize = inode_digest_plugin(inode)->fipsize;
  29667. + info->keyid = kmalloc(fipsize, reiser4_ctx_gfp_mask_get());
  29668. + if (!info->keyid) {
  29669. + kfree(info);
  29670. + return ERR_PTR(-ENOMEM);
  29671. + }
  29672. + info->host = inode;
  29673. + return info;
  29674. +}
  29675. +
  29676. +#if 0
  29677. +/* allocate/free low-level info for cipher and digest
  29678. + transforms */
  29679. +static int alloc_crypto_tfms(struct reiser4_crypto_info * info)
  29680. +{
  29681. + struct crypto_blkcipher * ctfm = NULL;
  29682. + struct crypto_hash * dtfm = NULL;
  29683. + cipher_plugin * cplug = inode_cipher_plugin(info->host);
  29684. + digest_plugin * dplug = inode_digest_plugin(info->host);
  29685. +
  29686. + if (cplug->alloc) {
  29687. + ctfm = cplug->alloc();
  29688. + if (IS_ERR(ctfm)) {
  29689. + warning("edward-1364",
  29690. + "Can not allocate info for %s\n",
  29691. + cplug->h.desc);
  29692. + return RETERR(PTR_ERR(ctfm));
  29693. + }
  29694. + }
  29695. + info_set_cipher(info, ctfm);
  29696. + if (dplug->alloc) {
  29697. + dtfm = dplug->alloc();
  29698. + if (IS_ERR(dtfm)) {
  29699. + warning("edward-1365",
  29700. + "Can not allocate info for %s\n",
  29701. + dplug->h.desc);
  29702. + goto unhappy_with_digest;
  29703. + }
  29704. + }
  29705. + info_set_digest(info, dtfm);
  29706. + return 0;
  29707. + unhappy_with_digest:
  29708. + if (cplug->free) {
  29709. + cplug->free(ctfm);
  29710. + info_set_cipher(info, NULL);
  29711. + }
  29712. + return RETERR(PTR_ERR(dtfm));
  29713. +}
  29714. +#endif
  29715. +
  29716. +static void
  29717. +free_crypto_tfms(struct reiser4_crypto_info * info)
  29718. +{
  29719. + assert("edward-1366", info != NULL);
  29720. + if (!info_get_cipher(info)) {
  29721. + assert("edward-1601", !info_get_digest(info));
  29722. + return;
  29723. + }
  29724. + inode_cipher_plugin(info->host)->free(info_get_cipher(info));
  29725. + info_set_cipher(info, NULL);
  29726. + inode_digest_plugin(info->host)->free(info_get_digest(info));
  29727. + info_set_digest(info, NULL);
  29728. + return;
  29729. +}
  29730. +
  29731. +#if 0
  29732. +/* create a key fingerprint for disk stat-data */
  29733. +static int create_keyid (struct reiser4_crypto_info * info,
  29734. + struct reiser4_crypto_data * data)
  29735. +{
  29736. + int ret = -ENOMEM;
  29737. + size_t blk, pad;
  29738. + __u8 * dmem;
  29739. + __u8 * cmem;
  29740. + struct hash_desc ddesc;
  29741. + struct blkcipher_desc cdesc;
  29742. + struct scatterlist sg;
  29743. +
  29744. + assert("edward-1367", info != NULL);
  29745. + assert("edward-1368", info->keyid != NULL);
  29746. +
  29747. + ddesc.tfm = info_get_digest(info);
  29748. + ddesc.flags = 0;
  29749. + cdesc.tfm = info_get_cipher(info);
  29750. + cdesc.flags = 0;
  29751. +
  29752. + dmem = kmalloc((size_t)crypto_hash_digestsize(ddesc.tfm),
  29753. + reiser4_ctx_gfp_mask_get());
  29754. + if (!dmem)
  29755. + goto exit1;
  29756. +
  29757. + blk = crypto_blkcipher_blocksize(cdesc.tfm);
  29758. +
  29759. + pad = data->keyid_size % blk;
  29760. + pad = (pad ? blk - pad : 0);
  29761. +
  29762. + cmem = kmalloc((size_t)data->keyid_size + pad,
  29763. + reiser4_ctx_gfp_mask_get());
  29764. + if (!cmem)
  29765. + goto exit2;
  29766. + memcpy(cmem, data->keyid, data->keyid_size);
  29767. + memset(cmem + data->keyid_size, 0, pad);
  29768. +
  29769. + sg_init_one(&sg, cmem, data->keyid_size + pad);
  29770. +
  29771. + ret = crypto_blkcipher_encrypt(&cdesc, &sg, &sg,
  29772. + data->keyid_size + pad);
  29773. + if (ret) {
  29774. + warning("edward-1369",
  29775. + "encryption failed flags=%x\n", cdesc.flags);
  29776. + goto exit3;
  29777. + }
  29778. + ret = crypto_hash_digest(&ddesc, &sg, sg.length, dmem);
  29779. + if (ret) {
  29780. + warning("edward-1602",
  29781. + "digest failed flags=%x\n", ddesc.flags);
  29782. + goto exit3;
  29783. + }
  29784. + memcpy(info->keyid, dmem, inode_digest_plugin(info->host)->fipsize);
  29785. + exit3:
  29786. + kfree(cmem);
  29787. + exit2:
  29788. + kfree(dmem);
  29789. + exit1:
  29790. + return ret;
  29791. +}
  29792. +#endif
  29793. +
  29794. +static void destroy_keyid(struct reiser4_crypto_info * info)
  29795. +{
  29796. + assert("edward-1370", info != NULL);
  29797. + assert("edward-1371", info->keyid != NULL);
  29798. + kfree(info->keyid);
  29799. + return;
  29800. +}
  29801. +
  29802. +static void __free_crypto_info (struct inode * inode)
  29803. +{
  29804. + struct reiser4_crypto_info * info = inode_crypto_info(inode);
  29805. + assert("edward-1372", info != NULL);
  29806. +
  29807. + free_crypto_tfms(info);
  29808. + destroy_keyid(info);
  29809. + kfree(info);
  29810. +}
  29811. +
  29812. +#if 0
  29813. +static void instantiate_crypto_info(struct reiser4_crypto_info * info)
  29814. +{
  29815. + assert("edward-1373", info != NULL);
  29816. + assert("edward-1374", info->inst == 0);
  29817. + info->inst = 1;
  29818. +}
  29819. +#endif
  29820. +
  29821. +static void uninstantiate_crypto_info(struct reiser4_crypto_info * info)
  29822. +{
  29823. + assert("edward-1375", info != NULL);
  29824. + info->inst = 0;
  29825. +}
  29826. +
  29827. +#if 0
  29828. +static int is_crypto_info_instantiated(struct reiser4_crypto_info * info)
  29829. +{
  29830. + return info->inst;
  29831. +}
  29832. +
  29833. +static int inode_has_cipher_key(struct inode * inode)
  29834. +{
  29835. + assert("edward-1376", inode != NULL);
  29836. + return inode_crypto_info(inode) &&
  29837. + is_crypto_info_instantiated(inode_crypto_info(inode));
  29838. +}
  29839. +#endif
  29840. +
  29841. +static void free_crypto_info (struct inode * inode)
  29842. +{
  29843. + uninstantiate_crypto_info(inode_crypto_info(inode));
  29844. + __free_crypto_info(inode);
  29845. +}
  29846. +
  29847. +static int need_cipher(struct inode * inode)
  29848. +{
  29849. + return inode_cipher_plugin(inode) !=
  29850. + cipher_plugin_by_id(NONE_CIPHER_ID);
  29851. +}
  29852. +
  29853. +/* Parse @data which contains a (uninstantiated) cipher key imported
  29854. + from user space, create a low-level cipher info and attach it to
  29855. + the @object. If success, then info contains an instantiated key */
  29856. +#if 0
  29857. +struct reiser4_crypto_info * create_crypto_info(struct inode * object,
  29858. + struct reiser4_crypto_data * data)
  29859. +{
  29860. + int ret;
  29861. + struct reiser4_crypto_info * info;
  29862. +
  29863. + assert("edward-1377", data != NULL);
  29864. + assert("edward-1378", need_cipher(object));
  29865. +
  29866. + if (inode_file_plugin(object) !=
  29867. + file_plugin_by_id(DIRECTORY_FILE_PLUGIN_ID))
  29868. + return ERR_PTR(-EINVAL);
  29869. +
  29870. + info = reiser4_alloc_crypto_info(object);
  29871. + if (IS_ERR(info))
  29872. + return info;
  29873. + ret = alloc_crypto_tfms(info);
  29874. + if (ret)
  29875. + goto err;
  29876. + /* instantiating a key */
  29877. + ret = crypto_blkcipher_setkey(info_get_cipher(info),
  29878. + data->key,
  29879. + data->keysize);
  29880. + if (ret) {
  29881. + warning("edward-1379",
  29882. + "setkey failed flags=%x",
  29883. + crypto_blkcipher_get_flags(info_get_cipher(info)));
  29884. + goto err;
  29885. + }
  29886. + info->keysize = data->keysize;
  29887. + ret = create_keyid(info, data);
  29888. + if (ret)
  29889. + goto err;
  29890. + instantiate_crypto_info(info);
  29891. + return info;
  29892. + err:
  29893. + __free_crypto_info(object);
  29894. + return ERR_PTR(ret);
  29895. +}
  29896. +#endif
  29897. +
  29898. +/* increment/decrement a load counter when
  29899. + attaching/detaching the crypto-stat to any object */
  29900. +static void load_crypto_info(struct reiser4_crypto_info * info)
  29901. +{
  29902. + assert("edward-1380", info != NULL);
  29903. + inc_keyload_count(info);
  29904. +}
  29905. +
  29906. +static void unload_crypto_info(struct inode * inode)
  29907. +{
  29908. + struct reiser4_crypto_info * info = inode_crypto_info(inode);
  29909. + assert("edward-1381", info->keyload_count > 0);
  29910. +
  29911. + dec_keyload_count(inode_crypto_info(inode));
  29912. + if (info->keyload_count == 0)
  29913. + /* final release */
  29914. + free_crypto_info(inode);
  29915. +}
  29916. +
  29917. +/* attach/detach an existing crypto-stat */
  29918. +void reiser4_attach_crypto_info(struct inode * inode,
  29919. + struct reiser4_crypto_info * info)
  29920. +{
  29921. + assert("edward-1382", inode != NULL);
  29922. + assert("edward-1383", info != NULL);
  29923. + assert("edward-1384", inode_crypto_info(inode) == NULL);
  29924. +
  29925. + set_inode_crypto_info(inode, info);
  29926. + load_crypto_info(info);
  29927. +}
  29928. +
  29929. +/* returns true, if crypto stat can be attached to the @host */
  29930. +#if REISER4_DEBUG
  29931. +static int host_allows_crypto_info(struct inode * host)
  29932. +{
  29933. + int ret;
  29934. + file_plugin * fplug = inode_file_plugin(host);
  29935. +
  29936. + switch (fplug->h.id) {
  29937. + case CRYPTCOMPRESS_FILE_PLUGIN_ID:
  29938. + ret = 1;
  29939. + break;
  29940. + default:
  29941. + ret = 0;
  29942. + }
  29943. + return ret;
  29944. +}
  29945. +#endif /* REISER4_DEBUG */
  29946. +
  29947. +static void reiser4_detach_crypto_info(struct inode * inode)
  29948. +{
  29949. + assert("edward-1385", inode != NULL);
  29950. + assert("edward-1386", host_allows_crypto_info(inode));
  29951. +
  29952. + if (inode_crypto_info(inode))
  29953. + unload_crypto_info(inode);
  29954. + set_inode_crypto_info(inode, NULL);
  29955. +}
  29956. +
  29957. +#if 0
  29958. +
  29959. +/* compare fingerprints of @child and @parent */
  29960. +static int keyid_eq(struct reiser4_crypto_info * child,
  29961. + struct reiser4_crypto_info * parent)
  29962. +{
  29963. + return !memcmp(child->keyid,
  29964. + parent->keyid,
  29965. + info_digest_plugin(parent)->fipsize);
  29966. +}
  29967. +
  29968. +/* check if a crypto-stat (which is bound to @parent) can be inherited */
  29969. +int can_inherit_crypto_cryptcompress(struct inode *child, struct inode *parent)
  29970. +{
  29971. + if (!need_cipher(child))
  29972. + return 0;
  29973. + /* the child is created */
  29974. + if (!inode_crypto_info(child))
  29975. + return 1;
  29976. + /* the child is looked up */
  29977. + if (!inode_crypto_info(parent))
  29978. + return 0;
  29979. + return (inode_cipher_plugin(child) == inode_cipher_plugin(parent) &&
  29980. + inode_digest_plugin(child) == inode_digest_plugin(parent) &&
  29981. + inode_crypto_info(child)->keysize ==
  29982. + inode_crypto_info(parent)->keysize &&
  29983. + keyid_eq(inode_crypto_info(child), inode_crypto_info(parent)));
  29984. +}
  29985. +#endif
  29986. +
  29987. +/* helper functions for ->create() method of the cryptcompress plugin */
  29988. +static int inode_set_crypto(struct inode * object)
  29989. +{
  29990. + reiser4_inode * info;
  29991. + if (!inode_crypto_info(object)) {
  29992. + if (need_cipher(object))
  29993. + return RETERR(-EINVAL);
  29994. + /* the file is not to be encrypted */
  29995. + return 0;
  29996. + }
  29997. + info = reiser4_inode_data(object);
  29998. + info->extmask |= (1 << CRYPTO_STAT);
  29999. + return 0;
  30000. +}
  30001. +
  30002. +static int inode_init_compression(struct inode * object)
  30003. +{
  30004. + int result = 0;
  30005. + assert("edward-1461", object != NULL);
  30006. + if (inode_compression_plugin(object)->init)
  30007. + result = inode_compression_plugin(object)->init();
  30008. + return result;
  30009. +}
  30010. +
  30011. +static int inode_check_cluster(struct inode * object)
  30012. +{
  30013. + assert("edward-696", object != NULL);
  30014. +
  30015. + if (unlikely(inode_cluster_size(object) < PAGE_SIZE)) {
  30016. + warning("edward-1320", "Can not support '%s' "
  30017. + "logical clusters (less then page size)",
  30018. + inode_cluster_plugin(object)->h.label);
  30019. + return RETERR(-EINVAL);
  30020. + }
  30021. + if (unlikely(inode_cluster_shift(object)) >= BITS_PER_BYTE*sizeof(int)){
  30022. + warning("edward-1463", "Can not support '%s' "
  30023. + "logical clusters (too big for transform)",
  30024. + inode_cluster_plugin(object)->h.label);
  30025. + return RETERR(-EINVAL);
  30026. + }
  30027. + return 0;
  30028. +}
  30029. +
  30030. +/* plugin->destroy_inode() */
  30031. +void destroy_inode_cryptcompress(struct inode * inode)
  30032. +{
  30033. + assert("edward-1464", INODE_PGCOUNT(inode) == 0);
  30034. + reiser4_detach_crypto_info(inode);
  30035. + return;
  30036. +}
  30037. +
  30038. +/* plugin->create_object():
  30039. +. install plugins
  30040. +. attach crypto info if specified
  30041. +. attach compression info if specified
  30042. +. attach cluster info
  30043. +*/
  30044. +int create_object_cryptcompress(struct inode *object, struct inode *parent,
  30045. + reiser4_object_create_data * data)
  30046. +{
  30047. + int result;
  30048. + reiser4_inode *info;
  30049. +
  30050. + assert("edward-23", object != NULL);
  30051. + assert("edward-24", parent != NULL);
  30052. + assert("edward-30", data != NULL);
  30053. + assert("edward-26", reiser4_inode_get_flag(object, REISER4_NO_SD));
  30054. + assert("edward-27", data->id == CRYPTCOMPRESS_FILE_PLUGIN_ID);
  30055. +
  30056. + info = reiser4_inode_data(object);
  30057. +
  30058. + assert("edward-29", info != NULL);
  30059. +
  30060. + /* set file bit */
  30061. + info->plugin_mask |= (1 << PSET_FILE);
  30062. +
  30063. + /* set crypto */
  30064. + result = inode_set_crypto(object);
  30065. + if (result)
  30066. + goto error;
  30067. + /* set compression */
  30068. + result = inode_init_compression(object);
  30069. + if (result)
  30070. + goto error;
  30071. + /* set cluster */
  30072. + result = inode_check_cluster(object);
  30073. + if (result)
  30074. + goto error;
  30075. +
  30076. + /* save everything in disk stat-data */
  30077. + result = write_sd_by_inode_common(object);
  30078. + if (!result)
  30079. + return 0;
  30080. + error:
  30081. + reiser4_detach_crypto_info(object);
  30082. + return result;
  30083. +}
  30084. +
  30085. +/* plugin->open() */
  30086. +int open_cryptcompress(struct inode * inode, struct file * file)
  30087. +{
  30088. + return 0;
  30089. +}
  30090. +
  30091. +#if REISER4_CRYPTO
  30092. +/* returns a blocksize, the attribute of a cipher algorithm */
  30093. +static unsigned int
  30094. +cipher_blocksize(struct inode * inode)
  30095. +{
  30096. + assert("edward-758", need_cipher(inode));
  30097. + assert("edward-1400", inode_crypto_info(inode) != NULL);
  30098. + return crypto_blkcipher_blocksize
  30099. + (info_get_cipher(inode_crypto_info(inode)));
  30100. +}
  30101. +
  30102. +/* returns offset translated by scale factor of the crypto-algorithm */
  30103. +static loff_t inode_scaled_offset (struct inode * inode,
  30104. + const loff_t src_off /* input offset */)
  30105. +{
  30106. + assert("edward-97", inode != NULL);
  30107. +
  30108. + if (!need_cipher(inode) ||
  30109. + src_off == get_key_offset(reiser4_min_key()) ||
  30110. + src_off == get_key_offset(reiser4_max_key()))
  30111. + return src_off;
  30112. +
  30113. + return inode_cipher_plugin(inode)->scale(inode,
  30114. + cipher_blocksize(inode),
  30115. + src_off);
  30116. +}
  30117. +#else
  30118. +#define inode_scaled_offset(__inode, __off) __off
  30119. +#endif
  30120. +
  30121. +/* returns disk cluster size */
  30122. +size_t inode_scaled_cluster_size(struct inode * inode)
  30123. +{
  30124. + assert("edward-110", inode != NULL);
  30125. +
  30126. + return inode_scaled_offset(inode, inode_cluster_size(inode));
  30127. +}
  30128. +
  30129. +/* set number of cluster pages */
  30130. +static void set_cluster_nrpages(struct cluster_handle * clust,
  30131. + struct inode *inode)
  30132. +{
  30133. + struct reiser4_slide * win;
  30134. +
  30135. + assert("edward-180", clust != NULL);
  30136. + assert("edward-1040", inode != NULL);
  30137. +
  30138. + clust->old_nrpages = size_in_pages(lbytes(clust->index, inode));
  30139. + win = clust->win;
  30140. + if (!win) {
  30141. + clust->nr_pages = size_in_pages(lbytes(clust->index, inode));
  30142. + return;
  30143. + }
  30144. + assert("edward-1176", clust->op != LC_INVAL);
  30145. + assert("edward-1064", win->off + win->count + win->delta != 0);
  30146. +
  30147. + if (win->stat == HOLE_WINDOW &&
  30148. + win->off == 0 && win->count == inode_cluster_size(inode)) {
  30149. + /* special case: writing a "fake" logical cluster */
  30150. + clust->nr_pages = 0;
  30151. + return;
  30152. + }
  30153. + clust->nr_pages = size_in_pages(max(win->off + win->count + win->delta,
  30154. + lbytes(clust->index, inode)));
  30155. + return;
  30156. +}
  30157. +
  30158. +/* plugin->key_by_inode()
  30159. + build key of a disk cluster */
  30160. +int key_by_inode_cryptcompress(struct inode *inode, loff_t off,
  30161. + reiser4_key * key)
  30162. +{
  30163. + assert("edward-64", inode != 0);
  30164. +
  30165. + if (likely(off != get_key_offset(reiser4_max_key())))
  30166. + off = off_to_clust_to_off(off, inode);
  30167. + if (inode_crypto_info(inode))
  30168. + off = inode_scaled_offset(inode, off);
  30169. +
  30170. + key_by_inode_and_offset_common(inode, 0, key);
  30171. + set_key_offset(key, (__u64)off);
  30172. + return 0;
  30173. +}
  30174. +
  30175. +/* plugin->flow_by_inode() */
  30176. +/* flow is used to read/write disk clusters */
  30177. +int flow_by_inode_cryptcompress(struct inode *inode, const char __user * buf,
  30178. + int user, /* 1: @buf is of user space,
  30179. + 0: kernel space */
  30180. + loff_t size, /* @buf size */
  30181. + loff_t off, /* offset to start io from */
  30182. + rw_op op, /* READ or WRITE */
  30183. + flow_t * f /* resulting flow */)
  30184. +{
  30185. + assert("edward-436", f != NULL);
  30186. + assert("edward-149", inode != NULL);
  30187. + assert("edward-150", inode_file_plugin(inode) != NULL);
  30188. + assert("edward-1465", user == 0); /* we use flow to read/write
  30189. + disk clusters located in
  30190. + kernel space */
  30191. + f->length = size;
  30192. + memcpy(&f->data, &buf, sizeof(buf));
  30193. + f->user = user;
  30194. + f->op = op;
  30195. +
  30196. + return key_by_inode_cryptcompress(inode, off, &f->key);
  30197. +}
  30198. +
  30199. +static int
  30200. +cryptcompress_hint_validate(hint_t * hint, const reiser4_key * key,
  30201. + znode_lock_mode lock_mode)
  30202. +{
  30203. + coord_t *coord;
  30204. +
  30205. + assert("edward-704", hint != NULL);
  30206. + assert("edward-1089", !hint_is_valid(hint));
  30207. + assert("edward-706", hint->lh.owner == NULL);
  30208. +
  30209. + coord = &hint->ext_coord.coord;
  30210. +
  30211. + if (!hint || !hint_is_set(hint) || hint->mode != lock_mode)
  30212. + /* hint either not set or set by different operation */
  30213. + return RETERR(-E_REPEAT);
  30214. +
  30215. + if (get_key_offset(key) != hint->offset)
  30216. + /* hint is set for different key */
  30217. + return RETERR(-E_REPEAT);
  30218. +
  30219. + assert("edward-707", reiser4_schedulable());
  30220. +
  30221. + return reiser4_seal_validate(&hint->seal, &hint->ext_coord.coord,
  30222. + key, &hint->lh, lock_mode,
  30223. + ZNODE_LOCK_LOPRI);
  30224. +}
  30225. +
  30226. +/* reserve disk space when writing a logical cluster */
  30227. +static int reserve4cluster(struct inode *inode, struct cluster_handle *clust)
  30228. +{
  30229. + int result = 0;
  30230. +
  30231. + assert("edward-965", reiser4_schedulable());
  30232. + assert("edward-439", inode != NULL);
  30233. + assert("edward-440", clust != NULL);
  30234. + assert("edward-441", clust->pages != NULL);
  30235. +
  30236. + if (clust->nr_pages == 0) {
  30237. + assert("edward-1152", clust->win != NULL);
  30238. + assert("edward-1153", clust->win->stat == HOLE_WINDOW);
  30239. + /* don't reserve disk space for fake logical cluster */
  30240. + return 0;
  30241. + }
  30242. + assert("edward-442", jprivate(clust->pages[0]) != NULL);
  30243. +
  30244. + result = reiser4_grab_space_force(estimate_insert_cluster(inode) +
  30245. + estimate_update_cluster(inode),
  30246. + BA_CAN_COMMIT);
  30247. + if (result)
  30248. + return result;
  30249. + clust->reserved = 1;
  30250. + grabbed2cluster_reserved(estimate_insert_cluster(inode) +
  30251. + estimate_update_cluster(inode));
  30252. +#if REISER4_DEBUG
  30253. + clust->reserved_prepped = estimate_update_cluster(inode);
  30254. + clust->reserved_unprepped = estimate_insert_cluster(inode);
  30255. +#endif
  30256. + /* there can be space grabbed by txnmgr_force_commit_all */
  30257. + return 0;
  30258. +}
  30259. +
  30260. +/* free reserved disk space if writing a logical cluster fails */
  30261. +static void free_reserved4cluster(struct inode *inode,
  30262. + struct cluster_handle *ch, int count)
  30263. +{
  30264. + assert("edward-967", ch->reserved == 1);
  30265. +
  30266. + cluster_reserved2free(count);
  30267. + ch->reserved = 0;
  30268. +}
  30269. +
  30270. +/*
  30271. + * The core search procedure of the cryptcompress plugin.
  30272. + * If returned value is not cbk_errored, then current position
  30273. + * is locked.
  30274. + */
  30275. +static int find_cluster_item(hint_t * hint,
  30276. + const reiser4_key * key, /* key of the item we are
  30277. + looking for */
  30278. + znode_lock_mode lock_mode /* which lock */ ,
  30279. + ra_info_t * ra_info, lookup_bias bias, __u32 flags)
  30280. +{
  30281. + int result;
  30282. + reiser4_key ikey;
  30283. + coord_t *coord = &hint->ext_coord.coord;
  30284. + coord_t orig = *coord;
  30285. +
  30286. + assert("edward-152", hint != NULL);
  30287. +
  30288. + if (!hint_is_valid(hint)) {
  30289. + result = cryptcompress_hint_validate(hint, key, lock_mode);
  30290. + if (result == -E_REPEAT)
  30291. + goto traverse_tree;
  30292. + else if (result) {
  30293. + assert("edward-1216", 0);
  30294. + return result;
  30295. + }
  30296. + hint_set_valid(hint);
  30297. + }
  30298. + assert("edward-709", znode_is_any_locked(coord->node));
  30299. + /*
  30300. + * Hint is valid, so we perform in-place lookup.
  30301. + * It means we just need to check if the next item in
  30302. + * the tree (relative to the current position @coord)
  30303. + * has key @key.
  30304. + *
  30305. + * Valid hint means in particular, that node is not
  30306. + * empty and at least one its item has been processed
  30307. + */
  30308. + if (equal_to_rdk(coord->node, key)) {
  30309. + /*
  30310. + * Look for the item in the right neighbor
  30311. + */
  30312. + lock_handle lh_right;
  30313. +
  30314. + init_lh(&lh_right);
  30315. + result = reiser4_get_right_neighbor(&lh_right, coord->node,
  30316. + znode_is_wlocked(coord->node) ?
  30317. + ZNODE_WRITE_LOCK : ZNODE_READ_LOCK,
  30318. + GN_CAN_USE_UPPER_LEVELS);
  30319. + if (result) {
  30320. + done_lh(&lh_right);
  30321. + reiser4_unset_hint(hint);
  30322. + if (result == -E_NO_NEIGHBOR)
  30323. + return RETERR(-EIO);
  30324. + return result;
  30325. + }
  30326. + assert("edward-1218",
  30327. + equal_to_ldk(lh_right.node, key));
  30328. + result = zload(lh_right.node);
  30329. + if (result) {
  30330. + done_lh(&lh_right);
  30331. + reiser4_unset_hint(hint);
  30332. + return result;
  30333. + }
  30334. + coord_init_first_unit_nocheck(coord, lh_right.node);
  30335. +
  30336. + if (!coord_is_existing_item(coord)) {
  30337. + zrelse(lh_right.node);
  30338. + done_lh(&lh_right);
  30339. + goto traverse_tree;
  30340. + }
  30341. + item_key_by_coord(coord, &ikey);
  30342. + zrelse(coord->node);
  30343. + if (unlikely(!keyeq(key, &ikey))) {
  30344. + warning("edward-1608",
  30345. + "Expected item not found. Fsck?");
  30346. + done_lh(&lh_right);
  30347. + goto not_found;
  30348. + }
  30349. + /*
  30350. + * item has been found in the right neighbor;
  30351. + * move lock to the right
  30352. + */
  30353. + done_lh(&hint->lh);
  30354. + move_lh(&hint->lh, &lh_right);
  30355. +
  30356. + dclust_inc_extension_ncount(hint);
  30357. +
  30358. + return CBK_COORD_FOUND;
  30359. + } else {
  30360. + /*
  30361. + * Look for the item in the current node
  30362. + */
  30363. + coord->item_pos++;
  30364. + coord->unit_pos = 0;
  30365. + coord->between = AT_UNIT;
  30366. +
  30367. + result = zload(coord->node);
  30368. + if (result) {
  30369. + done_lh(&hint->lh);
  30370. + return result;
  30371. + }
  30372. + if (!coord_is_existing_item(coord)) {
  30373. + zrelse(coord->node);
  30374. + goto not_found;
  30375. + }
  30376. + item_key_by_coord(coord, &ikey);
  30377. + zrelse(coord->node);
  30378. + if (!keyeq(key, &ikey))
  30379. + goto not_found;
  30380. + /*
  30381. + * item has been found in the current node
  30382. + */
  30383. + dclust_inc_extension_ncount(hint);
  30384. +
  30385. + return CBK_COORD_FOUND;
  30386. + }
  30387. + not_found:
  30388. + /*
  30389. + * The tree doesn't contain an item with @key;
  30390. + * roll back the coord
  30391. + */
  30392. + *coord = orig;
  30393. + ON_DEBUG(coord_update_v(coord));
  30394. + return CBK_COORD_NOTFOUND;
  30395. +
  30396. + traverse_tree:
  30397. +
  30398. + reiser4_unset_hint(hint);
  30399. + dclust_init_extension(hint);
  30400. + coord_init_zero(coord);
  30401. +
  30402. + assert("edward-713", hint->lh.owner == NULL);
  30403. + assert("edward-714", reiser4_schedulable());
  30404. +
  30405. + result = coord_by_key(current_tree, key, coord, &hint->lh,
  30406. + lock_mode, bias, LEAF_LEVEL, LEAF_LEVEL,
  30407. + CBK_UNIQUE | flags, ra_info);
  30408. + if (cbk_errored(result))
  30409. + return result;
  30410. + if(result == CBK_COORD_FOUND)
  30411. + dclust_inc_extension_ncount(hint);
  30412. + hint_set_valid(hint);
  30413. + return result;
  30414. +}
  30415. +
  30416. +#if REISER4_CRYPTO
  30417. +
  30418. +/* This function is called by deflate[inflate] manager when
  30419. + creating a transformed/plain stream to check if we should
  30420. + create/cut some overhead. If this returns true, then @oh
  30421. + contains the size of this overhead.
  30422. + */
  30423. +static int need_cut_or_align(struct inode * inode,
  30424. + struct cluster_handle * ch, rw_op rw, int * oh)
  30425. +{
  30426. + struct tfm_cluster * tc = &ch->tc;
  30427. + switch (rw) {
  30428. + case WRITE_OP: /* estimate align */
  30429. + *oh = tc->len % cipher_blocksize(inode);
  30430. + if (*oh != 0)
  30431. + return 1;
  30432. + break;
  30433. + case READ_OP: /* estimate cut */
  30434. + *oh = *(tfm_output_data(ch) + tc->len - 1);
  30435. + break;
  30436. + default:
  30437. + impossible("edward-1401", "bad option");
  30438. + }
  30439. + return (tc->len != tc->lsize);
  30440. +}
  30441. +
  30442. +/* create/cut an overhead of transformed/plain stream */
  30443. +static void align_or_cut_overhead(struct inode * inode,
  30444. + struct cluster_handle * ch, rw_op rw)
  30445. +{
  30446. + unsigned int oh;
  30447. + cipher_plugin * cplug = inode_cipher_plugin(inode);
  30448. +
  30449. + assert("edward-1402", need_cipher(inode));
  30450. +
  30451. + if (!need_cut_or_align(inode, ch, rw, &oh))
  30452. + return;
  30453. + switch (rw) {
  30454. + case WRITE_OP: /* do align */
  30455. + ch->tc.len +=
  30456. + cplug->align_stream(tfm_input_data(ch) +
  30457. + ch->tc.len, ch->tc.len,
  30458. + cipher_blocksize(inode));
  30459. + *(tfm_input_data(ch) + ch->tc.len - 1) =
  30460. + cipher_blocksize(inode) - oh;
  30461. + break;
  30462. + case READ_OP: /* do cut */
  30463. + assert("edward-1403", oh <= cipher_blocksize(inode));
  30464. + ch->tc.len -= oh;
  30465. + break;
  30466. + default:
  30467. + impossible("edward-1404", "bad option");
  30468. + }
  30469. + return;
  30470. +}
  30471. +
  30472. +static unsigned max_cipher_overhead(struct inode * inode)
  30473. +{
  30474. + if (!need_cipher(inode) || !inode_cipher_plugin(inode)->align_stream)
  30475. + return 0;
  30476. + return cipher_blocksize(inode);
  30477. +}
  30478. +#else
  30479. +#define max_cipher_overhead(_inode) 0
  30480. +#endif
  30481. +
  30482. +static int deflate_overhead(struct inode *inode)
  30483. +{
  30484. + return (inode_compression_plugin(inode)->
  30485. + checksum ? DC_CHECKSUM_SIZE : 0);
  30486. +}
  30487. +
  30488. +static unsigned deflate_overrun(struct inode * inode, int ilen)
  30489. +{
  30490. + return coa_overrun(inode_compression_plugin(inode), ilen);
  30491. +}
  30492. +
  30493. +static bool is_all_zero(char const* mem, size_t size)
  30494. +{
  30495. + while (size-- > 0)
  30496. + if (*mem++)
  30497. + return false;
  30498. + return true;
  30499. +}
  30500. +
  30501. +static inline bool should_punch_hole(struct tfm_cluster *tc)
  30502. +{
  30503. + if (0 &&
  30504. + !reiser4_is_set(reiser4_get_current_sb(), REISER4_DONT_PUNCH_HOLES)
  30505. + && is_all_zero(tfm_stream_data(tc, INPUT_STREAM), tc->lsize)) {
  30506. +
  30507. + tc->hole = 1;
  30508. + return true;
  30509. + }
  30510. + return false;
  30511. +}
  30512. +
  30513. +/* Estimating compressibility of a logical cluster by various
  30514. + policies represented by compression mode plugin.
  30515. + If this returns false, then compressor won't be called for
  30516. + the cluster of index @index.
  30517. +*/
  30518. +static int should_compress(struct tfm_cluster *tc, cloff_t index,
  30519. + struct inode *inode)
  30520. +{
  30521. + compression_plugin *cplug = inode_compression_plugin(inode);
  30522. + compression_mode_plugin *mplug = inode_compression_mode_plugin(inode);
  30523. +
  30524. + assert("edward-1321", tc->len != 0);
  30525. + assert("edward-1322", cplug != NULL);
  30526. + assert("edward-1323", mplug != NULL);
  30527. +
  30528. + if (should_punch_hole(tc))
  30529. + /*
  30530. + * we are about to punch a hole,
  30531. + * so don't compress data
  30532. + */
  30533. + return 0;
  30534. + return /* estimate by size */
  30535. + (cplug->min_size_deflate ?
  30536. + tc->len >= cplug->min_size_deflate() :
  30537. + 1) &&
  30538. + /* estimate by compression mode plugin */
  30539. + (mplug->should_deflate ?
  30540. + mplug->should_deflate(inode, index) :
  30541. + 1);
  30542. +}
  30543. +
  30544. +/* Evaluating results of compression transform.
  30545. + Returns true, if we need to accept this results */
  30546. +static int save_compressed(int size_before, int size_after, struct inode *inode)
  30547. +{
  30548. + return (size_after + deflate_overhead(inode) +
  30549. + max_cipher_overhead(inode) < size_before);
  30550. +}
  30551. +
  30552. +/* Guess result of the evaluation above */
  30553. +static int need_inflate(struct cluster_handle * ch, struct inode * inode,
  30554. + int encrypted /* is cluster encrypted */ )
  30555. +{
  30556. + struct tfm_cluster * tc = &ch->tc;
  30557. +
  30558. + assert("edward-142", tc != 0);
  30559. + assert("edward-143", inode != NULL);
  30560. +
  30561. + return tc->len <
  30562. + (encrypted ?
  30563. + inode_scaled_offset(inode, tc->lsize) :
  30564. + tc->lsize);
  30565. +}
  30566. +
  30567. +/* If results of compression were accepted, then we add
  30568. + a checksum to catch possible disk cluster corruption.
  30569. + The following is a format of the data stored in disk clusters:
  30570. +
  30571. + data This is (transformed) logical cluster.
  30572. + cipher_overhead This is created by ->align() method
  30573. + of cipher plugin. May be absent.
  30574. + checksum (4) This is created by ->checksum method
  30575. + of compression plugin to check
  30576. + integrity. May be absent.
  30577. +
  30578. + Crypto overhead format:
  30579. +
  30580. + data
  30581. + control_byte (1) contains aligned overhead size:
  30582. + 1 <= overhead <= cipher_blksize
  30583. +*/
  30584. +/* Append a checksum at the end of a transformed stream */
  30585. +static void dc_set_checksum(compression_plugin * cplug, struct tfm_cluster * tc)
  30586. +{
  30587. + __u32 checksum;
  30588. +
  30589. + assert("edward-1309", tc != NULL);
  30590. + assert("edward-1310", tc->len > 0);
  30591. + assert("edward-1311", cplug->checksum != NULL);
  30592. +
  30593. + checksum = cplug->checksum(tfm_stream_data(tc, OUTPUT_STREAM), tc->len);
  30594. + put_unaligned(cpu_to_le32(checksum),
  30595. + (d32 *)(tfm_stream_data(tc, OUTPUT_STREAM) + tc->len));
  30596. + tc->len += (int)DC_CHECKSUM_SIZE;
  30597. +}
  30598. +
  30599. +/* Check a disk cluster checksum.
  30600. + Returns 0 if checksum is correct, otherwise returns 1 */
  30601. +static int dc_check_checksum(compression_plugin * cplug, struct tfm_cluster * tc)
  30602. +{
  30603. + assert("edward-1312", tc != NULL);
  30604. + assert("edward-1313", tc->len > (int)DC_CHECKSUM_SIZE);
  30605. + assert("edward-1314", cplug->checksum != NULL);
  30606. +
  30607. + if (cplug->checksum(tfm_stream_data(tc, INPUT_STREAM),
  30608. + tc->len - (int)DC_CHECKSUM_SIZE) !=
  30609. + le32_to_cpu(get_unaligned((d32 *)
  30610. + (tfm_stream_data(tc, INPUT_STREAM)
  30611. + + tc->len - (int)DC_CHECKSUM_SIZE)))) {
  30612. + warning("edward-156",
  30613. + "Bad disk cluster checksum %d, (should be %d) Fsck?\n",
  30614. + (int)le32_to_cpu
  30615. + (get_unaligned((d32 *)
  30616. + (tfm_stream_data(tc, INPUT_STREAM) +
  30617. + tc->len - (int)DC_CHECKSUM_SIZE))),
  30618. + (int)cplug->checksum
  30619. + (tfm_stream_data(tc, INPUT_STREAM),
  30620. + tc->len - (int)DC_CHECKSUM_SIZE));
  30621. + return 1;
  30622. + }
  30623. + tc->len -= (int)DC_CHECKSUM_SIZE;
  30624. + return 0;
  30625. +}
  30626. +
  30627. +/* get input/output stream for some transform action */
  30628. +int grab_tfm_stream(struct inode * inode, struct tfm_cluster * tc,
  30629. + tfm_stream_id id)
  30630. +{
  30631. + size_t size = inode_scaled_cluster_size(inode);
  30632. +
  30633. + assert("edward-901", tc != NULL);
  30634. + assert("edward-1027", inode_compression_plugin(inode) != NULL);
  30635. +
  30636. + if (cluster_get_tfm_act(tc) == TFMA_WRITE)
  30637. + size += deflate_overrun(inode, inode_cluster_size(inode));
  30638. +
  30639. + if (!get_tfm_stream(tc, id) && id == INPUT_STREAM)
  30640. + alternate_streams(tc);
  30641. + if (!get_tfm_stream(tc, id))
  30642. + return alloc_tfm_stream(tc, size, id);
  30643. +
  30644. + assert("edward-902", tfm_stream_is_set(tc, id));
  30645. +
  30646. + if (tfm_stream_size(tc, id) < size)
  30647. + return realloc_tfm_stream(tc, size, id);
  30648. + return 0;
  30649. +}
  30650. +
  30651. +/* Common deflate manager */
  30652. +int reiser4_deflate_cluster(struct cluster_handle * clust, struct inode * inode)
  30653. +{
  30654. + int result = 0;
  30655. + int compressed = 0;
  30656. + int encrypted = 0;
  30657. + struct tfm_cluster * tc = &clust->tc;
  30658. + compression_plugin * coplug;
  30659. +
  30660. + assert("edward-401", inode != NULL);
  30661. + assert("edward-903", tfm_stream_is_set(tc, INPUT_STREAM));
  30662. + assert("edward-1348", cluster_get_tfm_act(tc) == TFMA_WRITE);
  30663. + assert("edward-498", !tfm_cluster_is_uptodate(tc));
  30664. +
  30665. + coplug = inode_compression_plugin(inode);
  30666. + if (should_compress(tc, clust->index, inode)) {
  30667. + /* try to compress, discard bad results */
  30668. + size_t dst_len;
  30669. + compression_mode_plugin * mplug =
  30670. + inode_compression_mode_plugin(inode);
  30671. + assert("edward-602", coplug != NULL);
  30672. + assert("edward-1423", coplug->compress != NULL);
  30673. +
  30674. + result = grab_coa(tc, coplug);
  30675. + if (result)
  30676. + /*
  30677. + * can not allocate memory to perform
  30678. + * compression, leave data uncompressed
  30679. + */
  30680. + goto cipher;
  30681. + result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
  30682. + if (result) {
  30683. + warning("edward-1425",
  30684. + "alloc stream failed with ret=%d, skipped compression",
  30685. + result);
  30686. + goto cipher;
  30687. + }
  30688. + dst_len = tfm_stream_size(tc, OUTPUT_STREAM);
  30689. + coplug->compress(get_coa(tc, coplug->h.id, tc->act),
  30690. + tfm_input_data(clust), tc->len,
  30691. + tfm_output_data(clust), &dst_len);
  30692. + /* make sure we didn't overwrite extra bytes */
  30693. + assert("edward-603",
  30694. + dst_len <= tfm_stream_size(tc, OUTPUT_STREAM));
  30695. +
  30696. + /* evaluate results of compression transform */
  30697. + if (save_compressed(tc->len, dst_len, inode)) {
  30698. + /* good result, accept */
  30699. + tc->len = dst_len;
  30700. + if (mplug->accept_hook != NULL) {
  30701. + result = mplug->accept_hook(inode, clust->index);
  30702. + if (result)
  30703. + warning("edward-1426",
  30704. + "accept_hook failed with ret=%d",
  30705. + result);
  30706. + }
  30707. + compressed = 1;
  30708. + }
  30709. + else {
  30710. + /* bad result, discard */
  30711. +#if 0
  30712. + if (cluster_is_complete(clust, inode))
  30713. + warning("edward-1496",
  30714. + "incompressible cluster %lu (inode %llu)",
  30715. + clust->index,
  30716. + (unsigned long long)get_inode_oid(inode));
  30717. +#endif
  30718. + if (mplug->discard_hook != NULL &&
  30719. + cluster_is_complete(clust, inode)) {
  30720. + result = mplug->discard_hook(inode,
  30721. + clust->index);
  30722. + if (result)
  30723. + warning("edward-1427",
  30724. + "discard_hook failed with ret=%d",
  30725. + result);
  30726. + }
  30727. + }
  30728. + }
  30729. + cipher:
  30730. +#if REISER4_CRYPTO
  30731. + if (need_cipher(inode)) {
  30732. + cipher_plugin * ciplug;
  30733. + struct blkcipher_desc desc;
  30734. + struct scatterlist src;
  30735. + struct scatterlist dst;
  30736. +
  30737. + ciplug = inode_cipher_plugin(inode);
  30738. + desc.tfm = info_get_cipher(inode_crypto_info(inode));
  30739. + desc.flags = 0;
  30740. + if (compressed)
  30741. + alternate_streams(tc);
  30742. + result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
  30743. + if (result)
  30744. + return result;
  30745. +
  30746. + align_or_cut_overhead(inode, clust, WRITE_OP);
  30747. + sg_init_one(&src, tfm_input_data(clust), tc->len);
  30748. + sg_init_one(&dst, tfm_output_data(clust), tc->len);
  30749. +
  30750. + result = crypto_blkcipher_encrypt(&desc, &dst, &src, tc->len);
  30751. + if (result) {
  30752. + warning("edward-1405",
  30753. + "encryption failed flags=%x\n", desc.flags);
  30754. + return result;
  30755. + }
  30756. + encrypted = 1;
  30757. + }
  30758. +#endif
  30759. + if (compressed && coplug->checksum != NULL)
  30760. + dc_set_checksum(coplug, tc);
  30761. + if (!compressed && !encrypted)
  30762. + alternate_streams(tc);
  30763. + return result;
  30764. +}
  30765. +
  30766. +/* Common inflate manager. */
  30767. +int reiser4_inflate_cluster(struct cluster_handle * clust, struct inode * inode)
  30768. +{
  30769. + int result = 0;
  30770. + int transformed = 0;
  30771. + struct tfm_cluster * tc = &clust->tc;
  30772. + compression_plugin * coplug;
  30773. +
  30774. + assert("edward-905", inode != NULL);
  30775. + assert("edward-1178", clust->dstat == PREP_DISK_CLUSTER);
  30776. + assert("edward-906", tfm_stream_is_set(&clust->tc, INPUT_STREAM));
  30777. + assert("edward-1349", tc->act == TFMA_READ);
  30778. + assert("edward-907", !tfm_cluster_is_uptodate(tc));
  30779. +
  30780. + /* Handle a checksum (if any) */
  30781. + coplug = inode_compression_plugin(inode);
  30782. + if (need_inflate(clust, inode, need_cipher(inode)) &&
  30783. + coplug->checksum != NULL) {
  30784. + result = dc_check_checksum(coplug, tc);
  30785. + if (unlikely(result)) {
  30786. + warning("edward-1460",
  30787. + "Inode %llu: disk cluster %lu looks corrupted",
  30788. + (unsigned long long)get_inode_oid(inode),
  30789. + clust->index);
  30790. + return RETERR(-EIO);
  30791. + }
  30792. + }
  30793. +#if REISER4_CRYPTO
  30794. + if (need_cipher(inode)) {
  30795. + cipher_plugin * ciplug;
  30796. + struct blkcipher_desc desc;
  30797. + struct scatterlist src;
  30798. + struct scatterlist dst;
  30799. +
  30800. + ciplug = inode_cipher_plugin(inode);
  30801. + desc.tfm = info_get_cipher(inode_crypto_info(inode));
  30802. + desc.flags = 0;
  30803. + result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
  30804. + if (result)
  30805. + return result;
  30806. + assert("edward-909", tfm_cluster_is_set(tc));
  30807. +
  30808. + sg_init_one(&src, tfm_input_data(clust), tc->len);
  30809. + sg_init_one(&dst, tfm_output_data(clust), tc->len);
  30810. +
  30811. + result = crypto_blkcipher_decrypt(&desc, &dst, &src, tc->len);
  30812. + if (result) {
  30813. + warning("edward-1600", "decrypt failed flags=%x\n",
  30814. + desc.flags);
  30815. + return result;
  30816. + }
  30817. + align_or_cut_overhead(inode, clust, READ_OP);
  30818. + transformed = 1;
  30819. + }
  30820. +#endif
  30821. + if (need_inflate(clust, inode, 0)) {
  30822. + size_t dst_len = inode_cluster_size(inode);
  30823. + if(transformed)
  30824. + alternate_streams(tc);
  30825. +
  30826. + result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
  30827. + if (result)
  30828. + return result;
  30829. + assert("edward-1305", coplug->decompress != NULL);
  30830. + assert("edward-910", tfm_cluster_is_set(tc));
  30831. +
  30832. + coplug->decompress(get_coa(tc, coplug->h.id, tc->act),
  30833. + tfm_input_data(clust), tc->len,
  30834. + tfm_output_data(clust), &dst_len);
  30835. + /* check length */
  30836. + tc->len = dst_len;
  30837. + assert("edward-157", dst_len == tc->lsize);
  30838. + transformed = 1;
  30839. + }
  30840. + if (!transformed)
  30841. + alternate_streams(tc);
  30842. + return result;
  30843. +}
  30844. +
/* This is implementation of readpage method of struct
   address_space_operations for cryptcompress plugin.

   @file may be NULL (e.g. when called for readahead); @page comes in
   locked and not uptodate.  Returns 0 on success or a negative error.
   On every early-exit path the page is unlocked here; on the main path
   it is handed to the ctail item plugin's ->readpage(), which is
   presumably responsible for unlocking it -- NOTE(review): confirm. */
int readpage_cryptcompress(struct file *file, struct page *page)
{
	reiser4_context *ctx;
	struct cluster_handle clust;
	item_plugin *iplug;
	int result;

	assert("edward-88", PageLocked(page));
	assert("vs-976", !PageUptodate(page));
	assert("edward-89", page->mapping && page->mapping->host);

	ctx = reiser4_init_context(page->mapping->host->i_sb);
	if (IS_ERR(ctx)) {
		unlock_page(page);
		return PTR_ERR(ctx);
	}
	assert("edward-113",
	       ergo(file != NULL,
		    page->mapping == file_inode(file)->i_mapping));

	/* re-check: someone may have made the page uptodate while we
	   were initializing the reiser4 context */
	if (PageUptodate(page)) {
		warning("edward-1338", "page is already uptodate\n");
		unlock_page(page);
		reiser4_exit_context(ctx);
		return 0;
	}
	cluster_init_read(&clust, NULL);
	clust.file = file;
	iplug = item_plugin_by_id(CTAIL_ID);
	if (!iplug->s.file.readpage) {
		unlock_page(page);
		put_cluster_handle(&clust);
		reiser4_exit_context(ctx);
		return -EINVAL;
	}
	result = iplug->s.file.readpage(&clust, page);

	put_cluster_handle(&clust);
	reiser4_txn_restart(ctx);
	reiser4_exit_context(ctx);
	return result;
}
  30889. +
  30890. +/* number of pages to check in */
  30891. +static int get_new_nrpages(struct cluster_handle * clust)
  30892. +{
  30893. + switch (clust->op) {
  30894. + case LC_APPOV:
  30895. + case LC_EXPAND:
  30896. + return clust->nr_pages;
  30897. + case LC_SHRINK:
  30898. + assert("edward-1179", clust->win != NULL);
  30899. + return size_in_pages(clust->win->off + clust->win->count);
  30900. + default:
  30901. + impossible("edward-1180", "bad page cluster option");
  30902. + return 0;
  30903. + }
  30904. +}
  30905. +
  30906. +static void set_cluster_pages_dirty(struct cluster_handle * clust,
  30907. + struct inode * inode)
  30908. +{
  30909. + int i;
  30910. + struct page *pg;
  30911. + int nrpages = get_new_nrpages(clust);
  30912. +
  30913. + for (i = 0; i < nrpages; i++) {
  30914. +
  30915. + pg = clust->pages[i];
  30916. + assert("edward-968", pg != NULL);
  30917. + lock_page(pg);
  30918. + assert("edward-1065", PageUptodate(pg));
  30919. + set_page_dirty_notag(pg);
  30920. + unlock_page(pg);
  30921. + mark_page_accessed(pg);
  30922. + }
  30923. +}
  30924. +
  30925. +/* Grab a page cluster for read/write operations.
  30926. + Attach a jnode for write operations (when preparing for modifications, which
  30927. + are supposed to be committed).
  30928. +
  30929. + We allocate only one jnode per page cluster; this jnode is binded to the
  30930. + first page of this cluster, so we have an extra-reference that will be put
  30931. + as soon as jnode is evicted from memory), other references will be cleaned
  30932. + up in flush time (assume that check in page cluster was successful).
  30933. +*/
  30934. +int grab_page_cluster(struct inode * inode,
  30935. + struct cluster_handle * clust, rw_op rw)
  30936. +{
  30937. + int i;
  30938. + int result = 0;
  30939. + jnode *node = NULL;
  30940. +
  30941. + assert("edward-182", clust != NULL);
  30942. + assert("edward-183", clust->pages != NULL);
  30943. + assert("edward-1466", clust->node == NULL);
  30944. + assert("edward-1428", inode != NULL);
  30945. + assert("edward-1429", inode->i_mapping != NULL);
  30946. + assert("edward-184", clust->nr_pages <= cluster_nrpages(inode));
  30947. +
  30948. + if (clust->nr_pages == 0)
  30949. + return 0;
  30950. +
  30951. + for (i = 0; i < clust->nr_pages; i++) {
  30952. +
  30953. + assert("edward-1044", clust->pages[i] == NULL);
  30954. +
  30955. + clust->pages[i] =
  30956. + find_or_create_page(inode->i_mapping,
  30957. + clust_to_pg(clust->index, inode) + i,
  30958. + reiser4_ctx_gfp_mask_get());
  30959. + if (!clust->pages[i]) {
  30960. + result = RETERR(-ENOMEM);
  30961. + break;
  30962. + }
  30963. + if (i == 0 && rw == WRITE_OP) {
  30964. + node = jnode_of_page(clust->pages[i]);
  30965. + if (IS_ERR(node)) {
  30966. + result = PTR_ERR(node);
  30967. + unlock_page(clust->pages[i]);
  30968. + break;
  30969. + }
  30970. + JF_SET(node, JNODE_CLUSTER_PAGE);
  30971. + assert("edward-920", jprivate(clust->pages[0]));
  30972. + }
  30973. + INODE_PGCOUNT_INC(inode);
  30974. + unlock_page(clust->pages[i]);
  30975. + }
  30976. + if (unlikely(result)) {
  30977. + while (i) {
  30978. + put_cluster_page(clust->pages[--i]);
  30979. + INODE_PGCOUNT_DEC(inode);
  30980. + }
  30981. + if (node && !IS_ERR(node))
  30982. + jput(node);
  30983. + return result;
  30984. + }
  30985. + clust->node = node;
  30986. + return 0;
  30987. +}
  30988. +
  30989. +static void truncate_page_cluster_range(struct inode * inode,
  30990. + struct page ** pages,
  30991. + cloff_t index,
  30992. + int from, int count,
  30993. + int even_cows)
  30994. +{
  30995. + assert("edward-1467", count > 0);
  30996. + reiser4_invalidate_pages(inode->i_mapping,
  30997. + clust_to_pg(index, inode) + from,
  30998. + count, even_cows);
  30999. +}
  31000. +
  31001. +/* Put @count pages starting from @from offset */
  31002. +void __put_page_cluster(int from, int count,
  31003. + struct page ** pages, struct inode * inode)
  31004. +{
  31005. + int i;
  31006. + assert("edward-1468", pages != NULL);
  31007. + assert("edward-1469", inode != NULL);
  31008. + assert("edward-1470", from >= 0 && count >= 0);
  31009. +
  31010. + for (i = 0; i < count; i++) {
  31011. + assert("edward-1471", pages[from + i] != NULL);
  31012. + assert("edward-1472",
  31013. + pages[from + i]->index == pages[from]->index + i);
  31014. +
  31015. + put_cluster_page(pages[from + i]);
  31016. + INODE_PGCOUNT_DEC(inode);
  31017. + }
  31018. +}
  31019. +
  31020. +/*
  31021. + * This is dual to grab_page_cluster,
  31022. + * however if @rw == WRITE_OP, then we call this function
  31023. + * only if something is failed before checkin page cluster.
  31024. + */
  31025. +void put_page_cluster(struct cluster_handle * clust,
  31026. + struct inode * inode, rw_op rw)
  31027. +{
  31028. + assert("edward-445", clust != NULL);
  31029. + assert("edward-922", clust->pages != NULL);
  31030. + assert("edward-446",
  31031. + ergo(clust->nr_pages != 0, clust->pages[0] != NULL));
  31032. +
  31033. + __put_page_cluster(0, clust->nr_pages, clust->pages, inode);
  31034. + if (rw == WRITE_OP) {
  31035. + if (unlikely(clust->node)) {
  31036. + assert("edward-447",
  31037. + clust->node == jprivate(clust->pages[0]));
  31038. + jput(clust->node);
  31039. + clust->node = NULL;
  31040. + }
  31041. + }
  31042. +}
  31043. +
  31044. +#if REISER4_DEBUG
  31045. +int cryptcompress_inode_ok(struct inode *inode)
  31046. +{
  31047. + if (!(reiser4_inode_data(inode)->plugin_mask & (1 << PSET_FILE)))
  31048. + return 0;
  31049. + if (!cluster_shift_ok(inode_cluster_shift(inode)))
  31050. + return 0;
  31051. + return 1;
  31052. +}
  31053. +
  31054. +static int window_ok(struct reiser4_slide * win, struct inode *inode)
  31055. +{
  31056. + assert("edward-1115", win != NULL);
  31057. + assert("edward-1116", ergo(win->delta, win->stat == HOLE_WINDOW));
  31058. +
  31059. + return (win->off != inode_cluster_size(inode)) &&
  31060. + (win->off + win->count + win->delta <= inode_cluster_size(inode));
  31061. +}
  31062. +
  31063. +static int cluster_ok(struct cluster_handle * clust, struct inode *inode)
  31064. +{
  31065. + assert("edward-279", clust != NULL);
  31066. +
  31067. + if (!clust->pages)
  31068. + return 0;
  31069. + return (clust->win ? window_ok(clust->win, inode) : 1);
  31070. +}
  31071. +#if 0
/* Debug helper (currently compiled out by the #if 0 above, replaced
   by the trivial macro in the #else branch): returns 1 iff no page at
   index >= @start remains in the inode's mapping. */
static int pages_truncate_ok(struct inode *inode, pgoff_t start)
{
	int found;
	struct page * page;

	/* look up at most one page at or after @start */
	found = find_get_pages(inode->i_mapping, &start, 1, &page);
	if (found)
		put_cluster_page(page);
	return !found;
}
  31083. +#else
  31084. +#define pages_truncate_ok(inode, start) 1
  31085. +#endif
  31086. +
  31087. +static int jnode_truncate_ok(struct inode *inode, cloff_t index)
  31088. +{
  31089. + jnode *node;
  31090. + node = jlookup(current_tree, get_inode_oid(inode),
  31091. + clust_to_pg(index, inode));
  31092. + if (likely(!node))
  31093. + return 1;
  31094. + jput(node);
  31095. + return 0;
  31096. +}
  31097. +#endif
  31098. +
  31099. +/* guess next window stat */
  31100. +static inline window_stat next_window_stat(struct reiser4_slide * win)
  31101. +{
  31102. + assert("edward-1130", win != NULL);
  31103. + return ((win->stat == HOLE_WINDOW && win->delta == 0) ?
  31104. + HOLE_WINDOW : DATA_WINDOW);
  31105. +}
  31106. +
/* guess and set next cluster index and window params.

   State machine over win->stat:
   . DATA_WINDOW: advance to the next cluster, window covers its head;
   . HOLE_WINDOW -> HOLE_WINDOW: jump to the cluster containing
     @file_off, window covers the hole tail plus pending data (delta);
   . HOLE_WINDOW -> DATA_WINDOW: stay on the same cluster, the data
     part (delta) becomes the window.
   @to_file is the number of bytes remaining to be written. */
static void move_update_window(struct inode * inode,
			       struct cluster_handle * clust,
			       loff_t file_off, loff_t to_file)
{
	struct reiser4_slide * win;

	assert("edward-185", clust != NULL);
	assert("edward-438", clust->pages != NULL);
	assert("edward-281", cluster_ok(clust, inode));

	win = clust->win;
	if (!win)
		return;

	switch (win->stat) {
	case DATA_WINDOW:
		/* increment */
		clust->index++;
		win->stat = DATA_WINDOW;
		win->off = 0;
		win->count = min((loff_t)inode_cluster_size(inode), to_file);
		break;
	case HOLE_WINDOW:
		switch (next_window_stat(win)) {
		case HOLE_WINDOW:
			/* skip */
			clust->index = off_to_clust(file_off, inode);
			win->stat = HOLE_WINDOW;
			win->off = 0;
			win->count = off_to_cloff(file_off, inode);
			win->delta = min((loff_t)(inode_cluster_size(inode) -
						  win->count), to_file);
			break;
		case DATA_WINDOW:
			/* stay */
			win->stat = DATA_WINDOW;
			/* off+count+delta=inv */
			win->off = win->off + win->count;
			win->count = win->delta;
			win->delta = 0;
			break;
		default:
			impossible("edward-282", "wrong next window state");
		}
		break;
	default:
		impossible("edward-283", "wrong current window state");
	}
	assert("edward-1068", cluster_ok(clust, inode));
}
  31158. +
  31159. +static int update_sd_cryptcompress(struct inode *inode)
  31160. +{
  31161. + int result = 0;
  31162. +
  31163. + assert("edward-978", reiser4_schedulable());
  31164. +
  31165. + result = reiser4_grab_space_force(/* one for stat data update */
  31166. + estimate_update_common(inode),
  31167. + BA_CAN_COMMIT);
  31168. + if (result)
  31169. + return result;
  31170. + if (!IS_NOCMTIME(inode))
  31171. + inode->i_ctime = inode->i_mtime = current_time(inode);
  31172. +
  31173. + result = reiser4_update_sd(inode);
  31174. +
  31175. + if (unlikely(result != 0))
  31176. + warning("edward-1573",
  31177. + "Can not update stat-data: %i. FSCK?",
  31178. + result);
  31179. + return result;
  31180. +}
  31181. +
/* Detach @node from its transaction atom, if it has one.
 * Pre: the jnode guard spinlock is held.
 * The jnode lock is not held on return: it is dropped explicitly in
 * the no-atom path; in the atom path it is presumably released inside
 * reiser4_uncapture_block() -- NOTE(review): confirm. */
static void uncapture_cluster_jnode(jnode * node)
{
	txn_atom *atom;

	assert_spin_locked(&(node->guard));

	atom = jnode_get_atom(node);
	if (atom == NULL) {
		/* not captured: nothing to detach */
		assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY));
		spin_unlock_jnode(node);
		return;
	}
	reiser4_uncapture_block(node);
	spin_unlock_atom(atom);
	/* drop the reference obtained via jnode_get_atom/capture --
	   TODO confirm which reference this pairs with */
	jput(node);
}
  31198. +
  31199. +static void put_found_pages(struct page **pages, int nr)
  31200. +{
  31201. + int i;
  31202. + for (i = 0; i < nr; i++) {
  31203. + assert("edward-1045", pages[i] != NULL);
  31204. + put_cluster_page(pages[i]);
  31205. + }
  31206. +}
  31207. +
  31208. +/* Lifecycle of a logical cluster in the system.
  31209. + *
  31210. + *
  31211. + * Logical cluster of a cryptcompress file is represented in the system by
  31212. + * . page cluster (in memory, primary cache, contains plain text);
  31213. + * . disk cluster (in memory, secondary cache, contains transformed text).
  31214. + * Primary cache is to reduce number of transform operations (compression,
  31215. + * encryption), i.e. to implement transform-caching strategy.
  31216. + * Secondary cache is to reduce number of I/O operations, i.e. for usual
  31217. + * write-caching strategy. Page cluster is a set of pages, i.e. mapping of
  31218. + * a logical cluster to the primary cache. Disk cluster is a set of items
  31219. + * of the same type defined by some reiser4 item plugin id.
  31220. + *
  31221. + * 1. Performing modifications
  31222. + *
  31223. + * Every modification of a cryptcompress file is considered as a set of
  31224. + * operations performed on file's logical clusters. Every such "atomic"
  31225. + * modification is truncate, append and(or) overwrite some bytes of a
  31226. + * logical cluster performed in the primary cache with the following
  31227. + * synchronization with the secondary cache (in flush time). Disk clusters,
  31228. + * which live in the secondary cache, are supposed to be synchronized with
  31229. + * disk. The mechanism of synchronization of primary and secondary caches
  31230. + * includes so-called checkin/checkout technique described below.
  31231. + *
  31232. + * 2. Submitting modifications
  31233. + *
  31234. + * Each page cluster has associated jnode (a special in-memory header to
  31235. + * keep a track of transactions in reiser4), which is attached to its first
  31236. + * page when grabbing page cluster for modifications (see grab_page_cluster).
  31237. + * Submitting modifications (see checkin_logical_cluster) is going per logical
  31238. + * cluster and includes:
  31239. + * . checkin_cluster_size;
  31240. + * . checkin_page_cluster.
  31241. + * checkin_cluster_size() is resolved to file size update (which completely
  31242. + * defines new size of logical cluster (number of file's bytes in a logical
  31243. + * cluster).
  31244. + * checkin_page_cluster() captures jnode of a page cluster and installs
  31245. + * jnode's dirty flag (if needed) to indicate that modifications are
  31246. + * successfully checked in.
  31247. + *
  31248. + * 3. Checking out modifications
  31249. + *
  31250. + * Is going per logical cluster in flush time (see checkout_logical_cluster).
  31251. + * This is the time of synchronizing primary and secondary caches.
  31252. + * checkout_logical_cluster() includes:
  31253. + * . checkout_page_cluster (retrieving checked in pages).
  31254. + * . uncapture jnode (including clear dirty flag and unlock)
  31255. + *
  31256. + * 4. Committing modifications
  31257. + *
  31258. + * Proceeding a synchronization of primary and secondary caches. When checking
  31259. + * out page cluster (the phase above) pages are locked/flushed/unlocked
  31260. + * one-by-one in ascending order of their indexes to contiguous stream, which
  31261. + * is supposed to be transformed (compressed, encrypted), chopped up into items
  31262. + * and committed to disk as a disk cluster.
  31263. + *
  31264. + * 5. Managing page references
  31265. + *
  31266. + * Every checked in page have a special additional "control" reference,
  31267. + * which is dropped at checkout. We need this to avoid unexpected evicting
  31268. + * pages from memory before checkout. Control references are managed so
  31269. + * they are not accumulated with every checkin:
  31270. + *
  31271. + * 0
  31272. + * checkin -> 1
  31273. + * 0 -> checkout
  31274. + * checkin -> 1
  31275. + * checkin -> 1
  31276. + * checkin -> 1
  31277. + * 0 -> checkout
  31278. + * ...
  31279. + *
  31280. + * Every page cluster has its own unique "cluster lock". Update/drop
  31281. + * references are serialized via this lock. Number of checked in cluster
  31282. + * pages is calculated by i_size under cluster lock. File size is updated
  31283. + * at every checkin action also under cluster lock (except cases of
  31284. + * appending/truncating fake logical clusters).
  31285. + *
  31286. + * Proof of correctness:
  31287. + *
  31288. + * Since we update file size under cluster lock, in the case of non-fake
  31289. + * logical cluster with its lock held we do have expected number of checked
  31290. + * in pages. On the other hand, append/truncate of fake logical clusters
  31291. + * doesn't change number of checked in pages of any cluster.
  31292. + *
  31293. + * NOTE-EDWARD: As cluster lock we use guard (spinlock_t) of its jnode.
  31294. + * Currently, I don't see any reason to create a special lock for those
  31295. + * needs.
  31296. + */
  31297. +
/* Acquire the cluster lock: the guard spinlock of the jnode attached
   to the first page of the page cluster (see NOTE-EDWARD in the
   lifecycle comment above). */
static inline void lock_cluster(jnode * node)
{
	spin_lock_jnode(node);
}
  31302. +
/* Release the cluster lock taken by lock_cluster(). */
static inline void unlock_cluster(jnode * node)
{
	spin_unlock_jnode(node);
}
  31307. +
/* Release the cluster lock and detach the jnode from its atom
   (see uncapture_cluster_jnode() for the locking details). */
static inline void unlock_cluster_uncapture(jnode * node)
{
	uncapture_cluster_jnode(node);
}
  31312. +
/* Set new file size by window. Cluster lock is required.
   For append/overwrite and expand, the file grows only if the window
   end passes the current i_size (pure overwrites leave it alone);
   for shrink, the new size is exactly the window start. */
static void checkin_file_size(struct cluster_handle * clust,
			      struct inode * inode)
{
	loff_t new_size;
	struct reiser4_slide * win;

	assert("edward-1181", clust != NULL);
	assert("edward-1182", inode != NULL);
	assert("edward-1473", clust->pages != NULL);
	assert("edward-1474", clust->pages[0] != NULL);
	assert("edward-1475", jprivate(clust->pages[0]) != NULL);
	assert_spin_locked(&(jprivate(clust->pages[0])->guard));


	win = clust->win;
	assert("edward-1183", win != NULL);

	/* file offset of the window start */
	new_size = clust_to_off(clust->index, inode) + win->off;

	switch (clust->op) {
	case LC_APPOV:
	case LC_EXPAND:
		if (new_size + win->count <= i_size_read(inode))
			/* overwrite only */
			return;
		new_size += win->count;
		break;
	case LC_SHRINK:
		break;
	default:
		impossible("edward-1184", "bad page cluster option");
		break;
	}
	inode_check_scale_nolock(inode, i_size_read(inode), new_size);
	i_size_write(inode, new_size);
	return;
}
  31351. +
  31352. +static inline void checkin_cluster_size(struct cluster_handle * clust,
  31353. + struct inode * inode)
  31354. +{
  31355. + if (clust->win)
  31356. + checkin_file_size(clust, inode);
  31357. +}
  31358. +
/* Check in a page cluster: capture its jnode and mark it dirty.
   Called with the cluster lock held; the lock is released on all
   paths before returning. Returns 0 or a negative error. */
static int checkin_page_cluster(struct cluster_handle * clust,
				struct inode * inode)
{
	int result;
	jnode * node;
	int old_nrpages = clust->old_nrpages;
	int new_nrpages = get_new_nrpages(clust);

	node = clust->node;

	assert("edward-221", node != NULL);
	assert("edward-971", clust->reserved == 1);
	assert("edward-1263",
	       clust->reserved_prepped == estimate_update_cluster(inode));
	assert("edward-1264", clust->reserved_unprepped == 0);

	if (JF_ISSET(node, JNODE_DIRTY)) {
		/*
		 * page cluster was checked in, but not yet
		 * checked out, so release related resources
		 * (drop the previous checkin's control page
		 * references so they don't accumulate -- see
		 * "Managing page references" above)
		 */
		free_reserved4cluster(inode, clust,
				      estimate_update_cluster(inode));
		__put_page_cluster(0, clust->old_nrpages,
				   clust->pages, inode);
	} else {
		result = capture_cluster_jnode(node);
		if (unlikely(result)) {
			unlock_cluster(node);
			return result;
		}
		jnode_make_dirty_locked(node);
		clust->reserved = 0;
	}
	unlock_cluster(node);

	if (new_nrpages < old_nrpages) {
		/* truncate >= 1 complete pages */
		__put_page_cluster(new_nrpages,
				   old_nrpages - new_nrpages,
				   clust->pages, inode);
		truncate_page_cluster_range(inode,
					    clust->pages, clust->index,
					    new_nrpages,
					    old_nrpages - new_nrpages,
					    0);
	}
#if REISER4_DEBUG
	clust->reserved_prepped -= estimate_update_cluster(inode);
#endif
	return 0;
}
  31411. +
/* Submit modifications of a logical cluster: update file size and
   check in the page cluster, both under the cluster lock (see "Proof
   of correctness" in the lifecycle comment above). Drops the jnode
   reference held by @clust. Returns 0 or a negative error. */
static int checkin_logical_cluster(struct cluster_handle * clust,
				   struct inode *inode)
{
	int result = 0;
	jnode * node;

	node = clust->node;

	assert("edward-1035", node != NULL);
	assert("edward-1029", clust != NULL);
	assert("edward-1030", clust->reserved == 1);
	assert("edward-1031", clust->nr_pages != 0);
	assert("edward-1032", clust->pages != NULL);
	assert("edward-1033", clust->pages[0] != NULL);
	assert("edward-1446", jnode_is_cluster_page(node));
	assert("edward-1476", node == jprivate(clust->pages[0]));

	lock_cluster(node);
	checkin_cluster_size(clust, inode);
	/*
	 * this will unlock the cluster
	 */
	result = checkin_page_cluster(clust, inode);
	jput(node);
	clust->node = NULL;
	return result;
}
  31440. +
  31441. +/*
  31442. + * Retrieve size of logical cluster that was checked in at
  31443. + * the latest modifying session (cluster lock is required)
  31444. + */
  31445. +static inline void checkout_cluster_size(struct cluster_handle * clust,
  31446. + struct inode * inode)
  31447. +{
  31448. + struct tfm_cluster *tc = &clust->tc;
  31449. +
  31450. + tc->len = lbytes(clust->index, inode);
  31451. + assert("edward-1478", tc->len != 0);
  31452. +}
  31453. +
  31454. +/*
  31455. + * Retrieve a page cluster with the latest submitted modifications
  31456. + * and flush its pages to previously allocated contiguous stream.
  31457. + */
  31458. +static void checkout_page_cluster(struct cluster_handle * clust,
  31459. + jnode * node, struct inode * inode)
  31460. +{
  31461. + int i;
  31462. + int found;
  31463. + int to_put;
  31464. + pgoff_t page_index = clust_to_pg(clust->index, inode);
  31465. + struct tfm_cluster *tc = &clust->tc;
  31466. +
  31467. + /* find and put checked in pages: cluster is locked,
  31468. + * so we must get expected number (to_put) of pages
  31469. + */
  31470. + to_put = size_in_pages(lbytes(clust->index, inode));
  31471. + found = find_get_pages(inode->i_mapping, &page_index,
  31472. + to_put, clust->pages);
  31473. + BUG_ON(found != to_put);
  31474. +
  31475. + __put_page_cluster(0, to_put, clust->pages, inode);
  31476. + unlock_cluster_uncapture(node);
  31477. +
  31478. + /* Flush found pages.
  31479. + *
  31480. + * Note, that we don't disable modifications while flushing,
  31481. + * moreover, some found pages can be truncated, as we have
  31482. + * released cluster lock.
  31483. + */
  31484. + for (i = 0; i < found; i++) {
  31485. + int in_page;
  31486. + char * data;
  31487. + assert("edward-1479",
  31488. + clust->pages[i]->index == clust->pages[0]->index + i);
  31489. +
  31490. + lock_page(clust->pages[i]);
  31491. + if (!PageUptodate(clust->pages[i])) {
  31492. + /* page was truncated */
  31493. + assert("edward-1480",
  31494. + i_size_read(inode) <= page_offset(clust->pages[i]));
  31495. + assert("edward-1481",
  31496. + clust->pages[i]->mapping != inode->i_mapping);
  31497. + unlock_page(clust->pages[i]);
  31498. + break;
  31499. + }
  31500. + /* Update the number of bytes in the logical cluster,
  31501. + * as it could be partially truncated. Note, that only
  31502. + * partial truncate is possible (complete truncate can
  31503. + * not go here, as it is performed via ->kill_hook()
  31504. + * called by cut_file_items(), and the last one must
  31505. + * wait for znode locked with parent coord).
  31506. + */
  31507. + checkout_cluster_size(clust, inode);
  31508. +
  31509. + /* this can be zero, as new file size is
  31510. + checked in before truncating pages */
  31511. + in_page = __mbp(tc->len, i);
  31512. +
  31513. + data = kmap_atomic(clust->pages[i]);
  31514. + memcpy(tfm_stream_data(tc, INPUT_STREAM) + pg_to_off(i),
  31515. + data, in_page);
  31516. + kunmap_atomic(data);
  31517. + /*
  31518. + * modifications have been checked out and will be
  31519. + * committed later. Anyway, the dirty status of the
  31520. + * page is no longer relevant. However, the uptodate
  31521. + * status of the page is still relevant!
  31522. + */
  31523. + if (PageDirty(clust->pages[i]))
  31524. + cancel_dirty_page(clust->pages[i]);
  31525. +
  31526. + unlock_page(clust->pages[i]);
  31527. +
  31528. + if (in_page < PAGE_SIZE)
  31529. + /* end of the file */
  31530. + break;
  31531. + }
  31532. + put_found_pages(clust->pages, found); /* find_get_pages */
  31533. + tc->lsize = tc->len;
  31534. + return;
  31535. +}
  31536. +
/* Check out modifications of a logical cluster: allocate the input
   stream and flush the checked-in pages into it (flush time).
   Returns 0, or -E_REPEAT if allocation failed or another flush
   already took this cluster. */
int checkout_logical_cluster(struct cluster_handle * clust,
			     jnode * node, struct inode *inode)
{
	int result;
	struct tfm_cluster *tc = &clust->tc;

	assert("edward-980", node != NULL);
	assert("edward-236", inode != NULL);
	assert("edward-237", clust != NULL);
	assert("edward-240", !clust->win);
	assert("edward-241", reiser4_schedulable());
	assert("edward-718", cryptcompress_inode_ok(inode));

	result = grab_tfm_stream(inode, tc, INPUT_STREAM);
	if (result) {
		warning("edward-1430", "alloc stream failed with ret=%d",
			result);
		return RETERR(-E_REPEAT);
	}
	lock_cluster(node);

	if (unlikely(!JF_ISSET(node, JNODE_DIRTY))) {
		/* race with another flush */
		warning("edward-982",
			"checking out logical cluster %lu of inode %llu: "
			"jnode is not dirty", clust->index,
			(unsigned long long)get_inode_oid(inode));
		unlock_cluster(node);
		return RETERR(-E_REPEAT);
	}
	/* convert the space reserved at checkin back to grabbed */
	cluster_reserved2grabbed(estimate_update_cluster(inode));

	/* this will unlock cluster */
	checkout_page_cluster(clust, node, inode);
	return 0;
}
  31574. +
  31575. +/* set hint for the cluster of the index @index */
  31576. +static void set_hint_cluster(struct inode *inode, hint_t * hint,
  31577. + cloff_t index, znode_lock_mode mode)
  31578. +{
  31579. + reiser4_key key;
  31580. + assert("edward-722", cryptcompress_inode_ok(inode));
  31581. + assert("edward-723",
  31582. + inode_file_plugin(inode) ==
  31583. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
  31584. +
  31585. + inode_file_plugin(inode)->key_by_inode(inode,
  31586. + clust_to_off(index, inode),
  31587. + &key);
  31588. +
  31589. + reiser4_seal_init(&hint->seal, &hint->ext_coord.coord, &key);
  31590. + hint->offset = get_key_offset(&key);
  31591. + hint->mode = mode;
  31592. +}
  31593. +
/* Drop the hint's long-term lock handle and clear its "valid" flag. */
void invalidate_hint_cluster(struct cluster_handle * clust)
{
	assert("edward-1291", clust != NULL);
	assert("edward-1292", clust->hint != NULL);

	done_lh(&clust->hint->lh);
	hint_clr_valid(clust->hint);
}
  31602. +
/* Remember a hint for the next cluster (@clust->index + 1), then
   release the hint's lock resources. */
static void put_hint_cluster(struct cluster_handle * clust,
			     struct inode *inode, znode_lock_mode mode)
{
	assert("edward-1286", clust != NULL);
	assert("edward-1287", clust->hint != NULL);

	/* seal first, then drop the lock handle */
	set_hint_cluster(inode, clust->hint, clust->index + 1, mode);
	invalidate_hint_cluster(clust);
}
  31612. +
/* Advance the window to the next position, update the stat-data and,
   if @nr_dirtied pages were dirtied, throttle the writer.
   Returns 0 or a negative error from the stat-data update. */
static int balance_dirty_page_cluster(struct cluster_handle * clust,
				      struct inode *inode, loff_t off,
				      loff_t to_file,
				      int nr_dirtied)
{
	int result;
	struct cryptcompress_info * info;

	assert("edward-724", inode != NULL);
	assert("edward-725", cryptcompress_inode_ok(inode));
	assert("edward-1547", nr_dirtied <= cluster_nrpages(inode));

	/* set next window params */
	move_update_window(inode, clust, off, to_file);

	result = update_sd_cryptcompress(inode);
	if (result)
		return result;
	assert("edward-726", clust->hint->lh.owner == NULL);
	info = cryptcompress_inode_data(inode);

	if (nr_dirtied == 0)
		return 0;
	/* drop checkin_mutex around throttling, presumably so that
	   writeback is not blocked on it -- NOTE(review): confirm */
	mutex_unlock(&info->checkin_mutex);
	reiser4_throttle_write(inode);
	mutex_lock(&info->checkin_mutex);
	return 0;
}
  31641. +
  31642. +/*
  31643. + * Check in part of a hole within a logical cluster
  31644. + */
  31645. +static int write_hole(struct inode *inode, struct cluster_handle * clust,
  31646. + loff_t file_off, loff_t to_file)
  31647. +{
  31648. + int result = 0;
  31649. + unsigned cl_off, cl_count = 0;
  31650. + unsigned to_pg, pg_off;
  31651. + struct reiser4_slide * win;
  31652. +
  31653. + assert("edward-190", clust != NULL);
  31654. + assert("edward-1069", clust->win != NULL);
  31655. + assert("edward-191", inode != NULL);
  31656. + assert("edward-727", cryptcompress_inode_ok(inode));
  31657. + assert("edward-1171", clust->dstat != INVAL_DISK_CLUSTER);
  31658. + assert("edward-1154",
  31659. + ergo(clust->dstat != FAKE_DISK_CLUSTER, clust->reserved == 1));
  31660. +
  31661. + win = clust->win;
  31662. +
  31663. + assert("edward-1070", win != NULL);
  31664. + assert("edward-201", win->stat == HOLE_WINDOW);
  31665. + assert("edward-192", cluster_ok(clust, inode));
  31666. +
  31667. + if (win->off == 0 && win->count == inode_cluster_size(inode)) {
  31668. + /*
  31669. + * This part of the hole occupies the whole logical
  31670. + * cluster, so it won't be represented by any items.
  31671. + * Nothing to submit.
  31672. + */
  31673. + move_update_window(inode, clust, file_off, to_file);
  31674. + return 0;
  31675. + }
  31676. + /*
  31677. + * This part of the hole starts not at logical cluster
  31678. + * boundary, so it has to be converted to zeros and written to disk
  31679. + */
  31680. + cl_count = win->count; /* number of zeroes to write */
  31681. + cl_off = win->off;
  31682. + pg_off = off_to_pgoff(win->off);
  31683. +
  31684. + while (cl_count) {
  31685. + struct page *page;
  31686. + page = clust->pages[off_to_pg(cl_off)];
  31687. +
  31688. + assert("edward-284", page != NULL);
  31689. +
  31690. + to_pg = min((typeof(pg_off))PAGE_SIZE - pg_off, cl_count);
  31691. + lock_page(page);
  31692. + zero_user(page, pg_off, to_pg);
  31693. + SetPageUptodate(page);
  31694. + set_page_dirty_notag(page);
  31695. + mark_page_accessed(page);
  31696. + unlock_page(page);
  31697. +
  31698. + cl_off += to_pg;
  31699. + cl_count -= to_pg;
  31700. + pg_off = 0;
  31701. + }
  31702. + if (win->delta == 0) {
  31703. + /* only zeroes in this window, try to capture
  31704. + */
  31705. + result = checkin_logical_cluster(clust, inode);
  31706. + if (result)
  31707. + return result;
  31708. + put_hint_cluster(clust, inode, ZNODE_WRITE_LOCK);
  31709. + result = balance_dirty_page_cluster(clust,
  31710. + inode, file_off, to_file,
  31711. + win_count_to_nrpages(win));
  31712. + } else
  31713. + move_update_window(inode, clust, file_off, to_file);
  31714. + return result;
  31715. +}
  31716. +
  31717. +/*
  31718. + The main disk search procedure for cryptcompress plugin, which
  31719. + . scans all items of disk cluster with the lock mode @mode
  31720. + . maybe reads each one (if @read)
  31721. + . maybe makes its znode dirty (if write lock mode was specified)
  31722. +
  31723. + NOTE-EDWARD: Callers should handle the case when disk cluster
  31724. + is incomplete (-EIO)
  31725. +*/
  31726. +int find_disk_cluster(struct cluster_handle * clust,
  31727. + struct inode *inode, int read, znode_lock_mode mode)
  31728. +{
  31729. + flow_t f;
  31730. + hint_t *hint;
  31731. + int result = 0;
  31732. + int was_grabbed;
  31733. + ra_info_t ra_info;
  31734. + file_plugin *fplug;
  31735. + item_plugin *iplug;
  31736. + struct tfm_cluster *tc;
  31737. + struct cryptcompress_info * info;
  31738. +
  31739. + assert("edward-138", clust != NULL);
  31740. + assert("edward-728", clust->hint != NULL);
  31741. + assert("edward-226", reiser4_schedulable());
  31742. + assert("edward-137", inode != NULL);
  31743. + assert("edward-729", cryptcompress_inode_ok(inode));
  31744. +
  31745. + hint = clust->hint;
  31746. + fplug = inode_file_plugin(inode);
  31747. + was_grabbed = get_current_context()->grabbed_blocks;
  31748. + info = cryptcompress_inode_data(inode);
  31749. + tc = &clust->tc;
  31750. +
  31751. + assert("edward-462", !tfm_cluster_is_uptodate(tc));
  31752. + assert("edward-461", ergo(read, tfm_stream_is_set(tc, INPUT_STREAM)));
  31753. +
  31754. + dclust_init_extension(hint);
  31755. +
  31756. + /* set key of the first disk cluster item */
  31757. + fplug->flow_by_inode(inode,
  31758. + (read ? (char __user *)tfm_stream_data(tc, INPUT_STREAM) : NULL),
  31759. + 0 /* kernel space */ ,
  31760. + inode_scaled_cluster_size(inode),
  31761. + clust_to_off(clust->index, inode), READ_OP, &f);
  31762. + if (mode == ZNODE_WRITE_LOCK) {
  31763. + /* reserve for flush to make dirty all the leaf nodes
  31764. + which contain disk cluster */
  31765. + result =
  31766. + reiser4_grab_space_force(estimate_dirty_cluster(inode),
  31767. + BA_CAN_COMMIT);
  31768. + if (result)
  31769. + goto out;
  31770. + }
  31771. +
  31772. + ra_info.key_to_stop = f.key;
  31773. + set_key_offset(&ra_info.key_to_stop, get_key_offset(reiser4_max_key()));
  31774. +
  31775. + while (f.length) {
  31776. + result = find_cluster_item(hint, &f.key, mode,
  31777. + NULL, FIND_EXACT,
  31778. + (mode == ZNODE_WRITE_LOCK ?
  31779. + CBK_FOR_INSERT : 0));
  31780. + switch (result) {
  31781. + case CBK_COORD_NOTFOUND:
  31782. + result = 0;
  31783. + if (inode_scaled_offset
  31784. + (inode, clust_to_off(clust->index, inode)) ==
  31785. + get_key_offset(&f.key)) {
  31786. + /* first item not found, this is treated
  31787. + as disk cluster is absent */
  31788. + clust->dstat = FAKE_DISK_CLUSTER;
  31789. + goto out;
  31790. + }
  31791. + /* we are outside the cluster, stop search here */
  31792. + assert("edward-146",
  31793. + f.length != inode_scaled_cluster_size(inode));
  31794. + goto ok;
  31795. + case CBK_COORD_FOUND:
  31796. + assert("edward-148",
  31797. + hint->ext_coord.coord.between == AT_UNIT);
  31798. + assert("edward-460",
  31799. + hint->ext_coord.coord.unit_pos == 0);
  31800. +
  31801. + coord_clear_iplug(&hint->ext_coord.coord);
  31802. + result = zload_ra(hint->ext_coord.coord.node, &ra_info);
  31803. + if (unlikely(result))
  31804. + goto out;
  31805. + iplug = item_plugin_by_coord(&hint->ext_coord.coord);
  31806. + assert("edward-147",
  31807. + item_id_by_coord(&hint->ext_coord.coord) ==
  31808. + CTAIL_ID);
  31809. +
  31810. + result = iplug->s.file.read(&f, hint, NULL, NULL);
  31811. + if (result) {
  31812. + zrelse(hint->ext_coord.coord.node);
  31813. + goto out;
  31814. + }
  31815. + if (mode == ZNODE_WRITE_LOCK) {
  31816. + /* Don't make dirty more nodes than it was
  31817. + estimated (see comments before
  31818. + estimate_dirty_cluster). Missed nodes will be
  31819. + read up in flush time if they are evicted from
  31820. + memory */
  31821. + if (dclust_get_extension_ncount(hint) <=
  31822. + estimate_dirty_cluster(inode))
  31823. + znode_make_dirty(hint->ext_coord.coord.node);
  31824. +
  31825. + znode_set_convertible(hint->ext_coord.coord.
  31826. + node);
  31827. + }
  31828. + zrelse(hint->ext_coord.coord.node);
  31829. + break;
  31830. + default:
  31831. + goto out;
  31832. + }
  31833. + }
  31834. + ok:
  31835. + /* at least one item was found */
  31836. + /* NOTE-EDWARD: Callers should handle the case
  31837. + when disk cluster is incomplete (-EIO) */
  31838. + tc->len = inode_scaled_cluster_size(inode) - f.length;
  31839. + tc->lsize = lbytes(clust->index, inode);
  31840. + assert("edward-1196", tc->len > 0);
  31841. + assert("edward-1406", tc->lsize > 0);
  31842. +
  31843. + if (hint_is_unprepped_dclust(clust->hint)) {
  31844. + clust->dstat = UNPR_DISK_CLUSTER;
  31845. + } else if (clust->index == info->trunc_index) {
  31846. + clust->dstat = TRNC_DISK_CLUSTER;
  31847. + } else {
  31848. + clust->dstat = PREP_DISK_CLUSTER;
  31849. + dclust_set_extension_dsize(clust->hint, tc->len);
  31850. + }
  31851. + out:
  31852. + assert("edward-1339",
  31853. + get_current_context()->grabbed_blocks >= was_grabbed);
  31854. + grabbed2free(get_current_context(),
  31855. + get_current_super_private(),
  31856. + get_current_context()->grabbed_blocks - was_grabbed);
  31857. + return result;
  31858. +}
  31859. +
  31860. +int get_disk_cluster_locked(struct cluster_handle * clust, struct inode *inode,
  31861. + znode_lock_mode lock_mode)
  31862. +{
  31863. + reiser4_key key;
  31864. + ra_info_t ra_info;
  31865. +
  31866. + assert("edward-730", reiser4_schedulable());
  31867. + assert("edward-731", clust != NULL);
  31868. + assert("edward-732", inode != NULL);
  31869. +
  31870. + if (hint_is_valid(clust->hint)) {
  31871. + assert("edward-1293", clust->dstat != INVAL_DISK_CLUSTER);
  31872. + assert("edward-1294",
  31873. + znode_is_write_locked(clust->hint->lh.node));
  31874. + /* already have a valid locked position */
  31875. + return (clust->dstat ==
  31876. + FAKE_DISK_CLUSTER ? CBK_COORD_NOTFOUND :
  31877. + CBK_COORD_FOUND);
  31878. + }
  31879. + key_by_inode_cryptcompress(inode, clust_to_off(clust->index, inode),
  31880. + &key);
  31881. + ra_info.key_to_stop = key;
  31882. + set_key_offset(&ra_info.key_to_stop, get_key_offset(reiser4_max_key()));
  31883. +
  31884. + return find_cluster_item(clust->hint, &key, lock_mode, NULL, FIND_EXACT,
  31885. + CBK_FOR_INSERT);
  31886. +}
  31887. +
  31888. +/* Read needed cluster pages before modifying.
  31889. + If success, @clust->hint contains locked position in the tree.
  31890. + Also:
  31891. + . find and set disk cluster state
  31892. + . make disk cluster dirty if its state is not FAKE_DISK_CLUSTER.
  31893. +*/
  31894. +static int read_some_cluster_pages(struct inode * inode,
  31895. + struct cluster_handle * clust)
  31896. +{
  31897. + int i;
  31898. + int result = 0;
  31899. + item_plugin *iplug;
  31900. + struct reiser4_slide * win = clust->win;
  31901. + znode_lock_mode mode = ZNODE_WRITE_LOCK;
  31902. +
  31903. + iplug = item_plugin_by_id(CTAIL_ID);
  31904. +
  31905. + assert("edward-924", !tfm_cluster_is_uptodate(&clust->tc));
  31906. +
  31907. +#if REISER4_DEBUG
  31908. + if (clust->nr_pages == 0) {
  31909. + /* start write hole from fake disk cluster */
  31910. + assert("edward-1117", win != NULL);
  31911. + assert("edward-1118", win->stat == HOLE_WINDOW);
  31912. + assert("edward-1119", new_logical_cluster(clust, inode));
  31913. + }
  31914. +#endif
  31915. + if (new_logical_cluster(clust, inode)) {
  31916. + /*
  31917. + new page cluster is about to be written, nothing to read,
  31918. + */
  31919. + assert("edward-734", reiser4_schedulable());
  31920. + assert("edward-735", clust->hint->lh.owner == NULL);
  31921. +
  31922. + if (clust->nr_pages) {
  31923. + int off;
  31924. + struct page * pg;
  31925. + assert("edward-1419", clust->pages != NULL);
  31926. + pg = clust->pages[clust->nr_pages - 1];
  31927. + assert("edward-1420", pg != NULL);
  31928. + off = off_to_pgoff(win->off+win->count+win->delta);
  31929. + if (off) {
  31930. + lock_page(pg);
  31931. + zero_user_segment(pg, off, PAGE_SIZE);
  31932. + unlock_page(pg);
  31933. + }
  31934. + }
  31935. + clust->dstat = FAKE_DISK_CLUSTER;
  31936. + return 0;
  31937. + }
  31938. + /*
  31939. + Here we should search for disk cluster to figure out its real state.
  31940. + Also there is one more important reason to do disk search: we need
  31941. + to make disk cluster _dirty_ if it exists
  31942. + */
  31943. +
  31944. + /* if the window is specified, read only the pages
  31945. + that will be modified partially */
  31946. +
  31947. + for (i = 0; i < clust->nr_pages; i++) {
  31948. + struct page *pg = clust->pages[i];
  31949. +
  31950. + lock_page(pg);
  31951. + if (PageUptodate(pg)) {
  31952. + unlock_page(pg);
  31953. + continue;
  31954. + }
  31955. + unlock_page(pg);
  31956. +
  31957. + if (win &&
  31958. + i >= size_in_pages(win->off) &&
  31959. + i < off_to_pg(win->off + win->count + win->delta))
  31960. + /* page will be completely overwritten */
  31961. + continue;
  31962. +
  31963. + if (win && (i == clust->nr_pages - 1) &&
  31964. + /* the last page is
  31965. + partially modified,
  31966. + not uptodate .. */
  31967. + (size_in_pages(i_size_read(inode)) <= pg->index)) {
  31968. + /* .. and appended,
  31969. + so set zeroes to the rest */
  31970. + int offset;
  31971. + lock_page(pg);
  31972. + assert("edward-1260",
  31973. + size_in_pages(win->off + win->count +
  31974. + win->delta) - 1 == i);
  31975. +
  31976. + offset =
  31977. + off_to_pgoff(win->off + win->count + win->delta);
  31978. + zero_user_segment(pg, offset, PAGE_SIZE);
  31979. + unlock_page(pg);
  31980. + /* still not uptodate */
  31981. + break;
  31982. + }
  31983. + lock_page(pg);
  31984. + result = do_readpage_ctail(inode, clust, pg, mode);
  31985. +
  31986. + assert("edward-1526", ergo(!result, PageUptodate(pg)));
  31987. + unlock_page(pg);
  31988. + if (result) {
  31989. + warning("edward-219", "do_readpage_ctail failed");
  31990. + goto out;
  31991. + }
  31992. + }
  31993. + if (!tfm_cluster_is_uptodate(&clust->tc)) {
  31994. + /* disk cluster unclaimed, but we need to make its znodes dirty
  31995. + * to make flush update convert its content
  31996. + */
  31997. + result = find_disk_cluster(clust, inode,
  31998. + 0 /* do not read items */,
  31999. + mode);
  32000. + }
  32001. + out:
  32002. + tfm_cluster_clr_uptodate(&clust->tc);
  32003. + return result;
  32004. +}
  32005. +
  32006. +static int should_create_unprepped_cluster(struct cluster_handle * clust,
  32007. + struct inode * inode)
  32008. +{
  32009. + assert("edward-737", clust != NULL);
  32010. +
  32011. + switch (clust->dstat) {
  32012. + case PREP_DISK_CLUSTER:
  32013. + case UNPR_DISK_CLUSTER:
  32014. + return 0;
  32015. + case FAKE_DISK_CLUSTER:
  32016. + if (clust->win &&
  32017. + clust->win->stat == HOLE_WINDOW && clust->nr_pages == 0) {
  32018. + assert("edward-1172",
  32019. + new_logical_cluster(clust, inode));
  32020. + return 0;
  32021. + }
  32022. + return 1;
  32023. + default:
  32024. + impossible("edward-1173", "bad disk cluster state");
  32025. + return 0;
  32026. + }
  32027. +}
  32028. +
  32029. +static int cryptcompress_make_unprepped_cluster(struct cluster_handle * clust,
  32030. + struct inode *inode)
  32031. +{
  32032. + int result;
  32033. +
  32034. + assert("edward-1123", reiser4_schedulable());
  32035. + assert("edward-737", clust != NULL);
  32036. + assert("edward-738", inode != NULL);
  32037. + assert("edward-739", cryptcompress_inode_ok(inode));
  32038. + assert("edward-1053", clust->hint != NULL);
  32039. +
  32040. + if (!should_create_unprepped_cluster(clust, inode)) {
  32041. + if (clust->reserved) {
  32042. + cluster_reserved2free(estimate_insert_cluster(inode));
  32043. +#if REISER4_DEBUG
  32044. + assert("edward-1267",
  32045. + clust->reserved_unprepped ==
  32046. + estimate_insert_cluster(inode));
  32047. + clust->reserved_unprepped -=
  32048. + estimate_insert_cluster(inode);
  32049. +#endif
  32050. + }
  32051. + return 0;
  32052. + }
  32053. + assert("edward-1268", clust->reserved);
  32054. + cluster_reserved2grabbed(estimate_insert_cluster(inode));
  32055. +#if REISER4_DEBUG
  32056. + assert("edward-1441",
  32057. + clust->reserved_unprepped == estimate_insert_cluster(inode));
  32058. + clust->reserved_unprepped -= estimate_insert_cluster(inode);
  32059. +#endif
  32060. + result = ctail_insert_unprepped_cluster(clust, inode);
  32061. + if (result)
  32062. + return result;
  32063. +
  32064. + inode_add_bytes(inode, inode_cluster_size(inode));
  32065. +
  32066. + assert("edward-743", cryptcompress_inode_ok(inode));
  32067. + assert("edward-744", znode_is_write_locked(clust->hint->lh.node));
  32068. +
  32069. + clust->dstat = UNPR_DISK_CLUSTER;
  32070. + return 0;
  32071. +}
  32072. +
  32073. +/* . Grab page cluster for read, write, setattr, etc. operations;
  32074. + * . Truncate its complete pages, if needed;
  32075. + */
  32076. +int prepare_page_cluster(struct inode * inode, struct cluster_handle * clust,
  32077. + rw_op rw)
  32078. +{
  32079. + assert("edward-177", inode != NULL);
  32080. + assert("edward-741", cryptcompress_inode_ok(inode));
  32081. + assert("edward-740", clust->pages != NULL);
  32082. +
  32083. + set_cluster_nrpages(clust, inode);
  32084. + reset_cluster_pgset(clust, cluster_nrpages(inode));
  32085. + return grab_page_cluster(inode, clust, rw);
  32086. +}
  32087. +
  32088. +/* Truncate complete page cluster of index @index.
  32089. + * This is called by ->kill_hook() method of item
  32090. + * plugin when deleting a disk cluster of such index.
  32091. + */
  32092. +void truncate_complete_page_cluster(struct inode *inode, cloff_t index,
  32093. + int even_cows)
  32094. +{
  32095. + int found;
  32096. + int nr_pages;
  32097. + jnode *node;
  32098. + pgoff_t page_index = clust_to_pg(index, inode);
  32099. + struct page *pages[MAX_CLUSTER_NRPAGES];
  32100. +
  32101. + node = jlookup(current_tree, get_inode_oid(inode),
  32102. + clust_to_pg(index, inode));
  32103. + nr_pages = size_in_pages(lbytes(index, inode));
  32104. + assert("edward-1483", nr_pages != 0);
  32105. + if (!node)
  32106. + goto truncate;
  32107. + found = find_get_pages(inode->i_mapping, &page_index,
  32108. + cluster_nrpages(inode), pages);
  32109. + if (!found) {
  32110. + assert("edward-1484", jnode_truncate_ok(inode, index));
  32111. + return;
  32112. + }
  32113. + lock_cluster(node);
  32114. +
  32115. + if (reiser4_inode_get_flag(inode, REISER4_FILE_CONV_IN_PROGRESS)
  32116. + && index == 0)
  32117. + /* converting to unix_file is in progress */
  32118. + JF_CLR(node, JNODE_CLUSTER_PAGE);
  32119. + if (JF_ISSET(node, JNODE_DIRTY)) {
  32120. + /*
  32121. + * @nr_pages were checked in, but not yet checked out -
  32122. + * we need to release them. (also there can be pages
  32123. + * attached to page cache by read(), etc. - don't take
  32124. + * them into account).
  32125. + */
  32126. + assert("edward-1198", found >= nr_pages);
  32127. +
  32128. + /* free disk space grabbed for disk cluster converting */
  32129. + cluster_reserved2grabbed(estimate_update_cluster(inode));
  32130. + grabbed2free(get_current_context(),
  32131. + get_current_super_private(),
  32132. + estimate_update_cluster(inode));
  32133. + __put_page_cluster(0, nr_pages, pages, inode);
  32134. +
  32135. + /* This will clear dirty bit, uncapture and unlock jnode */
  32136. + unlock_cluster_uncapture(node);
  32137. + } else
  32138. + unlock_cluster(node);
  32139. + jput(node); /* jlookup */
  32140. + put_found_pages(pages, found); /* find_get_pages */
  32141. + truncate:
  32142. + if (reiser4_inode_get_flag(inode, REISER4_FILE_CONV_IN_PROGRESS) &&
  32143. + index == 0)
  32144. + return;
  32145. + truncate_page_cluster_range(inode, pages, index, 0,
  32146. + cluster_nrpages(inode),
  32147. + even_cows);
  32148. + assert("edward-1201",
  32149. + ergo(!reiser4_inode_get_flag(inode,
  32150. + REISER4_FILE_CONV_IN_PROGRESS),
  32151. + jnode_truncate_ok(inode, index)));
  32152. + return;
  32153. +}
  32154. +
  32155. +/*
  32156. + * Set cluster handle @clust of a logical cluster before
  32157. + * modifications which are supposed to be committed.
  32158. + *
  32159. + * . grab cluster pages;
  32160. + * . reserve disk space;
  32161. + * . maybe read pages from disk and set the disk cluster dirty;
  32162. + * . maybe write hole and check in (partially zeroed) logical cluster;
  32163. + * . create 'unprepped' disk cluster for new or fake logical one.
  32164. + */
  32165. +static int prepare_logical_cluster(struct inode *inode,
  32166. + loff_t file_off, /* write position
  32167. + in the file */
  32168. + loff_t to_file, /* bytes of users data
  32169. + to write to the file */
  32170. + struct cluster_handle * clust,
  32171. + logical_cluster_op op)
  32172. +{
  32173. + int result = 0;
  32174. + struct reiser4_slide * win = clust->win;
  32175. +
  32176. + reset_cluster_params(clust);
  32177. + cluster_set_tfm_act(&clust->tc, TFMA_READ);
  32178. +#if REISER4_DEBUG
  32179. + clust->ctx = get_current_context();
  32180. +#endif
  32181. + assert("edward-1190", op != LC_INVAL);
  32182. +
  32183. + clust->op = op;
  32184. +
  32185. + result = prepare_page_cluster(inode, clust, WRITE_OP);
  32186. + if (result)
  32187. + return result;
  32188. + assert("edward-1447",
  32189. + ergo(clust->nr_pages != 0, jprivate(clust->pages[0])));
  32190. + assert("edward-1448",
  32191. + ergo(clust->nr_pages != 0,
  32192. + jnode_is_cluster_page(jprivate(clust->pages[0]))));
  32193. +
  32194. + result = reserve4cluster(inode, clust);
  32195. + if (result)
  32196. + goto out;
  32197. +
  32198. + result = read_some_cluster_pages(inode, clust);
  32199. +
  32200. + if (result ||
  32201. + /*
  32202. + * don't submit data modifications
  32203. + * when expanding or shrinking holes
  32204. + */
  32205. + (op == LC_SHRINK && clust->dstat == FAKE_DISK_CLUSTER) ||
  32206. + (op == LC_EXPAND && clust->dstat == FAKE_DISK_CLUSTER)){
  32207. + free_reserved4cluster(inode,
  32208. + clust,
  32209. + estimate_update_cluster(inode) +
  32210. + estimate_insert_cluster(inode));
  32211. + goto out;
  32212. + }
  32213. + assert("edward-1124", clust->dstat != INVAL_DISK_CLUSTER);
  32214. +
  32215. + result = cryptcompress_make_unprepped_cluster(clust, inode);
  32216. + if (result)
  32217. + goto error;
  32218. + if (win && win->stat == HOLE_WINDOW) {
  32219. + result = write_hole(inode, clust, file_off, to_file);
  32220. + if (result)
  32221. + goto error;
  32222. + }
  32223. + return 0;
  32224. + error:
  32225. + free_reserved4cluster(inode, clust,
  32226. + estimate_update_cluster(inode));
  32227. + out:
  32228. + put_page_cluster(clust, inode, WRITE_OP);
  32229. + return result;
  32230. +}
  32231. +
  32232. +/* set window by two offsets */
  32233. +static void set_window(struct cluster_handle * clust,
  32234. + struct reiser4_slide * win, struct inode *inode,
  32235. + loff_t o1, loff_t o2)
  32236. +{
  32237. + assert("edward-295", clust != NULL);
  32238. + assert("edward-296", inode != NULL);
  32239. + assert("edward-1071", win != NULL);
  32240. + assert("edward-297", o1 <= o2);
  32241. +
  32242. + clust->index = off_to_clust(o1, inode);
  32243. +
  32244. + win->off = off_to_cloff(o1, inode);
  32245. + win->count = min((loff_t)(inode_cluster_size(inode) - win->off),
  32246. + o2 - o1);
  32247. + win->delta = 0;
  32248. +
  32249. + clust->win = win;
  32250. +}
  32251. +
  32252. +static int set_window_and_cluster(struct inode *inode,
  32253. + struct cluster_handle * clust,
  32254. + struct reiser4_slide * win, size_t length,
  32255. + loff_t file_off)
  32256. +{
  32257. + int result;
  32258. +
  32259. + assert("edward-197", clust != NULL);
  32260. + assert("edward-1072", win != NULL);
  32261. + assert("edward-198", inode != NULL);
  32262. +
  32263. + result = alloc_cluster_pgset(clust, cluster_nrpages(inode));
  32264. + if (result)
  32265. + return result;
  32266. +
  32267. + if (file_off > i_size_read(inode)) {
  32268. + /* Uhmm, hole in cryptcompress file... */
  32269. + loff_t hole_size;
  32270. + hole_size = file_off - inode->i_size;
  32271. +
  32272. + set_window(clust, win, inode, inode->i_size, file_off);
  32273. + win->stat = HOLE_WINDOW;
  32274. + if (win->off + hole_size < inode_cluster_size(inode))
  32275. + /* there is also user's data to append to the hole */
  32276. + win->delta = min(inode_cluster_size(inode) -
  32277. + (win->off + win->count), length);
  32278. + return 0;
  32279. + }
  32280. + set_window(clust, win, inode, file_off, file_off + length);
  32281. + win->stat = DATA_WINDOW;
  32282. + return 0;
  32283. +}
  32284. +
  32285. +int set_cluster_by_page(struct cluster_handle * clust, struct page * page,
  32286. + int count)
  32287. +{
  32288. + int result = 0;
  32289. + int (*setting_actor)(struct cluster_handle * clust, int count);
  32290. +
  32291. + assert("edward-1358", clust != NULL);
  32292. + assert("edward-1359", page != NULL);
  32293. + assert("edward-1360", page->mapping != NULL);
  32294. + assert("edward-1361", page->mapping->host != NULL);
  32295. +
  32296. + setting_actor =
  32297. + (clust->pages ? reset_cluster_pgset : alloc_cluster_pgset);
  32298. + result = setting_actor(clust, count);
  32299. + clust->index = pg_to_clust(page->index, page->mapping->host);
  32300. + return result;
  32301. +}
  32302. +
  32303. + /* reset all the params that do not get updated */
  32304. +void reset_cluster_params(struct cluster_handle * clust)
  32305. +{
  32306. + assert("edward-197", clust != NULL);
  32307. +
  32308. + clust->dstat = INVAL_DISK_CLUSTER;
  32309. + clust->tc.uptodate = 0;
  32310. + clust->tc.len = 0;
  32311. +}
  32312. +
  32313. +/* the heart of write_cryptcompress */
  32314. +static loff_t do_write_cryptcompress(struct file *file, struct inode *inode,
  32315. + const char __user *buf, size_t to_write,
  32316. + loff_t pos, struct dispatch_context *cont)
  32317. +{
  32318. + int i;
  32319. + hint_t *hint;
  32320. + int result = 0;
  32321. + size_t count;
  32322. + struct reiser4_slide win;
  32323. + struct cluster_handle clust;
  32324. + struct cryptcompress_info * info;
  32325. +
  32326. + assert("edward-154", buf != NULL);
  32327. + assert("edward-161", reiser4_schedulable());
  32328. + assert("edward-748", cryptcompress_inode_ok(inode));
  32329. + assert("edward-159", current_blocksize == PAGE_SIZE);
  32330. + assert("edward-1274", get_current_context()->grabbed_blocks == 0);
  32331. +
  32332. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  32333. + if (hint == NULL)
  32334. + return RETERR(-ENOMEM);
  32335. +
  32336. + result = load_file_hint(file, hint);
  32337. + if (result) {
  32338. + kfree(hint);
  32339. + return result;
  32340. + }
  32341. + count = to_write;
  32342. +
  32343. + reiser4_slide_init(&win);
  32344. + cluster_init_read(&clust, &win);
  32345. + clust.hint = hint;
  32346. + info = cryptcompress_inode_data(inode);
  32347. +
  32348. + mutex_lock(&info->checkin_mutex);
  32349. +
  32350. + result = set_window_and_cluster(inode, &clust, &win, to_write, pos);
  32351. + if (result)
  32352. + goto out;
  32353. +
  32354. + if (next_window_stat(&win) == HOLE_WINDOW) {
  32355. + /* write hole in this iteration
  32356. + separated from the loop below */
  32357. + result = write_dispatch_hook(file, inode,
  32358. + pos, &clust, cont);
  32359. + if (result)
  32360. + goto out;
  32361. + result = prepare_logical_cluster(inode, pos, count, &clust,
  32362. + LC_APPOV);
  32363. + if (result)
  32364. + goto out;
  32365. + }
  32366. + do {
  32367. + const char __user * src;
  32368. + unsigned page_off, to_page;
  32369. +
  32370. + assert("edward-750", reiser4_schedulable());
  32371. +
  32372. + result = write_dispatch_hook(file, inode,
  32373. + pos + to_write - count,
  32374. + &clust, cont);
  32375. + if (result)
  32376. + goto out;
  32377. + if (cont->state == DISPATCH_ASSIGNED_NEW)
  32378. + /* done_lh was called in write_dispatch_hook */
  32379. + goto out_no_longterm_lock;
  32380. +
  32381. + result = prepare_logical_cluster(inode, pos, count, &clust,
  32382. + LC_APPOV);
  32383. + if (result)
  32384. + goto out;
  32385. +
  32386. + assert("edward-751", cryptcompress_inode_ok(inode));
  32387. + assert("edward-204", win.stat == DATA_WINDOW);
  32388. + assert("edward-1288", hint_is_valid(clust.hint));
  32389. + assert("edward-752",
  32390. + znode_is_write_locked(hint->ext_coord.coord.node));
  32391. + put_hint_cluster(&clust, inode, ZNODE_WRITE_LOCK);
  32392. +
  32393. + /* set write position in page */
  32394. + page_off = off_to_pgoff(win.off);
  32395. +
  32396. + /* copy user's data to cluster pages */
  32397. + for (i = off_to_pg(win.off), src = buf;
  32398. + i < size_in_pages(win.off + win.count);
  32399. + i++, src += to_page) {
  32400. + to_page = __mbp(win.off + win.count, i) - page_off;
  32401. + assert("edward-1039",
  32402. + page_off + to_page <= PAGE_SIZE);
  32403. + assert("edward-287", clust.pages[i] != NULL);
  32404. +
  32405. + fault_in_readable(src, to_page);
  32406. +
  32407. + lock_page(clust.pages[i]);
  32408. + result =
  32409. + __copy_from_user((char *)kmap(clust.pages[i]) +
  32410. + page_off, src, to_page);
  32411. + kunmap(clust.pages[i]);
  32412. + if (unlikely(result)) {
  32413. + unlock_page(clust.pages[i]);
  32414. + result = -EFAULT;
  32415. + goto err2;
  32416. + }
  32417. + SetPageUptodate(clust.pages[i]);
  32418. + set_page_dirty_notag(clust.pages[i]);
  32419. + flush_dcache_page(clust.pages[i]);
  32420. + mark_page_accessed(clust.pages[i]);
  32421. + unlock_page(clust.pages[i]);
  32422. + page_off = 0;
  32423. + }
  32424. + assert("edward-753", cryptcompress_inode_ok(inode));
  32425. +
  32426. + result = checkin_logical_cluster(&clust, inode);
  32427. + if (result)
  32428. + goto err2;
  32429. +
  32430. + buf += win.count;
  32431. + count -= win.count;
  32432. +
  32433. + result = balance_dirty_page_cluster(&clust, inode, 0, count,
  32434. + win_count_to_nrpages(&win));
  32435. + if (result)
  32436. + goto err1;
  32437. + assert("edward-755", hint->lh.owner == NULL);
  32438. + reset_cluster_params(&clust);
  32439. + continue;
  32440. + err2:
  32441. + put_page_cluster(&clust, inode, WRITE_OP);
  32442. + err1:
  32443. + if (clust.reserved)
  32444. + free_reserved4cluster(inode,
  32445. + &clust,
  32446. + estimate_update_cluster(inode));
  32447. + break;
  32448. + } while (count);
  32449. + out:
  32450. + done_lh(&hint->lh);
  32451. + save_file_hint(file, hint);
  32452. + out_no_longterm_lock:
  32453. + mutex_unlock(&info->checkin_mutex);
  32454. + kfree(hint);
  32455. + put_cluster_handle(&clust);
  32456. + assert("edward-195",
  32457. + ergo((to_write == count),
  32458. + (result < 0 || cont->state == DISPATCH_ASSIGNED_NEW)));
  32459. + return (to_write - count) ? (to_write - count) : result;
  32460. +}
  32461. +
  32462. +/**
  32463. + * plugin->write()
  32464. + * @file: file to write to
  32465. + * @buf: address of user-space buffer
  32466. + * @count: number of bytes to write
  32467. + * @off: position in file to write to
  32468. + */
  32469. +ssize_t write_cryptcompress(struct file *file, const char __user *buf,
  32470. + size_t count, loff_t *off,
  32471. + struct dispatch_context *cont)
  32472. +{
  32473. + ssize_t result;
  32474. + struct inode *inode;
  32475. + reiser4_context *ctx;
  32476. + loff_t pos = *off;
  32477. + struct cryptcompress_info *info;
  32478. +
  32479. + assert("edward-1449", cont->state == DISPATCH_INVAL_STATE);
  32480. +
  32481. + inode = file_inode(file);
  32482. + assert("edward-196", cryptcompress_inode_ok(inode));
  32483. +
  32484. + info = cryptcompress_inode_data(inode);
  32485. + ctx = get_current_context();
  32486. +
  32487. + result = file_remove_privs(file);
  32488. + if (unlikely(result != 0)) {
  32489. + context_set_commit_async(ctx);
  32490. + return result;
  32491. + }
  32492. + /* remove_suid might create a transaction */
  32493. + reiser4_txn_restart(ctx);
  32494. +
  32495. + result = do_write_cryptcompress(file, inode, buf, count, pos, cont);
  32496. +
  32497. + if (unlikely(result < 0)) {
  32498. + context_set_commit_async(ctx);
  32499. + return result;
  32500. + }
  32501. + /* update position in a file */
  32502. + *off = pos + result;
  32503. + return result;
  32504. +}
  32505. +
  32506. +/* plugin->readpages */
  32507. +int readpages_cryptcompress(struct file *file, struct address_space *mapping,
  32508. + struct list_head *pages, unsigned nr_pages)
  32509. +{
  32510. + reiser4_context * ctx;
  32511. + int ret;
  32512. +
  32513. + ctx = reiser4_init_context(mapping->host->i_sb);
  32514. + if (IS_ERR(ctx)) {
  32515. + ret = PTR_ERR(ctx);
  32516. + goto err;
  32517. + }
  32518. + /* cryptcompress file can be built of ctail items only */
  32519. + ret = readpages_ctail(file, mapping, pages);
  32520. + reiser4_txn_restart(ctx);
  32521. + reiser4_exit_context(ctx);
  32522. + if (ret) {
  32523. +err:
  32524. + put_pages_list(pages);
  32525. + }
  32526. + return ret;
  32527. +}
  32528. +
  32529. +static reiser4_block_nr cryptcompress_estimate_read(struct inode *inode)
  32530. +{
  32531. + /* reserve one block to update stat data item */
  32532. + assert("edward-1193",
  32533. + inode_file_plugin(inode)->estimate.update ==
  32534. + estimate_update_common);
  32535. + return estimate_update_common(inode);
  32536. +}
  32537. +
  32538. +/**
  32539. + * plugin->read(): grab space for a stat-data update, then do a generic read
  32540. + */
  32541. +ssize_t read_cryptcompress(struct kiocb *iocb, struct iov_iter *iter)
  32542. +{
  32543. + ssize_t result;
  32544. + struct inode *inode;
  32545. + reiser4_context *ctx;
  32546. + struct cryptcompress_info *info;
  32547. + reiser4_block_nr needed;
  32548. +
  32549. + inode = file_inode(iocb->ki_filp);
  32550. + assert("edward-1194", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
  32551. +
  32552. + ctx = reiser4_init_context(inode->i_sb);
  32553. + if (IS_ERR(ctx))
  32554. + return PTR_ERR(ctx);
  32555. +
  32556. + info = cryptcompress_inode_data(inode); /* NOTE(review): info is unused in this function */
  32557. + needed = cryptcompress_estimate_read(inode);
  32558. +
  32559. + result = reiser4_grab_space(needed, BA_CAN_COMMIT);
  32560. + if (result != 0) {
  32561. + reiser4_exit_context(ctx);
  32562. + return result;
  32563. + }
  32564. + result = generic_file_read_iter(iocb, iter);
  32565. +
  32566. + context_set_commit_async(ctx);
  32567. + reiser4_exit_context(ctx);
  32568. +
  32569. + return result;
  32570. +}
  32571. +
  32572. +/* Set left coord when unit is not found after node_lookup()
  32573. + This takes into account that there can be holes in a sequence
  32574. + of disk clusters */
  32575. +
  32576. +static void adjust_left_coord(coord_t * left_coord)
  32577. +{
  32578. + switch (left_coord->between) {
  32579. + case AFTER_UNIT:
  32580. + left_coord->between = AFTER_ITEM; /* fall through */
  32581. + case AFTER_ITEM:
  32582. + case BEFORE_UNIT:
  32583. + break;
  32584. + default:
  32585. + impossible("edward-1204", "bad left coord to cut");
  32586. + }
  32587. + return;
  32588. +}
  32589. +
  32590. +#define CRC_CUT_TREE_MIN_ITERATIONS 64
  32591. +
  32592. +/* plugin->cut_tree_worker */
  32593. +int cut_tree_worker_cryptcompress(tap_t * tap, const reiser4_key * from_key,
  32594. + const reiser4_key * to_key,
  32595. + reiser4_key * smallest_removed,
  32596. + struct inode *object, int truncate,
  32597. + int *progress)
  32598. +{ /* cut [@from_key..@to_key] right-to-left, one node per iteration */
  32599. + lock_handle next_node_lock;
  32600. + coord_t left_coord;
  32601. + int result;
  32602. +
  32603. + assert("edward-1158", tap->coord->node != NULL);
  32604. + assert("edward-1159", znode_is_write_locked(tap->coord->node));
  32605. + assert("edward-1160", znode_get_level(tap->coord->node) == LEAF_LEVEL);
  32606. +
  32607. + *progress = 0;
  32608. + init_lh(&next_node_lock);
  32609. +
  32610. + while (1) {
  32611. + znode *node; /* node from which items are cut */
  32612. + node_plugin *nplug; /* node plugin for @node */
  32613. +
  32614. + node = tap->coord->node;
  32615. +
  32616. + /* Move next_node_lock to the next node on the left. */
  32617. + result =
  32618. + reiser4_get_left_neighbor(&next_node_lock, node,
  32619. + ZNODE_WRITE_LOCK,
  32620. + GN_CAN_USE_UPPER_LEVELS);
  32621. + if (result != 0 && result != -E_NO_NEIGHBOR)
  32622. + break;
  32623. + /* FIXME-EDWARD: Check can we delete the node as a whole. */
  32624. + result = reiser4_tap_load(tap);
  32625. + if (result)
  32626. + break; /* bugfix: was "return result;", which leaked the lock held in next_node_lock; break reaches done_lh() below */
  32627. +
  32628. + /* Prepare the second (right) point for cut_node() */
  32629. + if (*progress)
  32630. + coord_init_last_unit(tap->coord, node);
  32631. +
  32632. + else if (item_plugin_by_coord(tap->coord)->b.lookup == NULL)
  32633. + /* set rightmost unit for the items without lookup method */
  32634. + tap->coord->unit_pos = coord_last_unit_pos(tap->coord);
  32635. +
  32636. + nplug = node->nplug;
  32637. +
  32638. + assert("edward-1161", nplug);
  32639. + assert("edward-1162", nplug->lookup);
  32640. +
  32641. + /* left_coord is leftmost unit cut from @node */
  32642. + result = nplug->lookup(node, from_key, FIND_EXACT, &left_coord);
  32643. +
  32644. + if (IS_CBKERR(result))
  32645. + break;
  32646. +
  32647. + if (result == CBK_COORD_NOTFOUND)
  32648. + adjust_left_coord(&left_coord);
  32649. +
  32650. + /* adjust coordinates so that they are set to existing units */
  32651. + if (coord_set_to_right(&left_coord)
  32652. + || coord_set_to_left(tap->coord)) {
  32653. + result = 0;
  32654. + break;
  32655. + }
  32656. +
  32657. + if (coord_compare(&left_coord, tap->coord) ==
  32658. + COORD_CMP_ON_RIGHT) {
  32659. + /* keys from @from_key to @to_key are not in the tree */
  32660. + result = 0;
  32661. + break;
  32662. + }
  32663. +
  32664. + /* cut data from one node */
  32665. + *smallest_removed = *reiser4_min_key();
  32666. + result = kill_node_content(&left_coord,
  32667. + tap->coord,
  32668. + from_key,
  32669. + to_key,
  32670. + smallest_removed,
  32671. + next_node_lock.node,
  32672. + object, truncate);
  32673. + reiser4_tap_relse(tap);
  32674. +
  32675. + if (result)
  32676. + break;
  32677. +
  32678. + ++(*progress);
  32679. +
  32680. + /* Check whether all items with keys >= from_key were removed
  32681. + * from the tree. */
  32682. + if (keyle(smallest_removed, from_key))
  32683. + /* result = 0; */
  32684. + break;
  32685. +
  32686. + if (next_node_lock.node == NULL)
  32687. + break;
  32688. +
  32689. + result = reiser4_tap_move(tap, &next_node_lock);
  32690. + done_lh(&next_node_lock);
  32691. + if (result)
  32692. + break;
  32693. +
  32694. + /* Break long cut_tree operation (deletion of a large file) if
  32695. + * atom requires commit. */
  32696. + if (*progress > CRC_CUT_TREE_MIN_ITERATIONS
  32697. + && current_atom_should_commit()) {
  32698. + result = -E_REPEAT;
  32699. + break;
  32700. + }
  32701. + }
  32702. + done_lh(&next_node_lock);
  32703. + return result;
  32704. +}
  32705. +
  32706. +static int expand_cryptcompress(struct inode *inode /* old size */,
  32707. + loff_t new_size)
  32708. +{ /* expanding truncate: zero out the tail of the last logical cluster, then grow i_size */
  32709. + int result = 0;
  32710. + hint_t *hint;
  32711. + lock_handle *lh;
  32712. + loff_t hole_size;
  32713. + int nr_zeroes; /* bounded by inode_cluster_size(), so int is sufficient */
  32714. + struct reiser4_slide win;
  32715. + struct cluster_handle clust;
  32716. +
  32717. + assert("edward-1133", inode->i_size < new_size);
  32718. + assert("edward-1134", reiser4_schedulable());
  32719. + assert("edward-1135", cryptcompress_inode_ok(inode));
  32720. + assert("edward-1136", current_blocksize == PAGE_SIZE);
  32721. +
  32722. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  32723. + if (hint == NULL)
  32724. + return RETERR(-ENOMEM);
  32725. + hint_init_zero(hint);
  32726. + lh = &hint->lh;
  32727. +
  32728. + reiser4_slide_init(&win);
  32729. + cluster_init_read(&clust, &win);
  32730. + clust.hint = hint;
  32731. +
  32732. + if (off_to_cloff(inode->i_size, inode) == 0)
  32733. + goto append_hole; /* old size is cluster-aligned: nothing to zero */
  32734. + /*
  32735. + * It can happen that
  32736. + * a part of the hole will be converted
  32737. + * to zeros. If so, it should be submitted
  32738. + */
  32739. + result = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
  32740. + if (result)
  32741. + goto out;
  32742. + hole_size = new_size - inode->i_size;
  32743. + nr_zeroes = inode_cluster_size(inode) -
  32744. + off_to_cloff(inode->i_size, inode);
  32745. + if (nr_zeroes > hole_size)
  32746. + nr_zeroes = hole_size;
  32747. +
  32748. + set_window(&clust, &win, inode, inode->i_size,
  32749. + inode->i_size + nr_zeroes);
  32750. + win.stat = HOLE_WINDOW;
  32751. +
  32752. + assert("edward-1137",
  32753. + clust.index == off_to_clust(inode->i_size, inode));
  32754. +
  32755. + result = prepare_logical_cluster(inode, 0, 0, &clust, LC_EXPAND);
  32756. + if (result)
  32757. + goto out;
  32758. + assert("edward-1139",
  32759. + clust.dstat == PREP_DISK_CLUSTER ||
  32760. + clust.dstat == UNPR_DISK_CLUSTER ||
  32761. + clust.dstat == FAKE_DISK_CLUSTER);
  32762. +
  32763. + assert("edward-1431", hole_size >= nr_zeroes);
  32764. +
  32765. + append_hole:
  32766. + INODE_SET_SIZE(inode, new_size);
  32767. + out:
  32768. + done_lh(lh);
  32769. + kfree(hint);
  32770. + put_cluster_handle(&clust);
  32771. + return result;
  32772. +}
  32773. +
  32774. +static int update_size_actor(struct inode *inode,
  32775. + loff_t new_size, int update_sd)
  32776. +{ /* passed to cut_file_items() as the size-update callback */
  32777. + if (new_size & ((loff_t) (inode_cluster_size(inode)) - 1))
  32778. + /*
  32779. + * cut not at logical cluster boundary,
  32780. + * size will be updated by write_hole()
  32781. + */
  32782. + return 0;
  32783. + else
  32784. + return reiser4_update_file_size(inode, new_size, update_sd);
  32785. +}
  32786. +
  32787. +static int prune_cryptcompress(struct inode *inode,
  32788. + loff_t new_size, int update_sd)
  32789. +{ /* shrinking truncate: cut whole clusters, zero the partial tail cluster, shrink i_size */
  32790. + int result = 0;
  32791. + unsigned nr_zeros;
  32792. + loff_t to_prune;
  32793. + loff_t old_size;
  32794. + cloff_t from_idx;
  32795. + cloff_t to_idx;
  32796. +
  32797. + hint_t *hint;
  32798. + lock_handle *lh;
  32799. + struct reiser4_slide win;
  32800. + struct cluster_handle clust;
  32801. +
  32802. + assert("edward-1140", inode->i_size >= new_size);
  32803. + assert("edward-1141", reiser4_schedulable());
  32804. + assert("edward-1142", cryptcompress_inode_ok(inode));
  32805. + assert("edward-1143", current_blocksize == PAGE_SIZE);
  32806. +
  32807. + old_size = inode->i_size;
  32808. +
  32809. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  32810. + if (hint == NULL)
  32811. + return RETERR(-ENOMEM);
  32812. + hint_init_zero(hint);
  32813. + lh = &hint->lh;
  32814. +
  32815. + reiser4_slide_init(&win);
  32816. + cluster_init_read(&clust, &win);
  32817. + clust.hint = hint;
  32818. +
  32819. + /*
  32820. + * index of the leftmost logical cluster
  32821. + * that will be completely truncated
  32822. + */
  32823. + from_idx = size_in_lc(new_size, inode);
  32824. + to_idx = size_in_lc(inode->i_size, inode);
  32825. + /*
  32826. + * truncate all complete disk clusters starting from @from_idx
  32827. + */
  32828. + assert("edward-1174", from_idx <= to_idx);
  32829. +
  32830. + old_size = inode->i_size; /* NOTE(review): duplicate assignment; old_size is not read below */
  32831. + if (from_idx != to_idx) {
  32832. + struct cryptcompress_info *info;
  32833. + info = cryptcompress_inode_data(inode);
  32834. +
  32835. + result = cut_file_items(inode,
  32836. + clust_to_off(from_idx, inode),
  32837. + update_sd,
  32838. + clust_to_off(to_idx, inode),
  32839. + update_size_actor);
  32840. + info->trunc_index = ULONG_MAX;
  32841. + if (unlikely(result == CBK_COORD_NOTFOUND))
  32842. + result = 0;
  32843. + if (unlikely(result))
  32844. + goto out;
  32845. + }
  32846. + if (off_to_cloff(new_size, inode) == 0)
  32847. + goto truncate_hole; /* new size is cluster-aligned: no partial cluster to zero */
  32848. +
  32849. + assert("edward-1146", new_size < inode->i_size);
  32850. +
  32851. + to_prune = inode->i_size - new_size; /* NOTE(review): to_prune is not read below */
  32852. + /*
  32853. + * Partial truncate of the last logical cluster.
  32854. + * Partial hole will be converted to zeros. The resulted
  32855. + * logical cluster will be captured and submitted to disk
  32856. + */
  32857. + result = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
  32858. + if (result)
  32859. + goto out;
  32860. +
  32861. + nr_zeros = off_to_pgoff(new_size);
  32862. + if (nr_zeros)
  32863. + nr_zeros = PAGE_SIZE - nr_zeros;
  32864. +
  32865. + set_window(&clust, &win, inode, new_size, new_size + nr_zeros);
  32866. + win.stat = HOLE_WINDOW;
  32867. +
  32868. + assert("edward-1149", clust.index == from_idx - 1);
  32869. +
  32870. + result = prepare_logical_cluster(inode, 0, 0, &clust, LC_SHRINK);
  32871. + if (result)
  32872. + goto out;
  32873. + assert("edward-1151",
  32874. + clust.dstat == PREP_DISK_CLUSTER ||
  32875. + clust.dstat == UNPR_DISK_CLUSTER ||
  32876. + clust.dstat == FAKE_DISK_CLUSTER);
  32877. + truncate_hole:
  32878. + /*
  32879. + * drop all the pages that don't have jnodes (i.e. pages
  32880. + * which can not be truncated by cut_file_items() because
  32881. + * of holes represented by fake disk clusters) including
  32882. + * the pages of partially truncated cluster which was
  32883. + * released by prepare_logical_cluster()
  32884. + */
  32885. + INODE_SET_SIZE(inode, new_size);
  32886. + truncate_inode_pages(inode->i_mapping, new_size);
  32887. + out:
  32888. + assert("edward-1497",
  32889. + pages_truncate_ok(inode, size_in_pages(new_size)));
  32890. +
  32891. + done_lh(lh);
  32892. + kfree(hint);
  32893. + put_cluster_handle(&clust);
  32894. + return result;
  32895. +}
  32896. +
  32897. +/**
  32898. + * Capture a page cluster.
  32899. + * @clust must be set up by the caller.
  32900. + */
  32901. +static int capture_page_cluster(struct cluster_handle * clust,
  32902. + struct inode * inode)
  32903. +{ /* prepare, dirty and check in one logical cluster */
  32904. + int result;
  32905. +
  32906. + assert("edward-1073", clust != NULL);
  32907. + assert("edward-1074", inode != NULL);
  32908. + assert("edward-1075", clust->dstat == INVAL_DISK_CLUSTER);
  32909. +
  32910. + result = prepare_logical_cluster(inode, 0, 0, clust, LC_APPOV);
  32911. + if (result)
  32912. + return result;
  32913. +
  32914. + set_cluster_pages_dirty(clust, inode);
  32915. + result = checkin_logical_cluster(clust, inode);
  32916. + put_hint_cluster(clust, inode, ZNODE_WRITE_LOCK);
  32917. + if (unlikely(result))
  32918. + put_page_cluster(clust, inode, WRITE_OP);
  32919. + return result;
  32920. +}
  32921. +
  32922. +/* Starting from @index find tagged pages of the same page cluster.
  32923. + * Clear the tag for each of them. Return number of found pages.
  32924. + */
  32925. +static int find_anon_page_cluster(struct address_space * mapping,
  32926. + pgoff_t * index, struct page ** pages)
  32927. +{
  32928. + int i = 0;
  32929. + int found;
  32930. + xa_lock_irq(&mapping->i_pages);
  32931. + do {
  32932. + /* looking for one page */
  32933. + found = radix_tree_gang_lookup_tag(&mapping->i_pages,
  32934. + (void **)&pages[i],
  32935. + *index, 1,
  32936. + PAGECACHE_TAG_REISER4_MOVED);
  32937. + if (!found)
  32938. + break;
  32939. + if (!same_page_cluster(pages[0], pages[i]))
  32940. + break;
  32941. +
  32942. + /* found */
  32943. + get_page(pages[i]); /* reference held for the caller; dropped via put_found_pages() */
  32944. + *index = pages[i]->index + 1;
  32945. +
  32946. + radix_tree_tag_clear(&mapping->i_pages,
  32947. + pages[i]->index,
  32948. + PAGECACHE_TAG_REISER4_MOVED);
  32949. + if (last_page_in_cluster(pages[i++]))
  32950. + break;
  32951. + } while (1);
  32952. + xa_unlock_irq(&mapping->i_pages);
  32953. + return i;
  32954. +}
  32955. +
  32956. +#define MAX_PAGES_TO_CAPTURE (1024)
  32957. +
  32958. +/* Capture anonymous page clusters (at most @to_capture pages) */
  32959. +static int capture_anon_pages(struct address_space * mapping, pgoff_t * index,
  32960. + int to_capture)
  32961. +{ /* returns number of captured pages on success, negative on error */
  32962. + int count = 0;
  32963. + int found = 0;
  32964. + int result = 0;
  32965. + hint_t *hint;
  32966. + lock_handle *lh;
  32967. + struct inode * inode;
  32968. + struct cluster_handle clust;
  32969. + struct page * pages[MAX_CLUSTER_NRPAGES];
  32970. +
  32971. + assert("edward-1127", mapping != NULL);
  32972. + assert("edward-1128", mapping->host != NULL);
  32973. + assert("edward-1440", mapping->host->i_mapping == mapping);
  32974. +
  32975. + inode = mapping->host;
  32976. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  32977. + if (hint == NULL)
  32978. + return RETERR(-ENOMEM);
  32979. + hint_init_zero(hint);
  32980. + lh = &hint->lh;
  32981. +
  32982. + cluster_init_read(&clust, NULL /* no sliding window */);
  32983. + clust.hint = hint;
  32984. +
  32985. + result = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
  32986. + if (result)
  32987. + goto out;
  32988. +
  32989. + while (to_capture > 0) {
  32990. + found = find_anon_page_cluster(mapping, index, pages);
  32991. + if (!found) {
  32992. + *index = (pgoff_t) - 1; /* no more tagged pages: signal caller to stop */
  32993. + break;
  32994. + }
  32995. + move_cluster_forward(&clust, inode, pages[0]->index);
  32996. + result = capture_page_cluster(&clust, inode);
  32997. +
  32998. + put_found_pages(pages, found); /* find_anon_page_cluster */
  32999. + if (result)
  33000. + break;
  33001. + to_capture -= clust.nr_pages;
  33002. + count += clust.nr_pages;
  33003. + }
  33004. + if (result) {
  33005. + warning("edward-1077",
  33006. + "Capture failed (inode %llu, result=%i, captured=%d)\n",
  33007. + (unsigned long long)get_inode_oid(inode), result, count);
  33008. + } else {
  33009. + assert("edward-1078", ergo(found > 0, count > 0));
  33010. + if (to_capture <= 0)
  33011. + /* there may be more pages left */
  33012. + __mark_inode_dirty(inode, I_DIRTY_PAGES);
  33013. + result = count;
  33014. + }
  33015. + out:
  33016. + done_lh(lh);
  33017. + kfree(hint);
  33018. + put_cluster_handle(&clust);
  33019. + return result;
  33020. +}
  33021. +
  33022. +/* Returns true if inode's mapping has dirty pages
  33023. + which do not belong to any atom ("anonymous" pages, tagged MOVED) */
  33024. +static int cryptcompress_inode_has_anon_pages(struct inode *inode)
  33025. +{
  33026. + int result;
  33027. + xa_lock_irq(&inode->i_mapping->i_pages);
  33028. + result = radix_tree_tagged(&inode->i_mapping->i_pages,
  33029. + PAGECACHE_TAG_REISER4_MOVED);
  33030. + xa_unlock_irq(&inode->i_mapping->i_pages);
  33031. + return result;
  33032. +}
  33033. +
  33034. +/* plugin->writepages: capture anonymous page clusters; for WB_SYNC_ALL also force commit */
  33035. +int writepages_cryptcompress(struct address_space *mapping,
  33036. + struct writeback_control *wbc)
  33037. +{
  33038. + int result = 0;
  33039. + long to_capture;
  33040. + pgoff_t nrpages;
  33041. + pgoff_t index = 0;
  33042. + struct inode *inode;
  33043. + struct cryptcompress_info *info;
  33044. +
  33045. + inode = mapping->host;
  33046. + if (!cryptcompress_inode_has_anon_pages(inode))
  33047. + goto end;
  33048. + info = cryptcompress_inode_data(inode);
  33049. + nrpages = size_in_pages(i_size_read(inode));
  33050. +
  33051. + if (wbc->sync_mode != WB_SYNC_ALL)
  33052. + to_capture = min(wbc->nr_to_write, (long)MAX_PAGES_TO_CAPTURE);
  33053. + else
  33054. + to_capture = MAX_PAGES_TO_CAPTURE;
  33055. + do {
  33056. + reiser4_context *ctx;
  33057. +
  33058. + ctx = reiser4_init_context(inode->i_sb);
  33059. + if (IS_ERR(ctx)) {
  33060. + result = PTR_ERR(ctx);
  33061. + break;
  33062. + }
  33063. + /* avoid recursive calls to ->sync_inodes */
  33064. + ctx->nobalance = 1;
  33065. +
  33066. + assert("edward-1079",
  33067. + lock_stack_isclean(get_current_lock_stack()));
  33068. +
  33069. + reiser4_txn_restart_current();
  33070. +
  33071. + if (get_current_context()->entd) {
  33072. + if (mutex_trylock(&info->checkin_mutex) == 0) {
  33073. + /* the mutex might be occupied by
  33074. + entd caller */
  33075. + result = RETERR(-EBUSY);
  33076. + reiser4_exit_context(ctx);
  33077. + break;
  33078. + }
  33079. + } else
  33080. + mutex_lock(&info->checkin_mutex);
  33081. +
  33082. + result = capture_anon_pages(inode->i_mapping, &index,
  33083. + to_capture);
  33084. + mutex_unlock(&info->checkin_mutex);
  33085. +
  33086. + if (result < 0) {
  33087. + reiser4_exit_context(ctx);
  33088. + break;
  33089. + }
  33090. + wbc->nr_to_write -= result; /* capture_anon_pages() returned the number of captured pages */
  33091. + if (wbc->sync_mode != WB_SYNC_ALL) {
  33092. + reiser4_exit_context(ctx);
  33093. + break;
  33094. + }
  33095. + result = txnmgr_force_commit_all(inode->i_sb, 0);
  33096. + reiser4_exit_context(ctx);
  33097. + } while (result >= 0 && index < nrpages);
  33098. +
  33099. + end:
  33100. + if (is_in_reiser4_context()) {
  33101. + if (get_current_context()->nr_captured >= CAPTURE_APAGE_BURST) {
  33102. + /* there are already pages to flush, flush them out,
  33103. + do not delay until end of reiser4_sync_inodes */
  33104. + reiser4_writeout(inode->i_sb, wbc);
  33105. + get_current_context()->nr_captured = 0;
  33106. + }
  33107. + }
  33108. + return result;
  33109. +}
  33110. +
  33111. +/* plugin->ioctl: no ioctls are supported for cryptcompress files */
  33112. +int ioctl_cryptcompress(struct file *filp, unsigned int cmd,
  33113. + unsigned long arg)
  33114. +{
  33115. + return RETERR(-ENOTTY);
  33116. +}
  33117. +
  33118. +/* plugin->mmap: grab space for the stat-data (atime) update, then use generic mmap */
  33119. +int mmap_cryptcompress(struct file *file, struct vm_area_struct *vma)
  33120. +{
  33121. + int result;
  33122. + struct inode *inode;
  33123. + reiser4_context *ctx;
  33124. +
  33125. + inode = file_inode(file);
  33126. + ctx = reiser4_init_context(inode->i_sb);
  33127. + if (IS_ERR(ctx))
  33128. + return PTR_ERR(ctx);
  33129. + /*
  33130. + * generic_file_mmap will do update_atime. Grab space for stat data
  33131. + * update.
  33132. + */
  33133. + result = reiser4_grab_space_force
  33134. + (inode_file_plugin(inode)->estimate.update(inode),
  33135. + BA_CAN_COMMIT);
  33136. + if (result) {
  33137. + reiser4_exit_context(ctx);
  33138. + return result;
  33139. + }
  33140. + result = generic_file_mmap(file, vma);
  33141. + reiser4_exit_context(ctx);
  33142. + return result;
  33143. +}
  33144. +
  33145. +/* plugin->delete_object: truncate the file body to zero, then remove stat data */
  33146. +int delete_object_cryptcompress(struct inode *inode)
  33147. +{
  33148. + int result;
  33149. + struct cryptcompress_info * info;
  33150. +
  33151. + assert("edward-429", inode->i_nlink == 0);
  33152. +
  33153. + reiser4_txn_restart_current();
  33154. + info = cryptcompress_inode_data(inode);
  33155. +
  33156. + mutex_lock(&info->checkin_mutex);
  33157. + result = prune_cryptcompress(inode, 0, 0); /* truncate body to 0; no sd update (file is going away) */
  33158. + mutex_unlock(&info->checkin_mutex);
  33159. +
  33160. + if (result) {
  33161. + warning("edward-430",
  33162. + "cannot truncate cryptcompress file %lli: %i",
  33163. + (unsigned long long)get_inode_oid(inode),
  33164. + result);
  33165. + }
  33166. + /* and remove stat data */
  33167. + return reiser4_delete_object_common(inode);
  33168. +}
  33169. +
  33170. +/*
  33171. + * plugin->setattr
  33172. + * This implements actual truncate (see comments in reiser4/page_cache.c)
  33173. + */
  33174. +int setattr_cryptcompress(struct dentry *dentry, struct iattr *attr)
  33175. +{
  33176. + int result;
  33177. + struct inode *inode;
  33178. + struct cryptcompress_info * info;
  33179. +
  33180. + inode = dentry->d_inode;
  33181. + info = cryptcompress_inode_data(inode);
  33182. +
  33183. + if (attr->ia_valid & ATTR_SIZE) {
  33184. + if (i_size_read(inode) != attr->ia_size) {
  33185. + reiser4_context *ctx;
  33186. + loff_t old_size;
  33187. +
  33188. + ctx = reiser4_init_context(dentry->d_inode->i_sb);
  33189. + if (IS_ERR(ctx))
  33190. + return PTR_ERR(ctx);
  33191. + result = setattr_dispatch_hook(inode);
  33192. + if (result) {
  33193. + context_set_commit_async(ctx);
  33194. + reiser4_exit_context(ctx);
  33195. + return result;
  33196. + }
  33197. + old_size = i_size_read(inode);
  33198. + inode_check_scale(inode, old_size, attr->ia_size);
  33199. +
  33200. + mutex_lock(&info->checkin_mutex); /* same mutex as the write/writeback paths */
  33201. + if (attr->ia_size > inode->i_size)
  33202. + result = expand_cryptcompress(inode,
  33203. + attr->ia_size);
  33204. + else
  33205. + result = prune_cryptcompress(inode,
  33206. + attr->ia_size,
  33207. + 1/* update sd */);
  33208. + mutex_unlock(&info->checkin_mutex);
  33209. + if (result) {
  33210. + warning("edward-1192",
  33211. + "truncate_cryptcompress failed: oid %lli, "
  33212. + "old size %lld, new size %lld, retval %d",
  33213. + (unsigned long long)
  33214. + get_inode_oid(inode), old_size,
  33215. + attr->ia_size, result);
  33216. + }
  33217. + context_set_commit_async(ctx);
  33218. + reiser4_exit_context(ctx);
  33219. + } else
  33220. + result = 0;
  33221. + } else
  33222. + result = reiser4_setattr_common(&init_user_ns, dentry, attr);
  33223. + return result;
  33224. +}
  33225. +
  33226. +/* plugin->release: free reiser4 per-file data attached to @file */
  33227. +int release_cryptcompress(struct inode *inode, struct file *file)
  33228. +{
  33229. + reiser4_context *ctx = reiser4_init_context(inode->i_sb);
  33230. +
  33231. + if (IS_ERR(ctx))
  33232. + return PTR_ERR(ctx);
  33233. + reiser4_free_file_fsdata(file);
  33234. + reiser4_exit_context(ctx);
  33235. + return 0;
  33236. +}
  33237. +
  33238. +/* plugin->write_begin() */
  33239. +int write_begin_cryptcompress(struct file *file, struct page *page,
  33240. + loff_t pos, unsigned len, void **fsdata)
  33241. +{ /* reserve cluster resources; on success checkin_mutex stays held until ->write_end() */
  33242. + int ret = -ENOMEM;
  33243. + char *buf;
  33244. + hint_t *hint;
  33245. + struct inode *inode;
  33246. + struct reiser4_slide *win;
  33247. + struct cluster_handle *clust;
  33248. + struct cryptcompress_info *info;
  33249. + reiser4_context *ctx;
  33250. +
  33251. + ctx = get_current_context();
  33252. + inode = page->mapping->host;
  33253. + info = cryptcompress_inode_data(inode);
  33254. +
  33255. + assert("edward-1564", PageLocked(page));
  33256. + buf = kmalloc(sizeof(*clust) +
  33257. + sizeof(*win) +
  33258. + sizeof(*hint),
  33259. + reiser4_ctx_gfp_mask_get());
  33260. + if (!buf)
  33261. + goto err2;
  33262. + clust = (struct cluster_handle *)buf;
  33263. + win = (struct reiser4_slide *)(buf + sizeof(*clust));
  33264. + hint = (hint_t *)(buf + sizeof(*clust) + sizeof(*win));
  33265. +
  33266. + hint_init_zero(hint);
  33267. + cluster_init_read(clust, NULL);
  33268. + clust->hint = hint;
  33269. +
  33270. + mutex_lock(&info->checkin_mutex);
  33271. +
  33272. + ret = set_window_and_cluster(inode, clust, win, len, pos);
  33273. + if (ret)
  33274. + goto err1;
  33275. + unlock_page(page);
  33276. + ret = prepare_logical_cluster(inode, pos, len, clust, LC_APPOV);
  33277. + done_lh(&hint->lh);
  33278. + assert("edward-1565", lock_stack_isclean(get_current_lock_stack()));
  33279. + lock_page(page);
  33280. + if (ret) {
  33281. + SetPageError(page);
  33282. + ClearPageUptodate(page);
  33283. + unlock_page(page);
  33284. + goto err0;
  33285. + }
  33286. + /*
  33287. + * Success. All resources (including checkin_mutex)
  33288. + * will be released in ->write_end()
  33289. + */
  33290. + ctx->locked_page = page;
  33291. + *fsdata = (void *)buf;
  33292. +
  33293. + return 0;
  33294. + err0:
  33295. + put_cluster_handle(clust);
  33296. + err1:
  33297. + mutex_unlock(&info->checkin_mutex);
  33298. + kfree(buf);
  33299. + err2:
  33300. + assert("edward-1568", ret != 0); /* bugfix: was "!ret" — the error labels are only reached with a nonzero ret, so the old assertion fired on every failure */
  33301. + return ret;
  33302. +}
  33303. +
  33304. +/* plugin->write_end(): check in the cluster and release resources taken by ->write_begin() */
  33305. +int write_end_cryptcompress(struct file *file, struct page *page,
  33306. + loff_t pos, unsigned copied, void *fsdata)
  33307. +{
  33308. + int ret;
  33309. + hint_t *hint;
  33310. + struct inode *inode;
  33311. + struct cluster_handle *clust;
  33312. + struct cryptcompress_info *info;
  33313. + reiser4_context *ctx;
  33314. +
  33315. + assert("edward-1566",
  33316. + lock_stack_isclean(get_current_lock_stack()));
  33317. + ctx = get_current_context();
  33318. + inode = page->mapping->host;
  33319. + info = cryptcompress_inode_data(inode);
  33320. + clust = (struct cluster_handle *)fsdata;
  33321. + hint = clust->hint;
  33322. +
  33323. + unlock_page(page);
  33324. + ctx->locked_page = NULL;
  33325. + set_cluster_pages_dirty(clust, inode);
  33326. + ret = checkin_logical_cluster(clust, inode);
  33327. + if (ret) {
  33328. + SetPageError(page);
  33329. + goto exit; /* NOTE(review): goto targets the very next statement; kept for symmetry */
  33330. + }
  33331. + exit:
  33332. + mutex_unlock(&info->checkin_mutex);
  33333. +
  33334. + put_cluster_handle(clust);
  33335. +
  33336. + if (pos + copied > inode->i_size) { /* NOTE(review): a checkin error in "ret" can be overwritten below — confirm intended */
  33337. + /*
  33338. + * i_size has been updated in
  33339. + * checkin_logical_cluster
  33340. + */
  33341. + ret = reiser4_update_sd(inode);
  33342. + if (unlikely(ret != 0))
  33343. + warning("edward-1603",
  33344. + "Can not update stat-data: %i. FSCK?",
  33345. + ret);
  33346. + }
  33347. + kfree(fsdata);
  33348. + return ret;
  33349. +}
  33350. +
  33351. +/* plugin->bmap: logical-to-physical block mapping is not supported for cryptcompress files */
  33352. +sector_t bmap_cryptcompress(struct address_space *mapping, sector_t lblock)
  33353. +{
  33354. + return -EINVAL;
  33355. +}
  33356. +
  33357. +/*
  33358. + Local variables:
  33359. + c-indentation-style: "K&R"
  33360. + mode-name: "LC"
  33361. + c-basic-offset: 8
  33362. + tab-width: 8
  33363. + fill-column: 80
  33364. + scroll-step: 1
  33365. + End:
  33366. +*/
  33367. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/file/cryptcompress.h linux-5.16.14/fs/reiser4/plugin/file/cryptcompress.h
  33368. --- linux-5.16.14.orig/fs/reiser4/plugin/file/cryptcompress.h 1970-01-01 01:00:00.000000000 +0100
  33369. +++ linux-5.16.14/fs/reiser4/plugin/file/cryptcompress.h 2022-03-12 13:26:19.669892770 +0100
  33370. @@ -0,0 +1,620 @@
  33371. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  33372. +/* See http://www.namesys.com/cryptcompress_design.html */
  33373. +
  33374. +#if !defined( __FS_REISER4_CRYPTCOMPRESS_H__ )
  33375. +#define __FS_REISER4_CRYPTCOMPRESS_H__
  33376. +
  33377. +#include "../../page_cache.h"
  33378. +#include "../compress/compress.h"
  33379. +#include "../crypto/cipher.h"
  33380. +
  33381. +#include <linux/pagemap.h>
  33382. +
  33383. +#define MIN_CLUSTER_SHIFT PAGE_SHIFT
  33384. +#define MAX_CLUSTER_SHIFT 16
  33385. +#define MAX_CLUSTER_NRPAGES (1U << MAX_CLUSTER_SHIFT >> PAGE_SHIFT)
  33386. +#define DC_CHECKSUM_SIZE 4
  33387. +
  33388. +#define MIN_LATTICE_FACTOR 1
  33389. +#define MAX_LATTICE_FACTOR 32
  33390. +
  33391. +#define REISER4_CRYPTO 0
  33392. +
  33393. +/* this mask contains all non-standard plugins that might
  33394. + be present in reiser4-specific part of inode managed by
  33395. + cryptcompress file plugin */
  33396. +#define cryptcompress_mask \
  33397. + ((1 << PSET_FILE) | \
  33398. + (1 << PSET_CLUSTER) | \
  33399. + (1 << PSET_CIPHER) | \
  33400. + (1 << PSET_DIGEST) | \
  33401. + (1 << PSET_COMPRESSION) | \
  33402. + (1 << PSET_COMPRESSION_MODE))
  33403. +
  33404. +#if REISER4_DEBUG
  33405. +static inline int cluster_shift_ok(int shift)
  33406. +{ /* debug-only sanity check: shift must lie in [MIN_CLUSTER_SHIFT, MAX_CLUSTER_SHIFT] */
  33407. + return (shift >= MIN_CLUSTER_SHIFT) && (shift <= MAX_CLUSTER_SHIFT);
  33408. +}
  33409. +#endif
  33410. +
  33411. +#if REISER4_DEBUG
  33412. +#define INODE_PGCOUNT(inode) \
  33413. +({ \
  33414. + assert("edward-1530", inode_file_plugin(inode) == \
  33415. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)); \
  33416. + atomic_read(&cryptcompress_inode_data(inode)->pgcount); \
  33417. + })
  33418. +#define INODE_PGCOUNT_INC(inode) \
  33419. +do { \
  33420. + assert("edward-1531", inode_file_plugin(inode) == \
  33421. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)); \
  33422. + atomic_inc(&cryptcompress_inode_data(inode)->pgcount); \
  33423. +} while (0)
  33424. +#define INODE_PGCOUNT_DEC(inode) \
  33425. +do { \
  33426. + if (inode_file_plugin(inode) == \
  33427. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)) \
  33428. + atomic_dec(&cryptcompress_inode_data(inode)->pgcount); \
  33429. +} while (0)
  33430. +#else
  33431. +#define INODE_PGCOUNT(inode) (0)
  33432. +#define INODE_PGCOUNT_INC(inode)
  33433. +#define INODE_PGCOUNT_DEC(inode)
  33434. +#endif /* REISER4_DEBUG */
  33435. +
  33436. +struct tfm_stream {
  33437. + __u8 *data;
  33438. + size_t size;
  33439. +};
  33440. +
  33441. +typedef enum {
  33442. + INPUT_STREAM,
  33443. + OUTPUT_STREAM,
  33444. + LAST_STREAM
  33445. +} tfm_stream_id;
  33446. +
  33447. +typedef struct tfm_stream * tfm_unit[LAST_STREAM];
  33448. +
  33449. +static inline __u8 *ts_data(struct tfm_stream * stm)
  33450. +{ /* raw byte buffer of the stream (NULL until alloc_ts_data()) */
  33451. + assert("edward-928", stm != NULL);
  33452. + return stm->data;
  33453. +}
  33454. +
  33455. +static inline size_t ts_size(struct tfm_stream * stm)
  33456. +{ /* current size in bytes of the stream buffer */
  33457. + assert("edward-929", stm != NULL);
  33458. + return stm->size;
  33459. +}
  33460. +
  33461. +static inline void set_ts_size(struct tfm_stream * stm, size_t size)
  33462. +{ /* record the buffer size (bookkeeping only; no allocation) */
  33463. + assert("edward-930", stm != NULL);
  33464. +
  33465. + stm->size = size;
  33466. +}
  33467. +
  33468. +static inline int alloc_ts(struct tfm_stream ** stm)
  33469. +{ /* allocate an empty transform stream header */
  33470. + assert("edward-931", stm);
  33471. + assert("edward-932", *stm == NULL);
  33472. +
  33473. + *stm = kzalloc(sizeof(**stm), reiser4_ctx_gfp_mask_get()); /* zeroed: data == NULL, size == 0 */
  33474. + if (!*stm)
  33475. + return -ENOMEM;
  33476. + return 0;
  33477. +}
  33478. +
  33479. +static inline void free_ts(struct tfm_stream * stm)
  33480. +{ /* free the stream header; its data must already be released (see asserts) */
  33481. + assert("edward-933", !ts_data(stm));
  33482. + assert("edward-934", !ts_size(stm));
  33483. +
  33484. + kfree(stm);
  33485. +}
  33486. +
  33487. +static inline int alloc_ts_data(struct tfm_stream * stm, size_t size)
  33488. +{ /* vmalloc a data buffer of @size bytes for an empty stream */
  33489. + assert("edward-935", !ts_data(stm));
  33490. + assert("edward-936", !ts_size(stm));
  33491. + assert("edward-937", size != 0);
  33492. +
  33493. + stm->data = reiser4_vmalloc(size);
  33494. + if (!stm->data)
  33495. + return -ENOMEM;
  33496. + set_ts_size(stm, size);
  33497. + return 0;
  33498. +}
  33499. +
  33500. +static inline void free_ts_data(struct tfm_stream * stm)
  33501. +{
  33502. + assert("edward-938", equi(ts_data(stm), ts_size(stm)));
  33503. +
  33504. + if (ts_data(stm))
  33505. + vfree(ts_data(stm));
  33506. + memset(stm, 0, sizeof *stm);
  33507. +}
  33508. +
  33509. +/* Write modes for item conversion in flush convert phase */
  33510. +typedef enum {
  33511. + CTAIL_INVAL_CONVERT_MODE = 0,
  33512. + CTAIL_APPEND_ITEM = 1,
  33513. + CTAIL_OVERWRITE_ITEM = 2,
  33514. + CTAIL_CUT_ITEM = 3
  33515. +} ctail_convert_mode_t;
  33516. +
  33517. +typedef enum {
  33518. + LC_INVAL = 0, /* invalid value */
  33519. + LC_APPOV = 1, /* append and/or overwrite */
  33520. + LC_EXPAND = 2, /* expanding truncate */
  33521. + LC_SHRINK = 3 /* shrinking truncate */
  33522. +} logical_cluster_op;
  33523. +
  33524. +/* Transform cluster.
  33525. + * Intermediate state between page cluster and disk cluster
  33526. + * Is used for data transform (compression/encryption)
  33527. + */
  33528. +struct tfm_cluster {
  33529. + coa_set coa; /* compression algorithms info */
  33530. + tfm_unit tun; /* plain and transformed streams */
  33531. + tfm_action act;
  33532. + int uptodate;
  33533. + int lsize; /* number of bytes in logical cluster */
  33534. + int len; /* length of the transform stream */
  33535. + unsigned int hole:1; /* should punch hole */
  33536. +};
  33537. +
  33538. +static inline coa_t get_coa(struct tfm_cluster * tc, reiser4_compression_id id,
  33539. + tfm_action act)
  33540. +{
  33541. + return tc->coa[id][act];
  33542. +}
  33543. +
  33544. +static inline void set_coa(struct tfm_cluster * tc, reiser4_compression_id id,
  33545. + tfm_action act, coa_t coa)
  33546. +{
  33547. + tc->coa[id][act] = coa;
  33548. +}
  33549. +
  33550. +static inline int alloc_coa(struct tfm_cluster * tc, compression_plugin * cplug)
  33551. +{
  33552. + coa_t coa;
  33553. +
  33554. + coa = cplug->alloc(tc->act);
  33555. + if (IS_ERR(coa))
  33556. + return PTR_ERR(coa);
  33557. + set_coa(tc, cplug->h.id, tc->act, coa);
  33558. + return 0;
  33559. +}
  33560. +
  33561. +static inline int
  33562. +grab_coa(struct tfm_cluster * tc, compression_plugin * cplug)
  33563. +{
  33564. + return (cplug->alloc && !get_coa(tc, cplug->h.id, tc->act) ?
  33565. + alloc_coa(tc, cplug) : 0);
  33566. +}
  33567. +
  33568. +static inline void free_coa_set(struct tfm_cluster * tc)
  33569. +{
  33570. + tfm_action j;
  33571. + reiser4_compression_id i;
  33572. + compression_plugin *cplug;
  33573. +
  33574. + assert("edward-810", tc != NULL);
  33575. +
  33576. + for (j = 0; j < TFMA_LAST; j++)
  33577. + for (i = 0; i < LAST_COMPRESSION_ID; i++) {
  33578. + if (!get_coa(tc, i, j))
  33579. + continue;
  33580. + cplug = compression_plugin_by_id(i);
  33581. + assert("edward-812", cplug->free != NULL);
  33582. + cplug->free(get_coa(tc, i, j), j);
  33583. + set_coa(tc, i, j, 0);
  33584. + }
  33585. + return;
  33586. +}
  33587. +
  33588. +static inline struct tfm_stream * get_tfm_stream(struct tfm_cluster * tc,
  33589. + tfm_stream_id id)
  33590. +{
  33591. + return tc->tun[id];
  33592. +}
  33593. +
  33594. +static inline void set_tfm_stream(struct tfm_cluster * tc,
  33595. + tfm_stream_id id, struct tfm_stream * ts)
  33596. +{
  33597. + tc->tun[id] = ts;
  33598. +}
  33599. +
  33600. +static inline __u8 *tfm_stream_data(struct tfm_cluster * tc, tfm_stream_id id)
  33601. +{
  33602. + return ts_data(get_tfm_stream(tc, id));
  33603. +}
  33604. +
  33605. +static inline void set_tfm_stream_data(struct tfm_cluster * tc,
  33606. + tfm_stream_id id, __u8 * data)
  33607. +{
  33608. + get_tfm_stream(tc, id)->data = data;
  33609. +}
  33610. +
  33611. +static inline size_t tfm_stream_size(struct tfm_cluster * tc, tfm_stream_id id)
  33612. +{
  33613. + return ts_size(get_tfm_stream(tc, id));
  33614. +}
  33615. +
  33616. +static inline void
  33617. +set_tfm_stream_size(struct tfm_cluster * tc, tfm_stream_id id, size_t size)
  33618. +{
  33619. + get_tfm_stream(tc, id)->size = size;
  33620. +}
  33621. +
  33622. +static inline int
  33623. +alloc_tfm_stream(struct tfm_cluster * tc, size_t size, tfm_stream_id id)
  33624. +{
  33625. + assert("edward-939", tc != NULL);
  33626. + assert("edward-940", !get_tfm_stream(tc, id));
  33627. +
  33628. + tc->tun[id] = kzalloc(sizeof(struct tfm_stream),
  33629. + reiser4_ctx_gfp_mask_get());
  33630. + if (!tc->tun[id])
  33631. + return -ENOMEM;
  33632. + return alloc_ts_data(get_tfm_stream(tc, id), size);
  33633. +}
  33634. +
  33635. +static inline int
  33636. +realloc_tfm_stream(struct tfm_cluster * tc, size_t size, tfm_stream_id id)
  33637. +{
  33638. + assert("edward-941", tfm_stream_size(tc, id) < size);
  33639. + free_ts_data(get_tfm_stream(tc, id));
  33640. + return alloc_ts_data(get_tfm_stream(tc, id), size);
  33641. +}
  33642. +
  33643. +static inline void free_tfm_stream(struct tfm_cluster * tc, tfm_stream_id id)
  33644. +{
  33645. + free_ts_data(get_tfm_stream(tc, id));
  33646. + free_ts(get_tfm_stream(tc, id));
  33647. + set_tfm_stream(tc, id, 0);
  33648. +}
  33649. +
  33650. +static inline unsigned coa_overrun(compression_plugin * cplug, int ilen)
  33651. +{
  33652. + return (cplug->overrun != NULL ? cplug->overrun(ilen) : 0);
  33653. +}
  33654. +
  33655. +static inline void free_tfm_unit(struct tfm_cluster * tc)
  33656. +{
  33657. + tfm_stream_id id;
  33658. + for (id = 0; id < LAST_STREAM; id++) {
  33659. + if (!get_tfm_stream(tc, id))
  33660. + continue;
  33661. + free_tfm_stream(tc, id);
  33662. + }
  33663. +}
  33664. +
  33665. +static inline void put_tfm_cluster(struct tfm_cluster * tc)
  33666. +{
  33667. + assert("edward-942", tc != NULL);
  33668. + free_coa_set(tc);
  33669. + free_tfm_unit(tc);
  33670. +}
  33671. +
  33672. +static inline int tfm_cluster_is_uptodate(struct tfm_cluster * tc)
  33673. +{
  33674. + assert("edward-943", tc != NULL);
  33675. + assert("edward-944", tc->uptodate == 0 || tc->uptodate == 1);
  33676. + return (tc->uptodate == 1);
  33677. +}
  33678. +
  33679. +static inline void tfm_cluster_set_uptodate(struct tfm_cluster * tc)
  33680. +{
  33681. + assert("edward-945", tc != NULL);
  33682. + assert("edward-946", tc->uptodate == 0 || tc->uptodate == 1);
  33683. + tc->uptodate = 1;
  33684. + return;
  33685. +}
  33686. +
  33687. +static inline void tfm_cluster_clr_uptodate(struct tfm_cluster * tc)
  33688. +{
  33689. + assert("edward-947", tc != NULL);
  33690. + assert("edward-948", tc->uptodate == 0 || tc->uptodate == 1);
  33691. + tc->uptodate = 0;
  33692. + return;
  33693. +}
  33694. +
  33695. +static inline int tfm_stream_is_set(struct tfm_cluster * tc, tfm_stream_id id)
  33696. +{
  33697. + return (get_tfm_stream(tc, id) &&
  33698. + tfm_stream_data(tc, id) && tfm_stream_size(tc, id));
  33699. +}
  33700. +
  33701. +static inline int tfm_cluster_is_set(struct tfm_cluster * tc)
  33702. +{
  33703. + int i;
  33704. + for (i = 0; i < LAST_STREAM; i++)
  33705. + if (!tfm_stream_is_set(tc, i))
  33706. + return 0;
  33707. + return 1;
  33708. +}
  33709. +
  33710. +static inline void alternate_streams(struct tfm_cluster * tc)
  33711. +{
  33712. + struct tfm_stream *tmp = get_tfm_stream(tc, INPUT_STREAM);
  33713. +
  33714. + set_tfm_stream(tc, INPUT_STREAM, get_tfm_stream(tc, OUTPUT_STREAM));
  33715. + set_tfm_stream(tc, OUTPUT_STREAM, tmp);
  33716. +}
  33717. +
  33718. +/* Set of states to indicate a kind of data
  33719. + * that will be written to the window */
  33720. +typedef enum {
  33721. + DATA_WINDOW, /* user's data */
  33722. + HOLE_WINDOW /* zeroes (such kind of data can be written
  33723. + * if we start to write from offset > i_size) */
  33724. +} window_stat;
  33725. +
  33726. +/* Window (of logical cluster size) discretely sliding along a file.
  33727. + * Is used to locate hole region in a logical cluster to be properly
  33728. + * represented on disk.
  33729. + * We split a write to cryptcompress file into writes to its logical
  33730. + * clusters. Before writing to a logical cluster we set a window, i.e.
  33731. + * calculate values of the following fields:
  33732. + */
  33733. +struct reiser4_slide {
  33734. + unsigned off; /* offset to write from */
  33735. + unsigned count; /* number of bytes to write */
  33736. + unsigned delta; /* number of bytes to append to the hole */
  33737. + window_stat stat; /* what kind of data will be written starting
  33738. + from @off */
  33739. +};
  33740. +
  33741. +/* Possible states of a disk cluster */
  33742. +typedef enum {
  33743. + INVAL_DISK_CLUSTER, /* unknown state */
  33744. + PREP_DISK_CLUSTER, /* disk cluster got converted by flush
  33745. + * at least 1 time */
  33746. + UNPR_DISK_CLUSTER, /* disk cluster just created and should be
  33747. + * converted by flush */
  33748. + FAKE_DISK_CLUSTER, /* disk cluster exists neither in memory
  33749. + * nor on disk */
  33750. + TRNC_DISK_CLUSTER /* disk cluster is partially truncated */
  33751. +} disk_cluster_stat;
  33752. +
  33753. +/* The following structure represents various stages of the same logical
  33754. + * cluster of index @index:
  33755. + * . fixed slide
  33756. + * . page cluster (stage in primary cache)
  33757. + * . transform cluster (transition stage)
  33758. + * . disk cluster (stage in secondary cache)
  33759. + * This structure is used in transition and synchronizing operations, e.g.
  33760. + * transform cluster is a transition state when synchronizing page cluster
  33761. + * and disk cluster.
  33762. + * FIXME: Encapsulate page cluster, disk cluster.
  33763. + */
  33764. +struct cluster_handle {
  33765. + cloff_t index; /* offset in a file (unit is a cluster size) */
  33766. + int index_valid; /* for validating the index above, if needed */
  33767. + struct file *file; /* host file */
  33768. +
  33769. + /* logical cluster */
  33770. + struct reiser4_slide *win; /* sliding window to locate holes */
  33771. + logical_cluster_op op; /* logical cluster operation (truncate or
  33772. + append/overwrite) */
  33773. + /* transform cluster */
  33774. + struct tfm_cluster tc; /* contains all needed info to synchronize
  33775. + page cluster and disk cluster */
  33776. + /* page cluster */
  33777. + int nr_pages; /* number of pages of current checkin action */
  33778. + int old_nrpages; /* number of pages of last checkin action */
  33779. + struct page **pages; /* attached pages */
  33780. + jnode * node; /* jnode for capture */
  33781. +
  33782. + /* disk cluster */
  33783. + hint_t *hint; /* current position in the tree */
  33784. + disk_cluster_stat dstat; /* state of the current disk cluster */
  33785. + int reserved; /* is space for disk cluster reserved */
  33786. +#if REISER4_DEBUG
  33787. + reiser4_context *ctx;
  33788. + int reserved_prepped;
  33789. + int reserved_unprepped;
  33790. +#endif
  33791. +
  33792. +};
  33793. +
  33794. +static inline __u8 * tfm_input_data (struct cluster_handle * clust)
  33795. +{
  33796. + return tfm_stream_data(&clust->tc, INPUT_STREAM);
  33797. +}
  33798. +
  33799. +static inline __u8 * tfm_output_data (struct cluster_handle * clust)
  33800. +{
  33801. + return tfm_stream_data(&clust->tc, OUTPUT_STREAM);
  33802. +}
  33803. +
  33804. +static inline int reset_cluster_pgset(struct cluster_handle * clust,
  33805. + int nrpages)
  33806. +{
  33807. + assert("edward-1057", clust->pages != NULL);
  33808. + memset(clust->pages, 0, sizeof(*clust->pages) * nrpages);
  33809. + return 0;
  33810. +}
  33811. +
  33812. +static inline int alloc_cluster_pgset(struct cluster_handle * clust,
  33813. + int nrpages)
  33814. +{
  33815. + assert("edward-949", clust != NULL);
  33816. + assert("edward-1362", clust->pages == NULL);
  33817. + assert("edward-950", nrpages != 0 && nrpages <= MAX_CLUSTER_NRPAGES);
  33818. +
  33819. + clust->pages = kzalloc(sizeof(*clust->pages) * nrpages,
  33820. + reiser4_ctx_gfp_mask_get());
  33821. + if (!clust->pages)
  33822. + return RETERR(-ENOMEM);
  33823. + return 0;
  33824. +}
  33825. +
  33826. +static inline void move_cluster_pgset(struct cluster_handle *clust,
  33827. + struct page ***pages, int * nr_pages)
  33828. +{
  33829. + assert("edward-1545", clust != NULL && clust->pages != NULL);
  33830. + assert("edward-1546", pages != NULL && *pages == NULL);
  33831. + *pages = clust->pages;
  33832. + *nr_pages = clust->nr_pages;
  33833. + clust->pages = NULL;
  33834. +}
  33835. +
  33836. +static inline void free_cluster_pgset(struct cluster_handle * clust)
  33837. +{
  33838. + assert("edward-951", clust->pages != NULL);
  33839. + kfree(clust->pages);
  33840. + clust->pages = NULL;
  33841. +}
  33842. +
  33843. +static inline void put_cluster_handle(struct cluster_handle * clust)
  33844. +{
  33845. + assert("edward-435", clust != NULL);
  33846. +
  33847. + put_tfm_cluster(&clust->tc);
  33848. + if (clust->pages)
  33849. + free_cluster_pgset(clust);
  33850. + memset(clust, 0, sizeof *clust);
  33851. +}
  33852. +
  33853. +static inline void inc_keyload_count(struct reiser4_crypto_info * data)
  33854. +{
  33855. + assert("edward-1410", data != NULL);
  33856. + data->keyload_count++;
  33857. +}
  33858. +
  33859. +static inline void dec_keyload_count(struct reiser4_crypto_info * data)
  33860. +{
  33861. + assert("edward-1411", data != NULL);
  33862. + assert("edward-1412", data->keyload_count > 0);
  33863. + data->keyload_count--;
  33864. +}
  33865. +
  33866. +static inline int capture_cluster_jnode(jnode * node)
  33867. +{
  33868. + return reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
  33869. +}
  33870. +
  33871. +/* cryptcompress specific part of reiser4_inode */
  33872. +struct cryptcompress_info {
  33873. + struct mutex checkin_mutex; /* This is to serialize
  33874. + * checkin_logical_cluster operations */
  33875. + cloff_t trunc_index; /* Index of the leftmost truncated disk
  33876. + * cluster (to resolve races with read) */
  33877. + struct reiser4_crypto_info *crypt;
  33878. + /*
  33879. + * the following 2 fields are controlled by compression mode plugin
  33880. + */
  33881. + int compress_toggle; /* Current status of compressibility */
  33882. + int lattice_factor; /* Factor of dynamic lattice. FIXME: Have
  33883. + * a compression_toggle to keep the factor
  33884. + */
  33885. +#if REISER4_DEBUG
  33886. + atomic_t pgcount; /* number of grabbed pages */
  33887. +#endif
  33888. +};
  33889. +
  33890. +static inline void set_compression_toggle (struct cryptcompress_info * info, int val)
  33891. +{
  33892. + info->compress_toggle = val;
  33893. +}
  33894. +
  33895. +static inline int get_compression_toggle (struct cryptcompress_info * info)
  33896. +{
  33897. + return info->compress_toggle;
  33898. +}
  33899. +
  33900. +static inline int compression_is_on(struct cryptcompress_info * info)
  33901. +{
  33902. + return get_compression_toggle(info) == 1;
  33903. +}
  33904. +
  33905. +static inline void turn_on_compression(struct cryptcompress_info * info)
  33906. +{
  33907. + set_compression_toggle(info, 1);
  33908. +}
  33909. +
  33910. +static inline void turn_off_compression(struct cryptcompress_info * info)
  33911. +{
  33912. + set_compression_toggle(info, 0);
  33913. +}
  33914. +
  33915. +static inline void set_lattice_factor(struct cryptcompress_info * info, int val)
  33916. +{
  33917. + info->lattice_factor = val;
  33918. +}
  33919. +
  33920. +static inline int get_lattice_factor(struct cryptcompress_info * info)
  33921. +{
  33922. + return info->lattice_factor;
  33923. +}
  33924. +
  33925. +struct cryptcompress_info *cryptcompress_inode_data(const struct inode *);
  33926. +int equal_to_rdk(znode *, const reiser4_key *);
  33927. +int goto_right_neighbor(coord_t *, lock_handle *);
  33928. +int cryptcompress_inode_ok(struct inode *inode);
  33929. +int coord_is_unprepped_ctail(const coord_t * coord);
  33930. +extern int do_readpage_ctail(struct inode *, struct cluster_handle *,
  33931. + struct page * page, znode_lock_mode mode);
  33932. +extern int ctail_insert_unprepped_cluster(struct cluster_handle * clust,
  33933. + struct inode * inode);
  33934. +extern int readpages_cryptcompress(struct file*, struct address_space*,
  33935. + struct list_head*, unsigned);
  33936. +void destroy_inode_cryptcompress(struct inode * inode);
  33937. +int grab_page_cluster(struct inode *inode, struct cluster_handle * clust,
  33938. + rw_op rw);
  33939. +int write_dispatch_hook(struct file *file, struct inode * inode,
  33940. + loff_t pos, struct cluster_handle * clust,
  33941. + struct dispatch_context * cont);
  33942. +int setattr_dispatch_hook(struct inode * inode);
  33943. +struct reiser4_crypto_info * inode_crypto_info(struct inode * inode);
  33944. +void inherit_crypto_info_common(struct inode * parent, struct inode * object,
  33945. + int (*can_inherit)(struct inode * child,
  33946. + struct inode * parent));
  33947. +void reiser4_attach_crypto_info(struct inode * inode,
  33948. + struct reiser4_crypto_info * info);
  33949. +void change_crypto_info(struct inode * inode, struct reiser4_crypto_info * new);
  33950. +struct reiser4_crypto_info * reiser4_alloc_crypto_info (struct inode * inode);
  33951. +
  33952. +static inline struct crypto_blkcipher * info_get_cipher(struct reiser4_crypto_info * info)
  33953. +{
  33954. + return info->cipher;
  33955. +}
  33956. +
  33957. +static inline void info_set_cipher(struct reiser4_crypto_info * info,
  33958. + struct crypto_blkcipher * tfm)
  33959. +{
  33960. + info->cipher = tfm;
  33961. +}
  33962. +
  33963. +static inline struct crypto_hash * info_get_digest(struct reiser4_crypto_info * info)
  33964. +{
  33965. + return info->digest;
  33966. +}
  33967. +
  33968. +static inline void info_set_digest(struct reiser4_crypto_info * info,
  33969. + struct crypto_hash * tfm)
  33970. +{
  33971. + info->digest = tfm;
  33972. +}
  33973. +
  33974. +static inline void put_cluster_page(struct page * page)
  33975. +{
  33976. + put_page(page);
  33977. +}
  33978. +
  33979. +#endif /* __FS_REISER4_CRYPTCOMPRESS_H__ */
  33980. +
  33981. +/* Make Linus happy.
  33982. + Local variables:
  33983. + c-indentation-style: "K&R"
  33984. + mode-name: "LC"
  33985. + c-basic-offset: 8
  33986. + tab-width: 8
  33987. + fill-column: 120
  33988. + scroll-step: 1
  33989. + End:
  33990. +*/
  33991. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/file/file.c linux-5.16.14/fs/reiser4/plugin/file/file.c
  33992. --- linux-5.16.14.orig/fs/reiser4/plugin/file/file.c 1970-01-01 01:00:00.000000000 +0100
  33993. +++ linux-5.16.14/fs/reiser4/plugin/file/file.c 2022-03-12 13:26:19.670892773 +0100
  33994. @@ -0,0 +1,2765 @@
  33995. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  33996. + * reiser4/README */
  33997. +
  33998. +/*
  33999. + * this file contains implementations of inode/file/address_space/file plugin
  34000. + * operations specific for "unix file plugin" (plugin id is
  34001. + * UNIX_FILE_PLUGIN_ID). "Unix file" is either built of tail items only
  34002. + * (FORMATTING_ID) or of extent items only (EXTENT_POINTER_ID) or empty (have
  34003. + * no items but stat data)
  34004. + */
  34005. +
  34006. +#include "../../inode.h"
  34007. +#include "../../super.h"
  34008. +#include "../../tree_walk.h"
  34009. +#include "../../carry.h"
  34010. +#include "../../page_cache.h"
  34011. +#include "../../ioctl.h"
  34012. +#include "../object.h"
  34013. +#include "../cluster.h"
  34014. +#include "../../safe_link.h"
  34015. +
  34016. +#include <linux/writeback.h>
  34017. +#include <linux/pagevec.h>
  34018. +#include <linux/syscalls.h>
  34019. +#include <linux/uio.h>
  34020. +
  34021. +
  34022. +static int unpack(struct file *file, struct inode *inode, int forever);
  34023. +static void drop_access(struct unix_file_info *);
  34024. +static int hint_validate(hint_t * hint, const reiser4_key * key, int check_key,
  34025. + znode_lock_mode lock_mode);
  34026. +
  34027. +/* Get exclusive access and make sure that file is not partially
  34028. + * converted (It may happen that another process is doing tail
  34029. + * conversion. If so, wait until it completes)
  34030. + */
  34031. +static inline void get_exclusive_access_careful(struct unix_file_info * uf_info,
  34032. + struct inode *inode)
  34033. +{
  34034. + do {
  34035. + get_exclusive_access(uf_info);
  34036. + if (!reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV))
  34037. + break;
  34038. + drop_exclusive_access(uf_info);
  34039. + schedule();
  34040. + } while (1);
  34041. +}
  34042. +
  34043. +/* get unix file plugin specific portion of inode */
  34044. +struct unix_file_info *unix_file_inode_data(const struct inode *inode)
  34045. +{
  34046. + return &reiser4_inode_data(inode)->file_plugin_data.unix_file_info;
  34047. +}
  34048. +
  34049. +/**
  34050. + * equal_to_rdk - compare key and znode's right delimiting key
  34051. + * @node: node whose right delimiting key to compare with @key
  34052. + * @key: key to compare with @node's right delimiting key
  34053. + *
  34054. + * Returns true if @key is equal to right delimiting key of @node.
  34055. + */
  34056. +int equal_to_rdk(znode *node, const reiser4_key *key)
  34057. +{
  34058. + int result;
  34059. +
  34060. + read_lock_dk(znode_get_tree(node));
  34061. + result = keyeq(key, znode_get_rd_key(node));
  34062. + read_unlock_dk(znode_get_tree(node));
  34063. + return result;
  34064. +}
  34065. +
  34066. +#if REISER4_DEBUG
  34067. +
  34068. +/**
  34069. + * equal_to_ldk - compare key and znode's left delimiting key
  34070. + * @node: node whose left delimiting key to compare with @key
  34071. + * @key: key to compare with @node's left delimiting key
  34072. + *
  34073. + * Returns true if @key is equal to left delimiting key of @node.
  34074. + */
  34075. +int equal_to_ldk(znode *node, const reiser4_key *key)
  34076. +{
  34077. + int result;
  34078. +
  34079. + read_lock_dk(znode_get_tree(node));
  34080. + result = keyeq(key, znode_get_ld_key(node));
  34081. + read_unlock_dk(znode_get_tree(node));
  34082. + return result;
  34083. +}
  34084. +
  34085. +/**
  34086. + * check_coord - check whether coord corresponds to key
  34087. + * @coord: coord to check
  34088. + * @key: key @coord has to correspond to
  34089. + *
  34090. + * Returns true if @coord is set as if it was set as result of lookup with @key
  34091. + * in coord->node.
  34092. + */
  34093. +static int check_coord(const coord_t *coord, const reiser4_key *key)
  34094. +{
  34095. + coord_t twin;
  34096. +
  34097. + node_plugin_by_node(coord->node)->lookup(coord->node, key,
  34098. + FIND_MAX_NOT_MORE_THAN, &twin);
  34099. + return coords_equal(coord, &twin);
  34100. +}
  34101. +
  34102. +#endif /* REISER4_DEBUG */
  34103. +
  34104. +/**
  34105. + * init_uf_coord - initialize extended coord
  34106. + * @uf_coord:
  34107. + * @lh:
  34108. + *
  34109. + *
  34110. + */
  34111. +void init_uf_coord(uf_coord_t *uf_coord, lock_handle *lh)
  34112. +{
  34113. + coord_init_zero(&uf_coord->coord);
  34114. + coord_clear_iplug(&uf_coord->coord);
  34115. + uf_coord->lh = lh;
  34116. + init_lh(lh);
  34117. + memset(&uf_coord->extension, 0, sizeof(uf_coord->extension));
  34118. + uf_coord->valid = 0;
  34119. +}
  34120. +
  34121. +static void validate_extended_coord(uf_coord_t *uf_coord, loff_t offset)
  34122. +{
  34123. + assert("vs-1333", uf_coord->valid == 0);
  34124. +
  34125. + if (coord_is_between_items(&uf_coord->coord))
  34126. + return;
  34127. +
  34128. + assert("vs-1348",
  34129. + item_plugin_by_coord(&uf_coord->coord)->s.file.
  34130. + init_coord_extension);
  34131. +
  34132. + item_body_by_coord(&uf_coord->coord);
  34133. + item_plugin_by_coord(&uf_coord->coord)->s.file.
  34134. + init_coord_extension(uf_coord, offset);
  34135. +}
  34136. +
  34137. +/**
  34138. + * goto_right_neighbor - lock right neighbor, drop current node lock
  34139. + * @coord:
  34140. + * @lh:
  34141. + *
  34142. + * Obtain lock on right neighbor and drop lock on current node.
  34143. + */
  34144. +int goto_right_neighbor(coord_t *coord, lock_handle *lh)
  34145. +{
  34146. + int result;
  34147. + lock_handle lh_right;
  34148. +
  34149. + assert("vs-1100", znode_is_locked(coord->node));
  34150. +
  34151. + init_lh(&lh_right);
  34152. + result = reiser4_get_right_neighbor(&lh_right, coord->node,
  34153. + znode_is_wlocked(coord->node) ?
  34154. + ZNODE_WRITE_LOCK : ZNODE_READ_LOCK,
  34155. + GN_CAN_USE_UPPER_LEVELS);
  34156. + if (result) {
  34157. + done_lh(&lh_right);
  34158. + return result;
  34159. + }
  34160. +
  34161. + /*
  34162. + * we hold two longterm locks on neighboring nodes. Unlock left of
  34163. + * them
  34164. + */
  34165. + done_lh(lh);
  34166. +
  34167. + coord_init_first_unit_nocheck(coord, lh_right.node);
  34168. + move_lh(lh, &lh_right);
  34169. +
  34170. + return 0;
  34171. +
  34172. +}
  34173. +
  34174. +/**
  34175. + * set_file_state
  34176. + * @uf_info:
  34177. + * @cbk_result:
  34178. + * @level:
  34179. + *
  34180. + * This is to be used by find_file_item and in find_file_state to
  34181. + * determine real state of file
  34182. + */
  34183. +static void set_file_state(struct unix_file_info *uf_info, int cbk_result,
  34184. + tree_level level)
  34185. +{
  34186. + if (cbk_errored(cbk_result))
  34187. + /* error happened in find_file_item */
  34188. + return;
  34189. +
  34190. + assert("vs-1164", level == LEAF_LEVEL || level == TWIG_LEVEL);
  34191. +
  34192. + if (uf_info->container == UF_CONTAINER_UNKNOWN) {
  34193. + if (cbk_result == CBK_COORD_NOTFOUND)
  34194. + uf_info->container = UF_CONTAINER_EMPTY;
  34195. + else if (level == LEAF_LEVEL)
  34196. + uf_info->container = UF_CONTAINER_TAILS;
  34197. + else
  34198. + uf_info->container = UF_CONTAINER_EXTENTS;
  34199. + } else {
  34200. + /*
  34201. + * file state is known, check whether it is set correctly if
  34202. + * file is not being tail converted
  34203. + */
  34204. + if (!reiser4_inode_get_flag(unix_file_info_to_inode(uf_info),
  34205. + REISER4_PART_IN_CONV)) {
  34206. + assert("vs-1162",
  34207. + ergo(level == LEAF_LEVEL &&
  34208. + cbk_result == CBK_COORD_FOUND,
  34209. + uf_info->container == UF_CONTAINER_TAILS));
  34210. + assert("vs-1165",
  34211. + ergo(level == TWIG_LEVEL &&
  34212. + cbk_result == CBK_COORD_FOUND,
  34213. + uf_info->container == UF_CONTAINER_EXTENTS));
  34214. + }
  34215. + }
  34216. +}
  34217. +
  34218. +int find_file_item_nohint(coord_t *coord, lock_handle *lh,
  34219. + const reiser4_key *key, znode_lock_mode lock_mode,
  34220. + struct inode *inode)
  34221. +{
  34222. + return reiser4_object_lookup(inode, key, coord, lh, lock_mode,
  34223. + FIND_MAX_NOT_MORE_THAN,
  34224. + TWIG_LEVEL, LEAF_LEVEL,
  34225. + (lock_mode == ZNODE_READ_LOCK) ? CBK_UNIQUE :
  34226. + (CBK_UNIQUE | CBK_FOR_INSERT),
  34227. + NULL /* ra_info */ );
  34228. +}
  34229. +
  34230. +/**
  34231. + * find_file_item - look for file item in the tree
  34232. + * @hint: provides coordinate, lock handle, seal
  34233. + * @key: key for search
  34234. + * @mode: mode of lock to put on returned node
  34235. + * @ra_info:
  34236. + * @inode:
  34237. + *
  34238. + * This finds position in the tree corresponding to @key. It first tries to use
  34239. + * @hint's seal if it is set.
  34240. + */
  34241. +int find_file_item(hint_t *hint, const reiser4_key *key,
  34242. + znode_lock_mode lock_mode,
  34243. + struct inode *inode)
  34244. +{
  34245. + int result;
  34246. + coord_t *coord;
  34247. + lock_handle *lh;
  34248. +
  34249. + assert("nikita-3030", reiser4_schedulable());
  34250. + assert("vs-1707", hint != NULL);
  34251. + assert("vs-47", inode != NULL);
  34252. +
  34253. + coord = &hint->ext_coord.coord;
  34254. + lh = hint->ext_coord.lh;
  34255. + init_lh(lh);
  34256. +
  34257. + result = hint_validate(hint, key, 1 /* check key */, lock_mode);
  34258. + if (!result) {
  34259. + if (coord->between == AFTER_UNIT &&
  34260. + equal_to_rdk(coord->node, key)) {
  34261. + result = goto_right_neighbor(coord, lh);
  34262. + if (result == -E_NO_NEIGHBOR)
  34263. + return RETERR(-EIO);
  34264. + if (result)
  34265. + return result;
  34266. + assert("vs-1152", equal_to_ldk(coord->node, key));
  34267. + /*
  34268. + * we moved to different node. Invalidate coord
  34269. + * extension, zload is necessary to init it again
  34270. + */
  34271. + hint->ext_coord.valid = 0;
  34272. + }
  34273. +
  34274. + set_file_state(unix_file_inode_data(inode), CBK_COORD_FOUND,
  34275. + znode_get_level(coord->node));
  34276. +
  34277. + return CBK_COORD_FOUND;
  34278. + }
  34279. +
  34280. + coord_init_zero(coord);
  34281. + result = find_file_item_nohint(coord, lh, key, lock_mode, inode);
  34282. + set_file_state(unix_file_inode_data(inode), result,
  34283. + znode_get_level(coord->node));
  34284. +
  34285. + /* FIXME: we might already have coord extension initialized */
  34286. + hint->ext_coord.valid = 0;
  34287. + return result;
  34288. +}
  34289. +
  34290. +void hint_init_zero(hint_t * hint)
  34291. +{
  34292. + memset(hint, 0, sizeof(*hint));
  34293. + init_lh(&hint->lh);
  34294. + hint->ext_coord.lh = &hint->lh;
  34295. +}
  34296. +
  34297. +static int find_file_state(struct inode *inode, struct unix_file_info *uf_info)
  34298. +{
  34299. + int result;
  34300. + reiser4_key key;
  34301. + coord_t coord;
  34302. + lock_handle lh;
  34303. +
  34304. + assert("vs-1628", ea_obtained(uf_info));
  34305. +
  34306. + if (uf_info->container == UF_CONTAINER_UNKNOWN) {
  34307. + key_by_inode_and_offset_common(inode, 0, &key);
  34308. + init_lh(&lh);
  34309. + result = find_file_item_nohint(&coord, &lh, &key,
  34310. + ZNODE_READ_LOCK, inode);
  34311. + set_file_state(uf_info, result, znode_get_level(coord.node));
  34312. + done_lh(&lh);
  34313. + if (!cbk_errored(result))
  34314. + result = 0;
  34315. + } else
  34316. + result = 0;
  34317. + assert("vs-1074",
  34318. + ergo(result == 0, uf_info->container != UF_CONTAINER_UNKNOWN));
  34319. + reiser4_txn_restart_current();
  34320. + return result;
  34321. +}
  34322. +
  34323. +/**
  34324. + * Estimate and reserve space needed to truncate page
  34325. + * which gets partially truncated: one block for page
  34326. + * itself, stat-data update (estimate_one_insert_into_item)
  34327. + * and one item insertion (estimate_one_insert_into_item)
  34328. + * which may happen if page corresponds to hole extent and
  34329. + * unallocated one will have to be created
  34330. + */
  34331. +static int reserve_partial_page(reiser4_tree * tree)
  34332. +{
  34333. + grab_space_enable();
  34334. + return reiser4_grab_reserved(reiser4_get_current_sb(),
  34335. + 1 +
  34336. + 2 * estimate_one_insert_into_item(tree),
  34337. + BA_CAN_COMMIT);
  34338. +}
  34339. +
  34340. +/* estimate and reserve space needed to cut one item and update one stat data */
  34341. +static int reserve_cut_iteration(reiser4_tree * tree)
  34342. +{
  34343. + __u64 estimate = estimate_one_item_removal(tree)
  34344. + + estimate_one_insert_into_item(tree);
  34345. +
  34346. + assert("nikita-3172", lock_stack_isclean(get_current_lock_stack()));
  34347. +
  34348. + grab_space_enable();
  34349. + /* We need to double our estimate now that we can delete more than one
  34350. + node. */
  34351. + return reiser4_grab_reserved(reiser4_get_current_sb(), estimate * 2,
  34352. + BA_CAN_COMMIT);
  34353. +}
  34354. +
  34355. +int reiser4_update_file_size(struct inode *inode, loff_t new_size,
  34356. + int update_sd)
  34357. +{
  34358. + int result = 0;
  34359. +
  34360. + INODE_SET_SIZE(inode, new_size);
  34361. + if (update_sd) {
  34362. + inode->i_ctime = inode->i_mtime = current_time(inode);
  34363. + result = reiser4_update_sd(inode);
  34364. + }
  34365. + return result;
  34366. +}
  34367. +
  34368. +/**
  34369. + * Cut file items one by one starting from the last one until
  34370. + * new file size (inode->i_size) is reached. Reserve space
  34371. + * and update file stat data on every single cut from the tree
  34372. + */
  34373. +int cut_file_items(struct inode *inode, loff_t new_size,
  34374. + int update_sd, loff_t cur_size,
  34375. + int (*update_actor) (struct inode *, loff_t, int))
  34376. +{
  34377. + reiser4_key from_key, to_key;
  34378. + reiser4_key smallest_removed;
  34379. + file_plugin *fplug = inode_file_plugin(inode);
  34380. + int result;
  34381. + int progress = 0;
  34382. +
  34383. + assert("vs-1248",
  34384. + fplug == file_plugin_by_id(UNIX_FILE_PLUGIN_ID) ||
  34385. + fplug == file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
  34386. +
  34387. + fplug->key_by_inode(inode, new_size, &from_key);
  34388. + to_key = from_key;
  34389. + set_key_offset(&to_key, cur_size - 1 /*get_key_offset(reiser4_max_key()) */ );
  34390. + /* this loop normally runs just once */
  34391. + while (1) {
  34392. + result = reserve_cut_iteration(reiser4_tree_by_inode(inode));
  34393. + if (result)
  34394. + break;
  34395. +
  34396. + result = reiser4_cut_tree_object(current_tree, &from_key, &to_key,
  34397. + &smallest_removed, inode, 1,
  34398. + &progress);
  34399. + if (result == -E_REPEAT) {
  34400. + /**
  34401. + * -E_REPEAT is a signal to interrupt a long
  34402. + * file truncation process
  34403. + */
  34404. + if (progress) {
  34405. + result = update_actor(inode,
  34406. + get_key_offset(&smallest_removed),
  34407. + update_sd);
  34408. + if (result)
  34409. + break;
  34410. + }
  34411. + /* the below does up(sbinfo->delete_mutex).
  34412. + * Do not get fooled */
  34413. + reiser4_release_reserved(inode->i_sb);
  34414. + /**
  34415. + * reiser4_cut_tree_object() was interrupted probably
  34416. + * because current atom requires commit, we have to
  34417. + * release transaction handle to allow atom commit.
  34418. + */
  34419. + reiser4_txn_restart_current();
  34420. + continue;
  34421. + }
  34422. + if (result
  34423. + && !(result == CBK_COORD_NOTFOUND && new_size == 0
  34424. + && inode->i_size == 0))
  34425. + break;
  34426. +
  34427. + set_key_offset(&smallest_removed, new_size);
  34428. + /* Final sd update after the file gets its correct size */
  34429. + result = update_actor(inode, get_key_offset(&smallest_removed),
  34430. + update_sd);
  34431. + break;
  34432. + }
  34433. +
  34434. + /* the below does up(sbinfo->delete_mutex). Do not get fooled */
  34435. + reiser4_release_reserved(inode->i_sb);
  34436. +
  34437. + return result;
  34438. +}
  34439. +
  34440. +int find_or_create_extent(struct page *page);
  34441. +
  34442. +/* part of truncate_file_body: it is called when truncate is used to make file
  34443. + shorter */
  34444. +static int shorten_file(struct inode *inode, loff_t new_size)
  34445. +{
  34446. + int result;
  34447. + struct page *page;
  34448. + int padd_from;
  34449. + unsigned long index;
  34450. + struct unix_file_info *uf_info;
  34451. +
  34452. + /*
  34453. + * all items of ordinary reiser4 file are grouped together. That is why
  34454. + * we can use reiser4_cut_tree. Plan B files (for instance) can not be
  34455. + * truncated that simply
  34456. + */
  34457. + result = cut_file_items(inode, new_size, 1 /*update_sd */ ,
  34458. + get_key_offset(reiser4_max_key()),
  34459. + reiser4_update_file_size);
  34460. + if (result)
  34461. + return result;
  34462. +
  34463. + uf_info = unix_file_inode_data(inode);
  34464. + assert("vs-1105", new_size == inode->i_size);
  34465. + if (new_size == 0) {
  34466. + uf_info->container = UF_CONTAINER_EMPTY;
  34467. + return 0;
  34468. + }
  34469. +
  34470. + result = find_file_state(inode, uf_info);
  34471. + if (result)
  34472. + return result;
  34473. + if (uf_info->container == UF_CONTAINER_TAILS)
  34474. + /*
  34475. + * No need to worry about zeroing last page after new file
  34476. + * end
  34477. + */
  34478. + return 0;
  34479. +
  34480. + padd_from = inode->i_size & (PAGE_SIZE - 1);
  34481. + if (!padd_from)
  34482. + /* file is truncated to page boundary */
  34483. + return 0;
  34484. +
  34485. + result = reserve_partial_page(reiser4_tree_by_inode(inode));
  34486. + if (result) {
  34487. + reiser4_release_reserved(inode->i_sb);
  34488. + return result;
  34489. + }
  34490. +
  34491. + /* last page is partially truncated - zero its content */
  34492. + index = (inode->i_size >> PAGE_SHIFT);
  34493. + page = read_mapping_page(inode->i_mapping, index, NULL);
  34494. + if (IS_ERR(page)) {
  34495. + /*
  34496. + * the below does up(sbinfo->delete_mutex). Do not get
  34497. + * confused
  34498. + */
  34499. + reiser4_release_reserved(inode->i_sb);
  34500. + if (likely(PTR_ERR(page) == -EINVAL)) {
  34501. + /* looks like file is built of tail items */
  34502. + return 0;
  34503. + }
  34504. + return PTR_ERR(page);
  34505. + }
  34506. + wait_on_page_locked(page);
  34507. + if (!PageUptodate(page)) {
  34508. + put_page(page);
  34509. + /*
  34510. + * the below does up(sbinfo->delete_mutex). Do not get
  34511. + * confused
  34512. + */
  34513. + reiser4_release_reserved(inode->i_sb);
  34514. + return RETERR(-EIO);
  34515. + }
  34516. +
  34517. + /*
  34518. + * if page corresponds to hole extent unit - unallocated one will be
  34519. + * created here. This is not necessary
  34520. + */
  34521. + result = find_or_create_extent(page);
  34522. +
  34523. + /*
  34524. + * FIXME: cut_file_items has already updated inode. Probably it would
  34525. + * be better to update it here when file is really truncated
  34526. + */
  34527. + if (result) {
  34528. + put_page(page);
  34529. + /*
  34530. + * the below does up(sbinfo->delete_mutex). Do not get
  34531. + * confused
  34532. + */
  34533. + reiser4_release_reserved(inode->i_sb);
  34534. + return result;
  34535. + }
  34536. +
  34537. + lock_page(page);
  34538. + assert("vs-1066", PageLocked(page));
  34539. + zero_user_segment(page, padd_from, PAGE_SIZE);
  34540. + unlock_page(page);
  34541. + put_page(page);
  34542. + /* the below does up(sbinfo->delete_mutex). Do not get confused */
  34543. + reiser4_release_reserved(inode->i_sb);
  34544. + return 0;
  34545. +}
  34546. +
  34547. +/**
  34548. + * should_have_notail
  34549. + * @uf_info:
  34550. + * @new_size:
  34551. + *
  34552. + * Calls formatting plugin to see whether file of size @new_size has to be
  34553. + * stored in unformatted nodes or in tail items. 0 is returned for the latter case.
  34554. + */
  34555. +static int should_have_notail(const struct unix_file_info *uf_info, loff_t new_size)
  34556. +{
  34557. + if (!uf_info->tplug)
  34558. + return 1;
  34559. + return !uf_info->tplug->have_tail(unix_file_info_to_inode(uf_info),
  34560. + new_size);
  34561. +
  34562. +}
  34563. +
  34564. +/**
  34565. + * truncate_file_body - change length of file
  34566. + * @inode: inode of file
  34567. + * @new_size: new file length
  34568. + *
  34569. + * Adjusts items file @inode is built of to match @new_size. It may either cut
  34570. + * items or add them to represent a hole at the end of file. The caller has to
  34571. + * obtain exclusive access to the file.
  34572. + */
  34573. +static int truncate_file_body(struct inode *inode, struct iattr *attr)
  34574. +{
  34575. + int result;
  34576. + loff_t new_size = attr->ia_size;
  34577. +
  34578. + if (inode->i_size < new_size) {
  34579. + /* expanding truncate */
  34580. + struct unix_file_info *uf_info = unix_file_inode_data(inode);
  34581. +
  34582. + result = find_file_state(inode, uf_info);
  34583. + if (result)
  34584. + return result;
  34585. +
  34586. + if (should_have_notail(uf_info, new_size)) {
  34587. + /*
  34588. + * file of size @new_size has to be built of
  34589. + * extents. If it is built of tails - convert to
  34590. + * extents
  34591. + */
  34592. + if (uf_info->container == UF_CONTAINER_TAILS) {
  34593. + /*
  34594. + * if file is being converted by another process
  34595. + * - wait until it completes
  34596. + */
  34597. + while (1) {
  34598. + if (reiser4_inode_get_flag(inode,
  34599. + REISER4_PART_IN_CONV)) {
  34600. + drop_exclusive_access(uf_info);
  34601. + schedule();
  34602. + get_exclusive_access(uf_info);
  34603. + continue;
  34604. + }
  34605. + break;
  34606. + }
  34607. +
  34608. + if (uf_info->container == UF_CONTAINER_TAILS) {
  34609. + result = tail2extent(uf_info);
  34610. + if (result)
  34611. + return result;
  34612. + }
  34613. + }
  34614. + result = reiser4_write_extent(NULL, inode, NULL,
  34615. + 0, &new_size);
  34616. + if (result)
  34617. + return result;
  34618. + uf_info->container = UF_CONTAINER_EXTENTS;
  34619. + } else {
  34620. + if (uf_info->container == UF_CONTAINER_EXTENTS) {
  34621. + result = reiser4_write_extent(NULL, inode, NULL,
  34622. + 0, &new_size);
  34623. + if (result)
  34624. + return result;
  34625. + } else {
  34626. + result = reiser4_write_tail(NULL, inode, NULL,
  34627. + 0, &new_size);
  34628. + if (result)
  34629. + return result;
  34630. + uf_info->container = UF_CONTAINER_TAILS;
  34631. + }
  34632. + }
  34633. + BUG_ON(result > 0);
  34634. + result = reiser4_update_file_size(inode, new_size, 1);
  34635. + BUG_ON(result != 0);
  34636. + } else
  34637. + result = shorten_file(inode, new_size);
  34638. + return result;
  34639. +}
  34640. +
  34641. +/**
  34642. + * load_file_hint - copy hint from struct file to local variable
  34643. + * @file: file to get hint from
  34644. + * @hint: structure to fill
  34645. + *
  34646. + * Reiser4 specific portion of struct file may contain information (hint)
  34647. + * stored on exiting from previous read or write. That information includes
  34648. + * seal of znode and coord within that znode where previous read or write
  34649. + * stopped. This function copies that information to @hint if it was stored or
  34650. + * initializes @hint by 0s otherwise.
  34651. + */
  34652. +int load_file_hint(struct file *file, hint_t *hint)
  34653. +{
  34654. + reiser4_file_fsdata *fsdata;
  34655. +
  34656. + if (file) {
  34657. + fsdata = reiser4_get_file_fsdata(file);
  34658. + if (IS_ERR(fsdata))
  34659. + return PTR_ERR(fsdata);
  34660. +
  34661. + spin_lock_inode(file_inode(file));
  34662. + if (reiser4_seal_is_set(&fsdata->reg.hint.seal)) {
  34663. + memcpy(hint, &fsdata->reg.hint, sizeof(*hint));
  34664. + init_lh(&hint->lh);
  34665. + hint->ext_coord.lh = &hint->lh;
  34666. + spin_unlock_inode(file_inode(file));
  34667. + /*
  34668. + * force re-validation of the coord on the first
  34669. + * iteration of the read/write loop.
  34670. + */
  34671. + hint->ext_coord.valid = 0;
  34672. + assert("nikita-19892",
  34673. + coords_equal(&hint->seal.coord1,
  34674. + &hint->ext_coord.coord));
  34675. + return 0;
  34676. + }
  34677. + memset(&fsdata->reg.hint, 0, sizeof(hint_t));
  34678. + spin_unlock_inode(file_inode(file));
  34679. + }
  34680. + hint_init_zero(hint);
  34681. + return 0;
  34682. +}
  34683. +
  34684. +/**
  34685. + * save_file_hint - copy hint to reiser4 private struct file's part
  34686. + * @file: file to save hint in
  34687. + * @hint: hint to save
  34688. + *
  34689. + * This copies @hint to reiser4 private part of struct file. It can help
  34690. + * speed up future accesses to the file.
  34691. + */
  34692. +void save_file_hint(struct file *file, const hint_t *hint)
  34693. +{
  34694. + reiser4_file_fsdata *fsdata;
  34695. +
  34696. + assert("edward-1337", hint != NULL);
  34697. +
  34698. + if (!file || !reiser4_seal_is_set(&hint->seal))
  34699. + return;
  34700. + fsdata = reiser4_get_file_fsdata(file);
  34701. + assert("vs-965", !IS_ERR(fsdata));
  34702. + assert("nikita-19891",
  34703. + coords_equal(&hint->seal.coord1, &hint->ext_coord.coord));
  34704. + assert("vs-30", hint->lh.owner == NULL);
  34705. + spin_lock_inode(file_inode(file));
  34706. + fsdata->reg.hint = *hint;
  34707. + spin_unlock_inode(file_inode(file));
  34708. + return;
  34709. +}
  34710. +
  34711. +void reiser4_unset_hint(hint_t * hint)
  34712. +{
  34713. + assert("vs-1315", hint);
  34714. + hint->ext_coord.valid = 0;
  34715. + reiser4_seal_done(&hint->seal);
  34716. + done_lh(&hint->lh);
  34717. +}
  34718. +
  34719. +/* coord must be set properly. So, that reiser4_set_hint
  34720. + has nothing to do */
  34721. +void reiser4_set_hint(hint_t * hint, const reiser4_key * key,
  34722. + znode_lock_mode mode)
  34723. +{
  34724. + ON_DEBUG(coord_t * coord = &hint->ext_coord.coord);
  34725. + assert("vs-1207", WITH_DATA(coord->node, check_coord(coord, key)));
  34726. +
  34727. + reiser4_seal_init(&hint->seal, &hint->ext_coord.coord, key);
  34728. + hint->offset = get_key_offset(key);
  34729. + hint->mode = mode;
  34730. + done_lh(&hint->lh);
  34731. +}
  34732. +
  34733. +int hint_is_set(const hint_t * hint)
  34734. +{
  34735. + return reiser4_seal_is_set(&hint->seal);
  34736. +}
  34737. +
  34738. +#if REISER4_DEBUG
  34739. +static int all_but_offset_key_eq(const reiser4_key * k1, const reiser4_key * k2)
  34740. +{
  34741. + return (get_key_locality(k1) == get_key_locality(k2) &&
  34742. + get_key_type(k1) == get_key_type(k2) &&
  34743. + get_key_band(k1) == get_key_band(k2) &&
  34744. + get_key_ordering(k1) == get_key_ordering(k2) &&
  34745. + get_key_objectid(k1) == get_key_objectid(k2));
  34746. +}
  34747. +#endif
  34748. +
  34749. +static int
  34750. +hint_validate(hint_t * hint, const reiser4_key * key, int check_key,
  34751. + znode_lock_mode lock_mode)
  34752. +{
  34753. + if (!hint || !hint_is_set(hint) || hint->mode != lock_mode)
  34754. + /* hint either not set or set by different operation */
  34755. + return RETERR(-E_REPEAT);
  34756. +
  34757. + assert("vs-1277", all_but_offset_key_eq(key, &hint->seal.key));
  34758. +
  34759. + if (check_key && get_key_offset(key) != hint->offset)
  34760. + /* hint is set for different key */
  34761. + return RETERR(-E_REPEAT);
  34762. +
  34763. + assert("vs-31", hint->ext_coord.lh == &hint->lh);
  34764. + return reiser4_seal_validate(&hint->seal, &hint->ext_coord.coord, key,
  34765. + hint->ext_coord.lh, lock_mode,
  34766. + ZNODE_LOCK_LOPRI);
  34767. +}
  34768. +
  34769. +/**
  34770. + * Look for place at twig level for extent corresponding to page,
  34771. + * call extent's writepage method to create unallocated extent if
  34772. + * it does not exist yet, initialize jnode, capture page
  34773. + */
  34774. +int find_or_create_extent(struct page *page)
  34775. +{
  34776. + int result;
  34777. + struct inode *inode;
  34778. + int plugged_hole;
  34779. +
  34780. + jnode *node;
  34781. +
  34782. + assert("vs-1065", page->mapping && page->mapping->host);
  34783. + inode = page->mapping->host;
  34784. +
  34785. + lock_page(page);
  34786. + node = jnode_of_page(page);
  34787. + if (IS_ERR(node)) {
  34788. + unlock_page(page);
  34789. + return PTR_ERR(node);
  34790. + }
  34791. + JF_SET(node, JNODE_WRITE_PREPARED);
  34792. + unlock_page(page);
  34793. +
  34794. + if (node->blocknr == 0) {
  34795. + plugged_hole = 0;
  34796. + result = reiser4_update_extent(inode, node, page_offset(page),
  34797. + &plugged_hole);
  34798. + if (result) {
  34799. + JF_CLR(node, JNODE_WRITE_PREPARED);
  34800. + jput(node);
  34801. + warning("edward-1549",
  34802. + "reiser4_update_extent failed: %d", result);
  34803. + return result;
  34804. + }
  34805. + if (plugged_hole)
  34806. + reiser4_update_sd(inode);
  34807. + } else {
  34808. + spin_lock_jnode(node);
  34809. + result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
  34810. + BUG_ON(result != 0);
  34811. + jnode_make_dirty_locked(node);
  34812. + spin_unlock_jnode(node);
  34813. + }
  34814. +
  34815. + BUG_ON(node->atom == NULL);
  34816. + JF_CLR(node, JNODE_WRITE_PREPARED);
  34817. +
  34818. + if (get_current_context()->entd) {
  34819. + entd_context *ent = get_entd_context(node->tree->super);
  34820. +
  34821. + if (ent->cur_request->page == page)
  34822. + /* the following reference will be
  34823. + dropped in reiser4_writeout */
  34824. + ent->cur_request->node = jref(node);
  34825. + }
  34826. + jput(node);
  34827. + return 0;
  34828. +}
  34829. +
  34830. +/**
  34831. + * has_anonymous_pages - check whether inode has pages dirtied via mmap
  34832. + * @inode: inode to check
  34833. + *
  34834. + * Returns true if inode's mapping has dirty pages which do not belong to any
  34835. + * atom. Those are either tagged PAGECACHE_TAG_REISER4_MOVED in mapping's page
  34836. + * tree or were eflushed and can be found via jnodes tagged
  34837. + * EFLUSH_TAG_ANONYMOUS in radix tree of jnodes.
  34838. + */
  34839. +static int has_anonymous_pages(struct inode *inode)
  34840. +{
  34841. + int result;
  34842. +
  34843. + xa_lock_irq(&inode->i_mapping->i_pages);
  34844. + result = radix_tree_tagged(&inode->i_mapping->i_pages,
  34845. + PAGECACHE_TAG_REISER4_MOVED);
  34846. + xa_unlock_irq(&inode->i_mapping->i_pages);
  34847. + return result;
  34848. +}
  34849. +
  34850. +/**
  34851. + * capture_page_and_create_extent -
  34852. + * @page: page to be captured
  34853. + *
  34854. + * Grabs space for extent creation and stat data update and calls function to
  34855. + * do actual work.
  34856. + * Exclusive, or non-exclusive lock must be held.
  34857. + */
  34858. +static int capture_page_and_create_extent(struct page *page)
  34859. +{
  34860. + int result;
  34861. + struct inode *inode;
  34862. +
  34863. + assert("vs-1084", page->mapping && page->mapping->host);
  34864. + inode = page->mapping->host;
  34865. + assert("vs-1139",
  34866. + unix_file_inode_data(inode)->container == UF_CONTAINER_EXTENTS);
  34867. + /* page belongs to file */
  34868. + assert("vs-1393",
  34869. + inode->i_size > page_offset(page));
  34870. +
  34871. + /* page capture may require extent creation (if it does not exist yet)
  34872. + and stat data's update (number of blocks changes on extent
  34873. + creation) */
  34874. + grab_space_enable();
  34875. + result = reiser4_grab_space(2 * estimate_one_insert_into_item
  34876. + (reiser4_tree_by_inode(inode)),
  34877. + BA_CAN_COMMIT);
  34878. + if (likely(!result))
  34879. + result = find_or_create_extent(page);
  34880. +
  34881. + if (result != 0)
  34882. + SetPageError(page);
  34883. + return result;
  34884. +}
  34885. +
  34886. +/*
  34887. + * Support for "anonymous" pages and jnodes.
  34888. + *
  34889. + * When file is write-accessed through mmap pages can be dirtied from the user
  34890. + * level. In this case kernel is not notified until one of following happens:
  34891. + *
  34892. + * (1) msync()
  34893. + *
  34894. + * (2) truncate() (either explicit or through unlink)
  34895. + *
  34896. + * (3) VM scanner starts reclaiming mapped pages, dirtying them before
  34897. + * starting write-back.
  34898. + *
  34899. + * As a result of (3) ->writepage may be called on a dirty page without
  34900. + * jnode. Such page is called "anonymous" in reiser4. Certain work-loads
  34901. + * (iozone) generate huge number of anonymous pages.
  34902. + *
  34903. + * reiser4_sync_sb() method tries to insert anonymous pages into
  34904. + * tree. This is done by capture_anonymous_*() functions below.
  34905. + */
  34906. +
  34907. +/**
  34908. + * capture_anonymous_page - involve page into transaction
  34909. + * @pg: page to deal with
  34910. + *
  34911. + * Takes care that @page has corresponding metadata in the tree, creates jnode
  34912. + * for @page and captures it. On success 1 is returned.
  34913. + */
  34914. +static int capture_anonymous_page(struct page *page)
  34915. +{
  34916. + int result;
  34917. +
  34918. + if (PageWriteback(page))
  34919. + /* FIXME: do nothing? */
  34920. + return 0;
  34921. +
  34922. + result = capture_page_and_create_extent(page);
  34923. + if (result == 0) {
  34924. + result = 1;
  34925. + } else
  34926. + warning("nikita-3329",
  34927. + "Cannot capture anon page: %i", result);
  34928. +
  34929. + return result;
  34930. +}
  34931. +
  34932. +/**
  34933. + * capture_anonymous_pages - find and capture pages dirtied via mmap
  34934. + * @mapping: address space where to look for pages
  34935. + * @index: start index
  34936. + * @to_capture: maximum number of pages to capture
  34937. + *
  34938. + * Looks for pages tagged REISER4_MOVED starting from the *@index-th page,
  34939. + * captures (involves into atom) them, returns number of captured pages,
  34940. + * updates @index to next page after the last captured one.
  34941. + */
  34942. +static int
  34943. +capture_anonymous_pages(struct address_space *mapping, pgoff_t *index,
  34944. + unsigned int to_capture)
  34945. +{
  34946. + int result;
  34947. + struct pagevec pvec;
  34948. + unsigned int i, count;
  34949. + int nr;
  34950. +
  34951. + pagevec_init(&pvec);
  34952. + count = min(pagevec_space(&pvec), to_capture);
  34953. + nr = 0;
  34954. +
  34955. + /* find pages tagged MOVED */
  34956. + xa_lock_irq(&mapping->i_pages);
  34957. + pvec.nr = radix_tree_gang_lookup_tag(&mapping->i_pages,
  34958. + (void **)pvec.pages, *index, count,
  34959. + PAGECACHE_TAG_REISER4_MOVED);
  34960. + if (pagevec_count(&pvec) == 0) {
  34961. + /*
  34962. + * there are no pages tagged MOVED in mapping->page_tree
  34963. + * starting from *index
  34964. + */
  34965. + xa_unlock_irq(&mapping->i_pages);
  34966. + *index = (pgoff_t)-1;
  34967. + return 0;
  34968. + }
  34969. +
  34970. + /* clear MOVED tag for all found pages */
  34971. + for (i = 0; i < pagevec_count(&pvec); i++) {
  34972. + get_page(pvec.pages[i]);
  34973. + radix_tree_tag_clear(&mapping->i_pages, pvec.pages[i]->index,
  34974. + PAGECACHE_TAG_REISER4_MOVED);
  34975. + }
  34976. + xa_unlock_irq(&mapping->i_pages);
  34977. +
  34978. +
  34979. + *index = pvec.pages[i - 1]->index + 1;
  34980. +
  34981. + for (i = 0; i < pagevec_count(&pvec); i++) {
  34982. + result = capture_anonymous_page(pvec.pages[i]);
  34983. + if (result == 1)
  34984. + nr++;
  34985. + else {
  34986. + if (result < 0) {
  34987. + warning("vs-1454",
  34988. + "failed to capture page: "
  34989. + "result=%d, captured=%d)\n",
  34990. + result, i);
  34991. +
  34992. + /*
  34993. + * set MOVED tag to all pages which were left not
  34994. + * captured
  34995. + */
  34996. + xa_lock_irq(&mapping->i_pages);
  34997. + for (; i < pagevec_count(&pvec); i ++) {
  34998. + radix_tree_tag_set(&mapping->i_pages,
  34999. + pvec.pages[i]->index,
  35000. + PAGECACHE_TAG_REISER4_MOVED);
  35001. + }
  35002. + xa_unlock_irq(&mapping->i_pages);
  35003. +
  35004. + pagevec_release(&pvec);
  35005. + return result;
  35006. + } else {
  35007. + /*
  35008. + * result == 0. capture_anonymous_page returns
  35009. + * 0 for Writeback-ed page. Set MOVED tag on
  35010. + * that page
  35011. + */
  35012. + xa_lock_irq(&mapping->i_pages);
  35013. + radix_tree_tag_set(&mapping->i_pages,
  35014. + pvec.pages[i]->index,
  35015. + PAGECACHE_TAG_REISER4_MOVED);
  35016. + xa_unlock_irq(&mapping->i_pages);
  35017. + if (i == 0)
  35018. + *index = pvec.pages[0]->index;
  35019. + else
  35020. + *index = pvec.pages[i - 1]->index + 1;
  35021. + }
  35022. + }
  35023. + }
  35024. + pagevec_release(&pvec);
  35025. + return nr;
  35026. +}
  35027. +
  35028. +/**
  35029. + * capture_anonymous_jnodes - find and capture anonymous jnodes
  35030. + * @mapping: address space where to look for jnodes
  35031. + * @from: start index
  35032. + * @to: end index
  35033. + * @to_capture: maximum number of jnodes to capture
  35034. + *
  35035. + * Looks for jnodes tagged EFLUSH_TAG_ANONYMOUS in inode's tree of jnodes in
  35036. + * the range of indexes @from-@to and captures them, returns number of captured
  35037. + * jnodes, updates @from to next jnode after the last captured one.
  35038. + */
  35039. +static int
  35040. +capture_anonymous_jnodes(struct address_space *mapping,
  35041. + pgoff_t *from, pgoff_t to, int to_capture)
  35042. +{
  35043. + *from = to;
  35044. + return 0;
  35045. +}
  35046. +
  35047. +/*
  35048. + * Commit atom of the jnode of a page.
  35049. + */
  35050. +int reiser4_sync_page(struct page *page)
  35051. +{
  35052. + int result;
  35053. + do {
  35054. + jnode *node;
  35055. + txn_atom *atom;
  35056. +
  35057. + lock_page(page);
  35058. + node = jprivate(page);
  35059. + if (node != NULL) {
  35060. + spin_lock_jnode(node);
  35061. + atom = jnode_get_atom(node);
  35062. + spin_unlock_jnode(node);
  35063. + } else
  35064. + atom = NULL;
  35065. + unlock_page(page);
  35066. + result = reiser4_sync_atom(atom);
  35067. + } while (result == -E_REPEAT);
  35068. + /*
  35069. + * ZAM-FIXME-HANS: document the logic of this loop, is it just to
  35070. + * handle the case where more pages get added to the atom while we are
  35071. + * syncing it?
  35072. + */
  35073. + assert("nikita-3485", ergo(result == 0,
  35074. + get_current_context()->trans->atom == NULL));
  35075. + return result;
  35076. +}
  35077. +
  35078. +/*
  35079. + * Commit atoms of pages on @pages list.
  35080. + * call sync_page for each page from mapping's page tree
  35081. + */
  35082. +static int sync_page_list(struct inode *inode)
  35083. +{
  35084. + int result;
  35085. + struct address_space *mapping;
  35086. + unsigned long from; /* start index for radix_tree_gang_lookup */
  35087. + unsigned int found; /* return value for radix_tree_gang_lookup */
  35088. +
  35089. + mapping = inode->i_mapping;
  35090. + from = 0;
  35091. + result = 0;
  35092. + xa_lock_irq(&mapping->i_pages);
  35093. + while (result == 0) {
  35094. + struct page *page;
  35095. +
  35096. + found =
  35097. + radix_tree_gang_lookup(&mapping->i_pages, (void **)&page,
  35098. + from, 1);
  35099. + assert("edward-1550", found < 2);
  35100. + if (found == 0)
  35101. + break;
  35102. + /**
  35103. + * page may not leave radix tree because it is protected from
  35104. + * truncating by inode->i_mutex locked by sys_fsync
  35105. + */
  35106. + get_page(page);
  35107. + xa_unlock_irq(&mapping->i_pages);
  35108. +
  35109. + from = page->index + 1;
  35110. +
  35111. + result = reiser4_sync_page(page);
  35112. +
  35113. + put_page(page);
  35114. + xa_lock_irq(&mapping->i_pages);
  35115. + }
  35116. +
  35117. + xa_unlock_irq(&mapping->i_pages);
  35118. + return result;
  35119. +}
  35120. +
  35121. +static int commit_file_atoms(struct inode *inode)
  35122. +{
  35123. + int result;
  35124. + struct unix_file_info *uf_info;
  35125. +
  35126. + uf_info = unix_file_inode_data(inode);
  35127. +
  35128. + get_exclusive_access(uf_info);
  35129. + /*
  35130. + * find what items file is made from
  35131. + */
  35132. + result = find_file_state(inode, uf_info);
  35133. + drop_exclusive_access(uf_info);
  35134. + if (result != 0)
  35135. + return result;
  35136. +
  35137. + /*
  35138. + * file state cannot change because we are under ->i_mutex
  35139. + */
  35140. + switch (uf_info->container) {
  35141. + case UF_CONTAINER_EXTENTS:
  35142. + /* find_file_state might open or join an atom */
  35143. + reiser4_txn_restart_current();
  35144. + result =
  35145. + /*
  35146. + * when we are called by
  35147. + * filemap_fdatawrite->
  35148. + * do_writepages()->
  35149. + * reiser4_writepages_dispatch()
  35150. + *
  35151. + * inode->i_mapping->dirty_pages are spliced into
  35152. + * ->io_pages, leaving ->dirty_pages dirty.
  35153. + *
  35154. + * When we are called from
  35155. + * reiser4_fsync()->sync_unix_file(), we have to
  35156. + * commit atoms of all pages on the ->dirty_list.
  35157. + *
  35158. + * So for simplicity we just commit ->io_pages and
  35159. + * ->dirty_pages.
  35160. + */
  35161. + sync_page_list(inode);
  35162. + break;
  35163. + case UF_CONTAINER_TAILS:
  35164. + /*
  35165. + * NOTE-NIKITA probably we can be smarter for tails. For now
  35166. + * just commit all existing atoms.
  35167. + */
  35168. + result = txnmgr_force_commit_all(inode->i_sb, 0);
  35169. + break;
  35170. + case UF_CONTAINER_EMPTY:
  35171. + result = 0;
  35172. + break;
  35173. + case UF_CONTAINER_UNKNOWN:
  35174. + default:
  35175. + result = -EIO;
  35176. + break;
  35177. + }
  35178. +
  35179. + /*
  35180. + * commit current transaction: there can be captured nodes from
  35181. + * find_file_state() and finish_conversion().
  35182. + */
  35183. + reiser4_txn_restart_current();
  35184. + return result;
  35185. +}
  35186. +
  35187. +/**
  35188. + * writepages_unix_file - writepages of struct address_space_operations
  35189. + * @mapping:
  35190. + * @wbc:
  35191. + *
  35192. + * This captures anonymous pages and anonymous jnodes. Anonymous pages are
  35193. + * pages which are dirtied via mmapping. Anonymous jnodes are ones which were
  35194. + * created by reiser4_writepage.
  35195. + */
  35196. +int writepages_unix_file(struct address_space *mapping,
  35197. + struct writeback_control *wbc)
  35198. +{
  35199. + int result;
  35200. + struct unix_file_info *uf_info;
  35201. + pgoff_t pindex, jindex, nr_pages;
  35202. + long to_capture;
  35203. + struct inode *inode;
  35204. +
  35205. + inode = mapping->host;
  35206. + if (!has_anonymous_pages(inode)) {
  35207. + result = 0;
  35208. + goto end;
  35209. + }
  35210. + jindex = pindex = wbc->range_start >> PAGE_SHIFT;
  35211. + result = 0;
  35212. + nr_pages = size_in_pages(i_size_read(inode));
  35213. +
  35214. + uf_info = unix_file_inode_data(inode);
  35215. +
  35216. + do {
  35217. + reiser4_context *ctx;
  35218. +
  35219. + if (wbc->sync_mode != WB_SYNC_ALL)
  35220. + to_capture = min(wbc->nr_to_write, CAPTURE_APAGE_BURST);
  35221. + else
  35222. + to_capture = CAPTURE_APAGE_BURST;
  35223. +
  35224. + ctx = reiser4_init_context(inode->i_sb);
  35225. + if (IS_ERR(ctx)) {
  35226. + result = PTR_ERR(ctx);
  35227. + break;
  35228. + }
  35229. + /* avoid recursive calls to ->sync_inodes */
  35230. + ctx->nobalance = 1;
  35231. + assert("zam-760", lock_stack_isclean(get_current_lock_stack()));
  35232. + assert("edward-1551", LOCK_CNT_NIL(inode_sem_w));
  35233. + assert("edward-1552", LOCK_CNT_NIL(inode_sem_r));
  35234. +
  35235. + reiser4_txn_restart_current();
  35236. +
  35237. + /* we have to get nonexclusive access to the file */
  35238. + if (get_current_context()->entd) {
  35239. + /*
  35240. + * use nonblocking version of nonexclusive_access to
  35241. + * avoid deadlock which might look like the following:
  35242. + * process P1 holds NEA on file F1 and called entd to
  35243. + * reclaim some memory. Entd works for P1 and is going
  35244. + * to capture pages of file F2. To do that entd has to
  35245. + * get NEA to F2. F2 is held by process P2 which also
  35246. + * called entd. But entd is serving P1 at the moment
  35247. + * and P2 has to wait. Process P3 trying to get EA to
  35248. + * file F2. Existence of pending EA request to file F2
  35249. + * makes impossible for entd to get NEA to file
  35250. + * F2. Neither of these process can continue. Using
  35251. + * nonblocking version of getting NEA is supposed to
  35252. + * avoid this deadlock.
  35253. + */
  35254. + if (try_to_get_nonexclusive_access(uf_info) == 0) {
  35255. + result = RETERR(-EBUSY);
  35256. + reiser4_exit_context(ctx);
  35257. + break;
  35258. + }
  35259. + } else
  35260. + get_nonexclusive_access(uf_info);
  35261. +
  35262. + while (to_capture > 0) {
  35263. + pgoff_t start;
  35264. +
  35265. + assert("vs-1727", jindex <= pindex);
  35266. + if (pindex == jindex) {
  35267. + start = pindex;
  35268. + result =
  35269. + capture_anonymous_pages(inode->i_mapping,
  35270. + &pindex,
  35271. + to_capture);
  35272. + if (result <= 0)
  35273. + break;
  35274. + to_capture -= result;
  35275. + wbc->nr_to_write -= result;
  35276. + if (start + result == pindex) {
  35277. + jindex = pindex;
  35278. + continue;
  35279. + }
  35280. + if (to_capture <= 0)
  35281. + break;
  35282. + }
  35283. + /* deal with anonymous jnodes between jindex and pindex */
  35284. + result =
  35285. + capture_anonymous_jnodes(inode->i_mapping, &jindex,
  35286. + pindex, to_capture);
  35287. + if (result < 0)
  35288. + break;
  35289. + to_capture -= result;
  35290. + get_current_context()->nr_captured += result;
  35291. +
  35292. + if (jindex == (pgoff_t) - 1) {
  35293. + assert("vs-1728", pindex == (pgoff_t) - 1);
  35294. + break;
  35295. + }
  35296. + }
  35297. + if (to_capture <= 0)
  35298. + /* there may be left more pages */
  35299. + __mark_inode_dirty(inode, I_DIRTY_PAGES);
  35300. +
  35301. + drop_nonexclusive_access(uf_info);
  35302. + if (result < 0) {
  35303. + /* error happened */
  35304. + reiser4_exit_context(ctx);
  35305. + return result;
  35306. + }
  35307. + if (wbc->sync_mode != WB_SYNC_ALL) {
  35308. + reiser4_exit_context(ctx);
  35309. + return 0;
  35310. + }
  35311. + result = commit_file_atoms(inode);
  35312. + reiser4_exit_context(ctx);
  35313. + if (pindex >= nr_pages && jindex == pindex)
  35314. + break;
  35315. + } while (1);
  35316. +
  35317. + end:
  35318. + if (is_in_reiser4_context()) {
  35319. + if (get_current_context()->nr_captured >= CAPTURE_APAGE_BURST) {
  35320. + /*
  35321. + * there are already pages to flush, flush them out, do
  35322. + * not delay until end of reiser4_sync_inodes
  35323. + */
  35324. + reiser4_writeout(inode->i_sb, wbc);
  35325. + get_current_context()->nr_captured = 0;
  35326. + }
  35327. + }
  35328. + return result;
  35329. +}
  35330. +
  35331. +/**
  35332. + * readpage_unix_file - readpage of struct address_space_operations
  35333. + * @file:
  35334. + * @page:
  35335. + *
  35336. + * Compose a key and search for item containing information about @page
  35337. + * data. If item is found - its readpage method is called.
  35338. + */
  35339. +int readpage_unix_file(struct file *file, struct page *page)
  35340. +{
  35341. + reiser4_context *ctx;
  35342. + int result;
  35343. + struct inode *inode;
  35344. + reiser4_key key;
  35345. + item_plugin *iplug;
  35346. + hint_t *hint;
  35347. + lock_handle *lh;
  35348. + coord_t *coord;
  35349. +
  35350. + assert("vs-1062", PageLocked(page));
  35351. + assert("vs-976", !PageUptodate(page));
  35352. + assert("vs-1061", page->mapping && page->mapping->host);
  35353. +
  35354. + if (page->mapping->host->i_size <= page_offset(page)) {
  35355. + /* page is out of file */
  35356. + zero_user(page, 0, PAGE_SIZE);
  35357. + SetPageUptodate(page);
  35358. + unlock_page(page);
  35359. + return 0;
  35360. + }
  35361. +
  35362. + inode = page->mapping->host;
  35363. + ctx = reiser4_init_context(inode->i_sb);
  35364. + if (IS_ERR(ctx)) {
  35365. + unlock_page(page);
  35366. + return PTR_ERR(ctx);
  35367. + }
  35368. +
  35369. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  35370. + if (hint == NULL) {
  35371. + unlock_page(page);
  35372. + reiser4_exit_context(ctx);
  35373. + return RETERR(-ENOMEM);
  35374. + }
  35375. +
  35376. + result = load_file_hint(file, hint);
  35377. + if (result) {
  35378. + kfree(hint);
  35379. + unlock_page(page);
  35380. + reiser4_exit_context(ctx);
  35381. + return result;
  35382. + }
  35383. + lh = &hint->lh;
  35384. +
  35385. + /* get key of first byte of the page */
  35386. + key_by_inode_and_offset_common(inode, page_offset(page), &key);
  35387. +
  35388. + /* look for file metadata corresponding to first byte of page */
  35389. + get_page(page);
  35390. + unlock_page(page);
  35391. + result = find_file_item(hint, &key, ZNODE_READ_LOCK, inode);
  35392. + lock_page(page);
  35393. + put_page(page);
  35394. +
  35395. + if (page->mapping == NULL) {
  35396. + /*
  35397. + * readpage allows truncate to run concurrently. Page was
  35398. + * truncated while it was not locked
  35399. + */
  35400. + done_lh(lh);
  35401. + kfree(hint);
  35402. + unlock_page(page);
  35403. + reiser4_txn_restart(ctx);
  35404. + reiser4_exit_context(ctx);
  35405. + return -EINVAL;
  35406. + }
  35407. +
  35408. + if (result != CBK_COORD_FOUND || hint->ext_coord.coord.between != AT_UNIT) {
  35409. + if (result == CBK_COORD_FOUND &&
  35410. + hint->ext_coord.coord.between != AT_UNIT)
  35411. + /* file is truncated */
  35412. + result = -EINVAL;
  35413. + done_lh(lh);
  35414. + kfree(hint);
  35415. + unlock_page(page);
  35416. + reiser4_txn_restart(ctx);
  35417. + reiser4_exit_context(ctx);
  35418. + return result;
  35419. + }
  35420. +
  35421. + /*
  35422. + * item corresponding to page is found. It can not be removed because
  35423. + * znode lock is held
  35424. + */
  35425. + if (PageUptodate(page)) {
  35426. + done_lh(lh);
  35427. + kfree(hint);
  35428. + unlock_page(page);
  35429. + reiser4_txn_restart(ctx);
  35430. + reiser4_exit_context(ctx);
  35431. + return 0;
  35432. + }
  35433. +
  35434. + coord = &hint->ext_coord.coord;
  35435. + result = zload(coord->node);
  35436. + if (result) {
  35437. + done_lh(lh);
  35438. + kfree(hint);
  35439. + unlock_page(page);
  35440. + reiser4_txn_restart(ctx);
  35441. + reiser4_exit_context(ctx);
  35442. + return result;
  35443. + }
  35444. +
  35445. + validate_extended_coord(&hint->ext_coord, page_offset(page));
  35446. +
  35447. + if (!coord_is_existing_unit(coord)) {
  35448. + /* this indicates corruption */
  35449. + warning("vs-280",
  35450. + "Looking for page %lu of file %llu (size %lli). "
  35451. + "No file items found (%d). File is corrupted?\n",
  35452. + page->index, (unsigned long long)get_inode_oid(inode),
  35453. + inode->i_size, result);
  35454. + zrelse(coord->node);
  35455. + done_lh(lh);
  35456. + kfree(hint);
  35457. + unlock_page(page);
  35458. + reiser4_txn_restart(ctx);
  35459. + reiser4_exit_context(ctx);
  35460. + return RETERR(-EIO);
  35461. + }
  35462. +
  35463. + /*
  35464. + * get the plugin of the found item, or use the extent plugin
  35465. + * if there is none
  35466. + */
  35467. + iplug = item_plugin_by_coord(coord);
  35468. + if (iplug->s.file.readpage)
  35469. + result = iplug->s.file.readpage(coord, page);
  35470. + else
  35471. + result = RETERR(-EINVAL);
  35472. +
  35473. + if (!result) {
  35474. + set_key_offset(&key,
  35475. + (loff_t) (page->index + 1) << PAGE_SHIFT);
  35476. + /* FIXME should call reiser4_set_hint() */
  35477. + reiser4_unset_hint(hint);
  35478. + } else {
  35479. + unlock_page(page);
  35480. + reiser4_unset_hint(hint);
  35481. + }
  35482. + assert("vs-979",
  35483. + ergo(result == 0, (PageLocked(page) || PageUptodate(page))));
  35484. + assert("vs-9791", ergo(result != 0, !PageLocked(page)));
  35485. +
  35486. + zrelse(coord->node);
  35487. + done_lh(lh);
  35488. +
  35489. + save_file_hint(file, hint);
  35490. + kfree(hint);
  35491. +
  35492. + /*
  35493. + * FIXME: explain why it is needed. HINT: page allocation in write can
  35494. + * not be done when atom is not NULL because reiser4_writepage can not
  35495. + * kick entd and have to eflush
  35496. + */
  35497. + reiser4_txn_restart(ctx);
  35498. + reiser4_exit_context(ctx);
  35499. + return result;
  35500. +}
  35501. +
  35502. +struct uf_readpages_context {
  35503. + lock_handle lh;
  35504. + coord_t coord;
  35505. +};
  35506. +
  35507. +/*
  35508. + * A callback function for readpages_unix_file/read_cache_pages.
  35509. + * We don't take non-exclusive access. If an item different from
  35510. + * extent pointer is found in some iteration, then return error
  35511. + * (-EINVAL).
  35512. + *
  35513. + * @data -- a pointer to reiser4_readpages_context object,
  35514. + * to save the twig lock and the coord between
  35515. + * read_cache_page iterations.
  35516. + * @page -- page to start read.
  35517. + */
  35518. +static int readpages_filler(void * data, struct page * page)
  35519. +{
  35520. + struct uf_readpages_context *rc = data;
  35521. + jnode * node;
  35522. + int ret = 0;
  35523. + reiser4_extent *ext;
  35524. + __u64 ext_index;
  35525. + int cbk_done = 0;
  35526. + struct address_space *mapping = page->mapping;
  35527. +
  35528. + if (PageUptodate(page)) {
  35529. + unlock_page(page);
  35530. + return 0;
  35531. + }
  35532. + get_page(page);
  35533. +
  35534. + if (rc->lh.node == 0) {
  35535. + /* no twig lock - have to do tree search. */
  35536. + reiser4_key key;
  35537. + repeat:
  35538. + unlock_page(page);
  35539. + key_by_inode_and_offset_common(
  35540. + mapping->host, page_offset(page), &key);
  35541. + ret = coord_by_key(
  35542. + &get_super_private(mapping->host->i_sb)->tree,
  35543. + &key, &rc->coord, &rc->lh,
  35544. + ZNODE_READ_LOCK, FIND_EXACT,
  35545. + TWIG_LEVEL, TWIG_LEVEL, CBK_UNIQUE, NULL);
  35546. + if (unlikely(ret))
  35547. + goto exit;
  35548. + lock_page(page);
  35549. + if (PageUptodate(page))
  35550. + goto unlock;
  35551. + cbk_done = 1;
  35552. + }
  35553. + ret = zload(rc->coord.node);
  35554. + if (unlikely(ret))
  35555. + goto unlock;
  35556. + if (!coord_is_existing_item(&rc->coord)) {
  35557. + zrelse(rc->coord.node);
  35558. + ret = RETERR(-ENOENT);
  35559. + goto unlock;
  35560. + }
  35561. + if (!item_is_extent(&rc->coord)) {
  35562. + /*
  35563. + * ->readpages() is not
  35564. + * defined for tail items
  35565. + */
  35566. + zrelse(rc->coord.node);
  35567. + ret = RETERR(-EINVAL);
  35568. + goto unlock;
  35569. + }
  35570. + ext = extent_by_coord(&rc->coord);
  35571. + ext_index = extent_unit_index(&rc->coord);
  35572. + if (page->index < ext_index ||
  35573. + page->index >= ext_index + extent_get_width(ext)) {
  35574. + /* the page index doesn't belong to the extent unit
  35575. + which the coord points to - release the lock and
  35576. + repeat with tree search. */
  35577. + zrelse(rc->coord.node);
  35578. + done_lh(&rc->lh);
  35579. + /* we can be here after a CBK call only in case of
  35580. + corruption of the tree or the tree lookup algorithm bug. */
  35581. + if (unlikely(cbk_done)) {
  35582. + ret = RETERR(-EIO);
  35583. + goto unlock;
  35584. + }
  35585. + goto repeat;
  35586. + }
  35587. + node = jnode_of_page(page);
  35588. + if (unlikely(IS_ERR(node))) {
  35589. + zrelse(rc->coord.node);
  35590. + ret = PTR_ERR(node);
  35591. + goto unlock;
  35592. + }
  35593. + ret = reiser4_do_readpage_extent(ext, page->index - ext_index, page);
  35594. + jput(node);
  35595. + zrelse(rc->coord.node);
  35596. + if (likely(!ret))
  35597. + goto exit;
  35598. + unlock:
  35599. + unlock_page(page);
  35600. + exit:
  35601. + put_page(page);
  35602. + return ret;
  35603. +}
  35604. +
  35605. +/**
  35606. + * readpages_unix_file - called by the readahead code, starts reading for each
  35607. + * page of given list of pages
  35608. + */
  35609. +int readpages_unix_file(struct file *file, struct address_space *mapping,
  35610. + struct list_head *pages, unsigned nr_pages)
  35611. +{
  35612. + reiser4_context *ctx;
  35613. + struct uf_readpages_context rc;
  35614. + int ret;
  35615. +
  35616. + ctx = reiser4_init_context(mapping->host->i_sb);
  35617. + if (IS_ERR(ctx)) {
  35618. + put_pages_list(pages);
  35619. + return PTR_ERR(ctx);
  35620. + }
  35621. + init_lh(&rc.lh);
  35622. + ret = read_cache_pages(mapping, pages, readpages_filler, &rc);
  35623. + done_lh(&rc.lh);
  35624. +
  35625. + context_set_commit_async(ctx);
  35626. + /* close the transaction to protect further page allocation from deadlocks */
  35627. + reiser4_txn_restart(ctx);
  35628. + reiser4_exit_context(ctx);
  35629. + return ret;
  35630. +}
  35631. +
  35632. +static reiser4_block_nr unix_file_estimate_read(struct inode *inode,
  35633. + loff_t count UNUSED_ARG)
  35634. +{
  35635. + /* We should reserve one block, because of updating of the stat data
  35636. + item */
  35637. + assert("vs-1249",
  35638. + inode_file_plugin(inode)->estimate.update ==
  35639. + estimate_update_common);
  35640. + return estimate_update_common(inode);
  35641. +}
  35642. +
  35643. +/**
  35644. + * In a sequential manner find corresponding items in the tree
  35645. + * and read against them.
  35646. + * This is called with nonexclusive access obtained, so that
  35647. + * file's container can not change
  35648. + */
  35649. +static ssize_t do_read_compound_file(hint_t *hint,
  35650. + struct kiocb *iocb,
  35651. + struct iov_iter *iter)
  35652. +{
  35653. + struct inode *inode = file_inode(iocb->ki_filp);
  35654. + size_t count = iov_iter_count(iter);
  35655. + coord_t *coord;
  35656. + znode *loaded;
  35657. + int result;
  35658. + flow_t flow;
  35659. +
  35660. + result = flow_by_inode_unix_file(inode, NULL,
  35661. + 1 /* user space */, count,
  35662. + iocb->ki_pos, READ_OP, &flow);
  35663. + if (unlikely(result))
  35664. + return result;
  35665. + /*
  35666. + * get seal and coord sealed with it from reiser4 private data
  35667. + * of struct file. The coord will tell us where our last read
  35668. + * of this file finished, and the seal will help to determine
  35669. + * if that location is still valid.
  35670. + */
  35671. + coord = &hint->ext_coord.coord;
  35672. + while (flow.length && result == 0) {
  35673. + result = find_file_item(hint, &flow.key,
  35674. + ZNODE_READ_LOCK, inode);
  35675. + if (cbk_errored(result))
  35676. + /* error happened */
  35677. + break;
  35678. +
  35679. + if (coord->between != AT_UNIT) {
  35680. + /*
  35681. + * there were no items corresponding to given offset
  35682. + */
  35683. + done_lh(hint->ext_coord.lh);
  35684. + break;
  35685. + }
  35686. + loaded = coord->node;
  35687. + result = zload(loaded);
  35688. + if (unlikely(result)) {
  35689. + done_lh(hint->ext_coord.lh);
  35690. + break;
  35691. + }
  35692. + if (hint->ext_coord.valid == 0)
  35693. + validate_extended_coord(&hint->ext_coord,
  35694. + get_key_offset(&flow.key));
  35695. +
  35696. + assert("vs-4", hint->ext_coord.valid == 1);
  35697. + assert("vs-33", hint->ext_coord.lh == &hint->lh);
  35698. + /* call item's read method */
  35699. + result = item_plugin_by_coord(coord)->s.file.read(&flow, hint,
  35700. + iocb, iter);
  35701. + zrelse(loaded);
  35702. + done_lh(hint->ext_coord.lh);
  35703. + }
  35704. + return (count - flow.length) ? (count - flow.length) : result;
  35705. +}
  35706. +
  35707. +static ssize_t read_compound_file(struct kiocb *iocb, struct iov_iter *iter);
  35708. +
  35709. +/**
  35710. + * unix-file specific ->read() method
  35711. + * of struct file_operations.
  35712. + */
  35713. +ssize_t read_unix_file(struct kiocb *iocb, struct iov_iter *iter)
  35714. +{
  35715. + struct file *file = iocb->ki_filp;
  35716. + reiser4_context *ctx;
  35717. + ssize_t result;
  35718. + struct inode *inode;
  35719. + struct unix_file_info *uf_info;
  35720. +
  35721. + if (unlikely(iov_iter_count(iter) == 0))
  35722. + return 0;
  35723. +
  35724. + inode = file_inode(file);
  35725. + assert("vs-972", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
  35726. +
  35727. + ctx = reiser4_init_context(inode->i_sb);
  35728. + if (IS_ERR(ctx))
  35729. + return PTR_ERR(ctx);
  35730. +
  35731. + result = reiser4_grab_space_force(unix_file_estimate_read(inode,
  35732. + iov_iter_count(iter)), BA_CAN_COMMIT);
  35733. + if (unlikely(result != 0))
  35734. + goto out2;
  35735. +
  35736. + uf_info = unix_file_inode_data(inode);
  35737. +
  35738. + if (uf_info->container == UF_CONTAINER_UNKNOWN) {
  35739. + get_exclusive_access(uf_info);
  35740. + result = find_file_state(inode, uf_info);
  35741. + if (unlikely(result != 0))
  35742. + goto out;
  35743. + }
  35744. + else
  35745. + get_nonexclusive_access(uf_info);
  35746. +
  35747. + switch (uf_info->container) {
  35748. + case UF_CONTAINER_EXTENTS:
  35749. + if (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
  35750. + result = generic_file_read_iter(iocb, iter);
  35751. + break;
  35752. + }
  35753. + fallthrough;
  35754. + case UF_CONTAINER_TAILS:
  35755. + case UF_CONTAINER_UNKNOWN:
  35756. + result = read_compound_file(iocb, iter);
  35757. + break;
  35758. + case UF_CONTAINER_EMPTY:
  35759. + result = 0;
  35760. + }
  35761. + out:
  35762. + drop_access(uf_info);
  35763. + out2:
  35764. + context_set_commit_async(ctx);
  35765. + reiser4_exit_context(ctx);
  35766. + return result;
  35767. +}
  35768. +
  35769. +/*
  35770. + * Read a file, which contains tails and, maybe,
  35771. + * extents.
  35772. + *
  35773. + * Sometimes file can consist of items of both types
  35774. + * (extents and tails). It can happen, e.g. because
  35775. + * of failed tail conversion. Also the conversion code
  35776. + * may release exclusive lock before calling
  35777. + * balance_dirty_pages().
  35778. + *
  35779. + * In this case applying a generic VFS library function
  35780. + * would be suboptimal. We use our own "light-weight"
  35781. + * version below.
  35782. + */
  35783. +static ssize_t read_compound_file(struct kiocb *iocb, struct iov_iter *iter)
  35784. +{
  35785. + ssize_t result = 0;
  35786. + struct inode *inode = file_inode(iocb->ki_filp);
  35787. + loff_t i_size = i_size_read(inode);
  35788. + loff_t off = iocb->ki_pos;
  35789. + hint_t *hint;
  35790. +
  35791. + assert("vs-972", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
  35792. +
  35793. + if (off >= i_size)
  35794. + /* position to read from is past the end of file */
  35795. + return 0;
  35796. + iov_iter_truncate(iter, i_size - off);
  35797. +
  35798. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  35799. + if (hint == NULL)
  35800. + return RETERR(-ENOMEM);
  35801. +
  35802. + result = load_file_hint(iocb->ki_filp, hint);
  35803. + if (result) {
  35804. + kfree(hint);
  35805. + return result;
  35806. + }
  35807. + result = do_read_compound_file(hint, iocb, iter);
  35808. +
  35809. + done_lh(&hint->lh);
  35810. + save_file_hint(iocb->ki_filp, hint);
  35811. + kfree(hint);
  35812. + if (result > 0)
  35813. + file_accessed(iocb->ki_filp);
  35814. +
  35815. + return result;
  35816. +}
  35817. +
  35818. +/* This function takes care about @file's pages. First of all it checks if
  35819. + the filesystem is readonly and if so gets out. Otherwise, it throws out all
  35820. + pages of the file if it was mapped for read, is going to be mapped for write,
  35821. + and consists of tails. This is done in order to not manage several copies
  35822. + of the data (first in the page cache and second one in the tails themselves)
  35823. + for the case of mapping files consisting of tails.
  35824. +
  35825. + Here also tail2extent conversion is performed if it is allowed and file
  35826. + is going to be written or mapped for write. This functions may be called
  35827. + from write_unix_file() or mmap_unix_file(). */
  35828. +static int check_pages_unix_file(struct file *file, struct inode *inode)
  35829. +{
  35830. + reiser4_invalidate_pages(inode->i_mapping, 0,
  35831. + (inode->i_size + PAGE_SIZE -
  35832. + 1) >> PAGE_SHIFT, 0);
  35833. + return unpack(file, inode, 0 /* not forever */ );
  35834. +}
  35835. +
  35836. +/**
  35837. + * mmap_unix_file - mmap of struct file_operations
  35838. + * @file: file to mmap
  35839. + * @vma:
  35840. + *
  35841. + * This is implementation of vfs's mmap method of struct file_operations for
  35842. + * unix file plugin. It converts file to extent if necessary. Sets
  35843. + * reiser4_inode's flag - REISER4_HAS_MMAP.
  35844. + */
  35845. +int mmap_unix_file(struct file *file, struct vm_area_struct *vma)
  35846. +{
  35847. + reiser4_context *ctx;
  35848. + int result;
  35849. + struct inode *inode;
  35850. + struct unix_file_info *uf_info;
  35851. + reiser4_block_nr needed;
  35852. +
  35853. + inode = file_inode(file);
  35854. + ctx = reiser4_init_context(inode->i_sb);
  35855. + if (IS_ERR(ctx))
  35856. + return PTR_ERR(ctx);
  35857. +
  35858. + uf_info = unix_file_inode_data(inode);
  35859. +
  35860. + get_exclusive_access_careful(uf_info, inode);
  35861. +
  35862. + if (!IS_RDONLY(inode) && (vma->vm_flags & (VM_MAYWRITE | VM_SHARED))) {
  35863. + /*
  35864. + * we need file built of extent items. If it is still built of
  35865. + * tail items we have to convert it. Find what items the file
  35866. + * is built of
  35867. + */
  35868. + result = find_file_state(inode, uf_info);
  35869. + if (result != 0) {
  35870. + drop_exclusive_access(uf_info);
  35871. + reiser4_exit_context(ctx);
  35872. + return result;
  35873. + }
  35874. +
  35875. + assert("vs-1648", (uf_info->container == UF_CONTAINER_TAILS ||
  35876. + uf_info->container == UF_CONTAINER_EXTENTS ||
  35877. + uf_info->container == UF_CONTAINER_EMPTY));
  35878. + if (uf_info->container == UF_CONTAINER_TAILS) {
  35879. + /*
  35880. + * invalidate all pages and convert file from tails to
  35881. + * extents
  35882. + */
  35883. + result = check_pages_unix_file(file, inode);
  35884. + if (result) {
  35885. + drop_exclusive_access(uf_info);
  35886. + reiser4_exit_context(ctx);
  35887. + return result;
  35888. + }
  35889. + }
  35890. + }
  35891. +
  35892. + /*
  35893. + * generic_file_mmap will do update_atime. Grab space for stat data
  35894. + * update.
  35895. + */
  35896. + needed = inode_file_plugin(inode)->estimate.update(inode);
  35897. + result = reiser4_grab_space_force(needed, BA_CAN_COMMIT);
  35898. + if (result) {
  35899. + drop_exclusive_access(uf_info);
  35900. + reiser4_exit_context(ctx);
  35901. + return result;
  35902. + }
  35903. +
  35904. + result = generic_file_mmap(file, vma);
  35905. + if (result == 0) {
  35906. + /* mark file as having mapping. */
  35907. + reiser4_inode_set_flag(inode, REISER4_HAS_MMAP);
  35908. + }
  35909. +
  35910. + drop_exclusive_access(uf_info);
  35911. + reiser4_exit_context(ctx);
  35912. + return result;
  35913. +}
  35914. +
  35915. +/**
  35916. + * find_first_item
  35917. + * @inode:
  35918. + *
  35919. + * Finds file item which is responsible for first byte in the file.
  35920. + */
  35921. +static int find_first_item(struct inode *inode)
  35922. +{
  35923. + coord_t coord;
  35924. + lock_handle lh;
  35925. + reiser4_key key;
  35926. + int result;
  35927. +
  35928. + coord_init_zero(&coord);
  35929. + init_lh(&lh);
  35930. + inode_file_plugin(inode)->key_by_inode(inode, 0, &key);
  35931. + result = find_file_item_nohint(&coord, &lh, &key, ZNODE_READ_LOCK,
  35932. + inode);
  35933. + if (result == CBK_COORD_FOUND) {
  35934. + if (coord.between == AT_UNIT) {
  35935. + result = zload(coord.node);
  35936. + if (result == 0) {
  35937. + result = item_id_by_coord(&coord);
  35938. + zrelse(coord.node);
  35939. + if (result != EXTENT_POINTER_ID &&
  35940. + result != FORMATTING_ID)
  35941. + result = RETERR(-EIO);
  35942. + }
  35943. + } else
  35944. + result = RETERR(-EIO);
  35945. + }
  35946. + done_lh(&lh);
  35947. + return result;
  35948. +}
  35949. +
  35950. +/**
  35951. + * open_unix_file
  35952. + * @inode:
  35953. + * @file:
  35954. + *
  35955. + * If filesystem is not readonly - complete uncompleted tail conversion if
  35956. + * there was one
  35957. + */
  35958. +int open_unix_file(struct inode *inode, struct file *file)
  35959. +{
  35960. + int result;
  35961. + reiser4_context *ctx;
  35962. + struct unix_file_info *uf_info;
  35963. +
  35964. + if (IS_RDONLY(inode))
  35965. + return 0;
  35966. +
  35967. + if (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED))
  35968. + return 0;
  35969. +
  35970. + ctx = reiser4_init_context(inode->i_sb);
  35971. + if (IS_ERR(ctx))
  35972. + return PTR_ERR(ctx);
  35973. +
  35974. + uf_info = unix_file_inode_data(inode);
  35975. +
  35976. + get_exclusive_access_careful(uf_info, inode);
  35977. +
  35978. + if (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
  35979. + /*
  35980. + * other process completed the conversion
  35981. + */
  35982. + drop_exclusive_access(uf_info);
  35983. + reiser4_exit_context(ctx);
  35984. + return 0;
  35985. + }
  35986. +
  35987. + /*
  35988. + * file left in semi converted state after unclean shutdown or another
  35989. + * thread was doing conversion and dropped exclusive access while doing
  35990. + * balance_dirty_pages(). Complete the conversion
  35991. + */
  35992. + result = find_first_item(inode);
  35993. + if (result == EXTENT_POINTER_ID)
  35994. + /*
  35995. + * first item is extent, therefore there was incomplete
  35996. + * tail2extent conversion. Complete it
  35997. + */
  35998. + result = tail2extent(unix_file_inode_data(inode));
  35999. + else if (result == FORMATTING_ID)
  36000. + /*
  36001. + * first item is formatting item, therefore there was
  36002. + * incomplete extent2tail conversion. Complete it
  36003. + */
  36004. + result = extent2tail(file, unix_file_inode_data(inode));
  36005. + else
  36006. + result = -EIO;
  36007. +
  36008. + assert("vs-1712",
  36009. + ergo(result == 0,
  36010. + (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED) &&
  36011. + !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV))));
  36012. + drop_exclusive_access(uf_info);
  36013. + reiser4_exit_context(ctx);
  36014. + return result;
  36015. +}
  36016. +
  36017. +#define NEITHER_OBTAINED 0
  36018. +#define EA_OBTAINED 1
  36019. +#define NEA_OBTAINED 2
  36020. +
  36021. +static void drop_access(struct unix_file_info *uf_info)
  36022. +{
  36023. + if (uf_info->exclusive_use)
  36024. + drop_exclusive_access(uf_info);
  36025. + else
  36026. + drop_nonexclusive_access(uf_info);
  36027. +}
  36028. +
  36029. +#define debug_wuf(format, ...) printk("%s: %d: %s: " format "\n", \
  36030. + __FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
  36031. +
  36032. +/**
  36033. + * write_unix_file - private ->write() method of unix_file plugin.
  36034. + *
  36035. + * @file: file to write to
  36036. + * @buf: address of user-space buffer
  36037. + * @count: number of bytes to write
  36038. + * @pos: position in file to write to
  36039. + * @cont: unused argument, as we don't perform plugin conversion when being
  36040. + * managed by unix_file plugin.
  36041. + */
  36042. +ssize_t write_unix_file(struct file *file,
  36043. + const char __user *buf,
  36044. + size_t count, loff_t *pos,
  36045. + struct dispatch_context *cont)
  36046. +{
  36047. + int result;
  36048. + reiser4_context *ctx;
  36049. + struct inode *inode;
  36050. + struct unix_file_info *uf_info;
  36051. + ssize_t written;
  36052. + int to_write = PAGE_SIZE * WRITE_GRANULARITY;
  36053. + size_t left;
  36054. + ssize_t (*write_op)(struct file *, struct inode *,
  36055. + const char __user *, size_t,
  36056. + loff_t *pos);
  36057. + int ea;
  36058. + int enospc = 0; /* item plugin ->write() returned ENOSPC */
  36059. + loff_t new_size;
  36060. +
  36061. + ctx = get_current_context();
  36062. + inode = file_inode(file);
  36063. +
  36064. + assert("vs-947", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
  36065. + assert("vs-9471", (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)));
  36066. +
  36067. + result = file_remove_privs(file);
  36068. + if (result) {
  36069. + context_set_commit_async(ctx);
  36070. + return result;
  36071. + }
  36072. + /* remove_suid might create a transaction */
  36073. + reiser4_txn_restart(ctx);
  36074. +
  36075. + uf_info = unix_file_inode_data(inode);
  36076. +
  36077. + written = 0;
  36078. + left = count;
  36079. + ea = NEITHER_OBTAINED;
  36080. + enospc = 0;
  36081. +
  36082. + new_size = i_size_read(inode);
  36083. + if (*pos + count > new_size)
  36084. + new_size = *pos + count;
  36085. +
  36086. + while (left) {
  36087. + int update_sd = 0;
  36088. + if (left < to_write)
  36089. + to_write = left;
  36090. +
  36091. + if (uf_info->container == UF_CONTAINER_EMPTY) {
  36092. + get_exclusive_access(uf_info);
  36093. + ea = EA_OBTAINED;
  36094. + if (uf_info->container != UF_CONTAINER_EMPTY) {
  36095. + /* file is made not empty by another process */
  36096. + drop_exclusive_access(uf_info);
  36097. + ea = NEITHER_OBTAINED;
  36098. + continue;
  36099. + }
  36100. + } else if (uf_info->container == UF_CONTAINER_UNKNOWN) {
  36101. + /*
  36102. + * get exclusive access directly just to not have to
  36103. + * re-obtain it if file will appear empty
  36104. + */
  36105. + get_exclusive_access(uf_info);
  36106. + ea = EA_OBTAINED;
  36107. + result = find_file_state(inode, uf_info);
  36108. + if (result) {
  36109. + drop_exclusive_access(uf_info);
  36110. + ea = NEITHER_OBTAINED;
  36111. + break;
  36112. + }
  36113. + } else {
  36114. + get_nonexclusive_access(uf_info);
  36115. + ea = NEA_OBTAINED;
  36116. + }
  36117. +
  36118. + /* either EA or NEA is obtained. Choose item write method */
  36119. + if (uf_info->container == UF_CONTAINER_EXTENTS) {
  36120. + /* file is built of extent items */
  36121. + write_op = reiser4_write_extent;
  36122. + } else if (uf_info->container == UF_CONTAINER_EMPTY) {
  36123. + /* file is empty */
  36124. + if (should_have_notail(uf_info, new_size))
  36125. + write_op = reiser4_write_extent;
  36126. + else
  36127. + write_op = reiser4_write_tail;
  36128. + } else {
  36129. + /* file is built of tail items */
  36130. + if (should_have_notail(uf_info, new_size)) {
  36131. + if (ea == NEA_OBTAINED) {
  36132. + drop_nonexclusive_access(uf_info);
  36133. + get_exclusive_access(uf_info);
  36134. + ea = EA_OBTAINED;
  36135. + }
  36136. + if (uf_info->container == UF_CONTAINER_TAILS) {
  36137. + /*
  36142. + * if file is being converted by another
  36139. + * process - wait until it completes
  36140. + */
  36141. + while (1) {
  36142. + if (reiser4_inode_get_flag(inode,
  36143. + REISER4_PART_IN_CONV)) {
  36144. + drop_exclusive_access(uf_info);
  36145. + schedule();
  36146. + get_exclusive_access(uf_info);
  36147. + continue;
  36148. + }
  36149. + break;
  36150. + }
  36151. + if (uf_info->container == UF_CONTAINER_TAILS) {
  36152. + result = tail2extent(uf_info);
  36153. + if (result) {
  36154. + drop_exclusive_access(uf_info);
  36155. + context_set_commit_async(ctx);
  36156. + break;
  36157. + }
  36158. + }
  36159. + }
  36160. + drop_exclusive_access(uf_info);
  36161. + ea = NEITHER_OBTAINED;
  36162. + continue;
  36163. + }
  36164. + write_op = reiser4_write_tail;
  36165. + }
  36166. +
  36167. + written = write_op(file, inode, buf, to_write, pos);
  36168. + if (written == -ENOSPC && !enospc) {
  36169. + drop_access(uf_info);
  36170. + txnmgr_force_commit_all(inode->i_sb, 0);
  36171. + enospc = 1;
  36172. + continue;
  36173. + }
  36174. + if (written < 0) {
  36175. + /*
  36176. + * If this is -ENOSPC, then it happened
  36177. + * second time, so don't try to free space
  36178. + * once again.
  36179. + */
  36180. + drop_access(uf_info);
  36181. + result = written;
  36182. + break;
  36183. + }
  36184. + /* something is written. */
  36185. + if (enospc)
  36186. + enospc = 0;
  36187. + if (uf_info->container == UF_CONTAINER_EMPTY) {
  36188. + assert("edward-1553", ea == EA_OBTAINED);
  36189. + uf_info->container =
  36190. + (write_op == reiser4_write_extent) ?
  36191. + UF_CONTAINER_EXTENTS : UF_CONTAINER_TAILS;
  36192. + }
  36193. + assert("edward-1554",
  36194. + ergo(uf_info->container == UF_CONTAINER_EXTENTS,
  36195. + write_op == reiser4_write_extent));
  36196. + assert("edward-1555",
  36197. + ergo(uf_info->container == UF_CONTAINER_TAILS,
  36198. + write_op == reiser4_write_tail));
  36199. + if (*pos + written > inode->i_size) {
  36200. + INODE_SET_FIELD(inode, i_size, *pos + written);
  36201. + update_sd = 1;
  36202. + }
  36203. + if (!IS_NOCMTIME(inode)) {
  36204. + inode->i_ctime = inode->i_mtime = current_time(inode);
  36205. + update_sd = 1;
  36206. + }
  36207. + if (update_sd) {
  36208. + /*
  36209. + * space for update_sd was reserved in write_op
  36210. + */
  36211. + result = reiser4_update_sd(inode);
  36212. + if (result) {
  36213. + warning("edward-1574",
  36214. + "Can not update stat-data: %i. FSCK?",
  36215. + result);
  36216. + drop_access(uf_info);
  36217. + context_set_commit_async(ctx);
  36218. + break;
  36219. + }
  36220. + }
  36221. + drop_access(uf_info);
  36222. + ea = NEITHER_OBTAINED;
  36223. +
  36224. + /*
  36225. + * tell VM how many pages were dirtied. Maybe number of pages
  36226. + * which were dirty already should not be counted
  36227. + */
  36228. + reiser4_throttle_write(inode);
  36229. + left -= written;
  36230. + buf += written;
  36231. + *pos += written;
  36232. + }
  36233. + if (result == 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
  36234. + reiser4_txn_restart_current();
  36235. + grab_space_enable();
  36236. + result = reiser4_sync_file_common(file, 0, LONG_MAX,
  36237. + 0 /* data and stat data */);
  36238. + if (result)
  36239. + warning("reiser4-7", "failed to sync file %llu",
  36240. + (unsigned long long)get_inode_oid(inode));
  36241. + }
  36242. + /*
  36243. + * return number of written bytes or error code if nothing is
  36244. + * written. Note, that it does not work correctly in case when
  36245. + * sync_unix_file returns error
  36246. + */
  36247. + return (count - left) ? (count - left) : result;
  36248. +}
  36249. +
  36250. +/**
  36251. + * release_unix_file - release of struct file_operations
  36252. + * @inode: inode of released file
  36253. + * @file: file to release
  36254. + *
  36255. + * Implementation of release method of struct file_operations for unix file
  36256. + plugin. If last reference to inode is released - convert all extent items
  36257. + * into tail items if necessary. Frees reiser4 specific file data.
  36258. + */
  36259. +int release_unix_file(struct inode *inode, struct file *file)
  36260. +{
  36261. + reiser4_context *ctx;
  36262. + struct unix_file_info *uf_info;
  36263. + int result;
  36264. + int in_reiser4;
  36265. +
  36266. + in_reiser4 = is_in_reiser4_context();
  36267. +
  36268. + ctx = reiser4_init_context(inode->i_sb);
  36269. + if (IS_ERR(ctx))
  36270. + return PTR_ERR(ctx);
  36271. +
  36272. + result = 0;
  36273. + if (in_reiser4 == 0) {
  36274. + uf_info = unix_file_inode_data(inode);
  36275. +
  36276. + get_exclusive_access_careful(uf_info, inode);
  36277. + if (file->f_path.dentry->d_lockref.count == 1 &&
  36278. + uf_info->container == UF_CONTAINER_EXTENTS &&
  36279. + !should_have_notail(uf_info, inode->i_size) &&
  36280. + !IS_RDONLY(inode)) {
  36281. + result = extent2tail(file, uf_info);
  36282. + if (result != 0) {
  36283. + context_set_commit_async(ctx);
  36284. + warning("nikita-3233",
  36285. + "Failed (%d) to convert in %s (%llu)",
  36286. + result, __FUNCTION__,
  36287. + (unsigned long long)
  36288. + get_inode_oid(inode));
  36289. + }
  36290. + }
  36291. + drop_exclusive_access(uf_info);
  36292. + } else {
  36293. + /*
  36294. + we are within reiser4 context already. How is the latter
  36295. + possible? Simple:
  36296. +
  36297. + (gdb) bt
  36298. + #0 get_exclusive_access ()
  36299. + #2 0xc01e56d3 in release_unix_file ()
  36300. + #3 0xc01c3643 in reiser4_release ()
  36301. + #4 0xc014cae0 in __fput ()
  36302. + #5 0xc013ffc3 in remove_vm_struct ()
  36303. + #6 0xc0141786 in exit_mmap ()
  36304. + #7 0xc0118480 in mmput ()
  36305. + #8 0xc0133205 in oom_kill ()
  36306. + #9 0xc01332d1 in out_of_memory ()
  36307. + #10 0xc013bc1d in try_to_free_pages ()
  36308. + #11 0xc013427b in __alloc_pages ()
  36309. + #12 0xc013f058 in do_anonymous_page ()
  36310. + #13 0xc013f19d in do_no_page ()
  36311. + #14 0xc013f60e in handle_mm_fault ()
  36312. + #15 0xc01131e5 in do_page_fault ()
  36313. + #16 0xc0104935 in error_code ()
  36314. + #17 0xc025c0c6 in __copy_to_user_ll ()
  36315. + #18 0xc01d496f in reiser4_read_tail ()
  36316. + #19 0xc01e4def in read_unix_file ()
  36317. + #20 0xc01c3504 in reiser4_read ()
  36318. + #21 0xc014bd4f in vfs_read ()
  36319. + #22 0xc014bf66 in sys_read ()
  36320. + */
  36321. + warning("vs-44", "out of memory?");
  36322. + }
  36323. +
  36324. + reiser4_free_file_fsdata(file);
  36325. +
  36326. + reiser4_exit_context(ctx);
  36327. + return result;
  36328. +}
  36329. +
  36330. +static void set_file_notail(struct inode *inode)
  36331. +{
  36332. + reiser4_inode *state;
  36333. + formatting_plugin *tplug;
  36334. +
  36335. + state = reiser4_inode_data(inode);
  36336. + tplug = formatting_plugin_by_id(NEVER_TAILS_FORMATTING_ID);
  36337. + force_plugin_pset(inode, PSET_FORMATTING, (reiser4_plugin *)tplug);
  36338. +}
  36339. +
  36340. +/* if file is built of tails - convert it to extents */
  36341. +static int unpack(struct file *filp, struct inode *inode, int forever)
  36342. +{
  36343. + int result = 0;
  36344. + struct unix_file_info *uf_info;
  36345. +
  36346. + uf_info = unix_file_inode_data(inode);
  36347. + assert("vs-1628", ea_obtained(uf_info));
  36348. +
  36349. + result = find_file_state(inode, uf_info);
  36350. + if (result)
  36351. + return result;
  36352. + assert("vs-1074", uf_info->container != UF_CONTAINER_UNKNOWN);
  36353. +
  36354. + if (uf_info->container == UF_CONTAINER_TAILS) {
  36355. + /*
  36356. + * if file is being converted by another process - wait until it
  36357. + * completes
  36358. + */
  36359. + while (1) {
  36360. + if (reiser4_inode_get_flag(inode,
  36361. + REISER4_PART_IN_CONV)) {
  36362. + drop_exclusive_access(uf_info);
  36363. + schedule();
  36364. + get_exclusive_access(uf_info);
  36365. + continue;
  36366. + }
  36367. + break;
  36368. + }
  36369. + if (uf_info->container == UF_CONTAINER_TAILS) {
  36370. + result = tail2extent(uf_info);
  36371. + if (result)
  36372. + return result;
  36373. + }
  36374. + }
  36375. + if (forever) {
  36376. + /* save new formatting plugin in stat data */
  36377. + __u64 tograb;
  36378. +
  36379. + set_file_notail(inode);
  36380. +
  36381. + grab_space_enable();
  36382. + tograb = inode_file_plugin(inode)->estimate.update(inode);
  36383. + result = reiser4_grab_space(tograb, BA_CAN_COMMIT);
  36384. + result = reiser4_update_sd(inode);
  36385. + }
  36386. +
  36387. + return result;
  36388. +}
  36389. +
  36390. +/* implementation of vfs' ioctl method of struct file_operations for unix file
  36391. + plugin
  36392. +*/
  36393. +int ioctl_unix_file(struct file *filp, unsigned int cmd,
  36394. + unsigned long arg UNUSED_ARG)
  36395. +{
  36396. + reiser4_context *ctx;
  36397. + int result;
  36398. + struct inode *inode = filp->f_path.dentry->d_inode;
  36399. +
  36400. + ctx = reiser4_init_context(inode->i_sb);
  36401. + if (IS_ERR(ctx))
  36402. + return PTR_ERR(ctx);
  36403. +
  36404. + switch (cmd) {
  36405. + case REISER4_IOC_UNPACK:
  36406. + get_exclusive_access(unix_file_inode_data(inode));
  36407. + result = unpack(filp, inode, 1 /* forever */ );
  36408. + drop_exclusive_access(unix_file_inode_data(inode));
  36409. + break;
  36410. +
  36411. + default:
  36412. + result = RETERR(-ENOTTY);
  36413. + break;
  36414. + }
  36415. + reiser4_exit_context(ctx);
  36416. + return result;
  36417. +}
  36418. +
  36419. +/* implementation of vfs' bmap method of struct address_space_operations for unix
  36420. + file plugin
  36421. +*/
  36422. +sector_t bmap_unix_file(struct address_space * mapping, sector_t lblock)
  36423. +{
  36424. + reiser4_context *ctx;
  36425. + sector_t result;
  36426. + reiser4_key key;
  36427. + coord_t coord;
  36428. + lock_handle lh;
  36429. + struct inode *inode;
  36430. + item_plugin *iplug;
  36431. + sector_t block;
  36432. +
  36433. + inode = mapping->host;
  36434. +
  36435. + ctx = reiser4_init_context(inode->i_sb);
  36436. + if (IS_ERR(ctx))
  36437. + return PTR_ERR(ctx);
  36438. + key_by_inode_and_offset_common(inode,
  36439. + (loff_t) lblock * current_blocksize,
  36440. + &key);
  36441. +
  36442. + init_lh(&lh);
  36443. + result =
  36444. + find_file_item_nohint(&coord, &lh, &key, ZNODE_READ_LOCK, inode);
  36445. + if (cbk_errored(result)) {
  36446. + done_lh(&lh);
  36447. + reiser4_exit_context(ctx);
  36448. + return result;
  36449. + }
  36450. +
  36451. + result = zload(coord.node);
  36452. + if (result) {
  36453. + done_lh(&lh);
  36454. + reiser4_exit_context(ctx);
  36455. + return result;
  36456. + }
  36457. +
  36458. + iplug = item_plugin_by_coord(&coord);
  36459. + if (iplug->s.file.get_block) {
  36460. + result = iplug->s.file.get_block(&coord, lblock, &block);
  36461. + if (result == 0)
  36462. + result = block;
  36463. + } else
  36464. + result = RETERR(-EINVAL);
  36465. +
  36466. + zrelse(coord.node);
  36467. + done_lh(&lh);
  36468. + reiser4_exit_context(ctx);
  36469. + return result;
  36470. +}
  36471. +
  36472. +/**
  36473. + * flow_by_inode_unix_file - initialize structure flow
  36474. + * @inode: inode of file for which read or write is about
  36475. + * @buf: buffer to perform read to or write from
  36476. + * @user: flag showing whether @buf is user space or kernel space
  36477. + * @size: size of buffer @buf
  36478. + * @off: start offset for read or write
  36479. + * @op: READ or WRITE
  36480. + * @flow:
  36481. + *
  36482. + * Initializes fields of @flow: key, size of data, i/o mode (read or write).
  36483. + */
  36484. +int flow_by_inode_unix_file(struct inode *inode,
  36485. + const char __user *buf, int user,
  36486. + loff_t size, loff_t off,
  36487. + rw_op op, flow_t *flow)
  36488. +{
  36489. + assert("nikita-1100", inode != NULL);
  36490. +
  36491. + flow->length = size;
  36492. + memcpy(&flow->data, &buf, sizeof(buf));
  36493. + flow->user = user;
  36494. + flow->op = op;
  36495. + assert("nikita-1931", inode_file_plugin(inode) != NULL);
  36496. + assert("nikita-1932",
  36497. + inode_file_plugin(inode)->key_by_inode ==
  36498. + key_by_inode_and_offset_common);
  36499. + /* calculate key of write position and insert it into flow->key */
  36500. + return key_by_inode_and_offset_common(inode, off, &flow->key);
  36501. +}
  36502. +
  36503. +/* plugin->u.file.set_plug_in_sd = NULL
  36504. + plugin->u.file.set_plug_in_inode = NULL
  36505. + plugin->u.file.create_blank_sd = NULL */
  36506. +/* plugin->u.file.delete */
  36507. +/*
  36508. + plugin->u.file.add_link = reiser4_add_link_common
  36509. + plugin->u.file.rem_link = NULL */
  36510. +
  36511. +/* plugin->u.file.owns_item
  36512. + this is common_file_owns_item with assertion */
  36513. +/* Audited by: green(2002.06.15) */
  36514. +int
  36515. +owns_item_unix_file(const struct inode *inode /* object to check against */ ,
  36516. + const coord_t * coord /* coord to check */ )
  36517. +{
  36518. + int result;
  36519. +
  36520. + result = owns_item_common(inode, coord);
  36521. + if (!result)
  36522. + return 0;
  36523. + if (!plugin_of_group(item_plugin_by_coord(coord),
  36524. + UNIX_FILE_METADATA_ITEM_TYPE))
  36525. + return 0;
  36526. + assert("vs-547",
  36527. + item_id_by_coord(coord) == EXTENT_POINTER_ID ||
  36528. + item_id_by_coord(coord) == FORMATTING_ID);
  36529. + return 1;
  36530. +}
  36531. +
  36532. +static int setattr_truncate(struct inode *inode, struct iattr *attr)
  36533. +{
  36534. + int result;
  36535. + int s_result;
  36536. + loff_t old_size;
  36537. + reiser4_tree *tree;
  36538. +
  36539. + inode_check_scale(inode, inode->i_size, attr->ia_size);
  36540. +
  36541. + old_size = inode->i_size;
  36542. + tree = reiser4_tree_by_inode(inode);
  36543. +
  36544. + result = safe_link_grab(tree, BA_CAN_COMMIT);
  36545. + if (result == 0)
  36546. + result = safe_link_add(inode, SAFE_TRUNCATE);
  36547. + if (result == 0)
  36548. + result = truncate_file_body(inode, attr);
  36549. + if (result)
  36550. + warning("vs-1588", "truncate_file failed: oid %lli, "
  36551. + "old size %lld, new size %lld, retval %d",
  36552. + (unsigned long long)get_inode_oid(inode),
  36553. + old_size, attr->ia_size, result);
  36554. +
  36555. + s_result = safe_link_grab(tree, BA_CAN_COMMIT);
  36556. + if (s_result == 0)
  36557. + s_result =
  36558. + safe_link_del(tree, get_inode_oid(inode), SAFE_TRUNCATE);
  36559. + if (s_result != 0) {
  36560. + warning("nikita-3417", "Cannot kill safelink %lli: %i",
  36561. + (unsigned long long)get_inode_oid(inode), s_result);
  36562. + }
  36563. + safe_link_release(tree);
  36564. + return result;
  36565. +}
  36566. +
  36567. +/* plugin->u.file.setattr method */
  36568. +/* This calls inode_setattr and if truncate is in effect it also takes
  36569. + exclusive inode access to avoid races */
  36570. +int setattr_unix_file(struct dentry *dentry, /* Object to change attributes */
  36571. + struct iattr *attr /* change description */ )
  36572. +{
  36573. + int result;
  36574. +
  36575. + if (attr->ia_valid & ATTR_SIZE) {
  36576. + reiser4_context *ctx;
  36577. + struct unix_file_info *uf_info;
  36578. +
  36579. + /* truncate does reservation itself and requires exclusive
  36580. + access obtained */
  36581. + ctx = reiser4_init_context(dentry->d_inode->i_sb);
  36582. + if (IS_ERR(ctx))
  36583. + return PTR_ERR(ctx);
  36584. +
  36585. + uf_info = unix_file_inode_data(dentry->d_inode);
  36586. + get_exclusive_access_careful(uf_info, dentry->d_inode);
  36587. + result = setattr_truncate(dentry->d_inode, attr);
  36588. + drop_exclusive_access(uf_info);
  36589. + context_set_commit_async(ctx);
  36590. + reiser4_exit_context(ctx);
  36591. + } else
  36592. + result = reiser4_setattr_common(&init_user_ns, dentry, attr);
  36593. +
  36594. + return result;
  36595. +}
  36596. +
  36597. +/* plugin->u.file.init_inode_data */
  36598. +void
  36599. +init_inode_data_unix_file(struct inode *inode,
  36600. + reiser4_object_create_data * crd, int create)
  36601. +{
  36602. + struct unix_file_info *data;
  36603. +
  36604. + data = unix_file_inode_data(inode);
  36605. + data->container = create ? UF_CONTAINER_EMPTY : UF_CONTAINER_UNKNOWN;
  36606. + init_rwsem(&data->latch);
  36607. + data->tplug = inode_formatting_plugin(inode);
  36608. + data->exclusive_use = 0;
  36609. +
  36610. +#if REISER4_DEBUG
  36611. + data->ea_owner = NULL;
  36612. + atomic_set(&data->nr_neas, 0);
  36613. +#endif
  36614. + init_inode_ordering(inode, crd, create);
  36615. +}
  36616. +
  36617. +/**
  36618. + * delete_object_unix_file - delete_object of file_plugin
  36619. + * @inode: inode to be deleted
  36620. + *
  36621. + * Truncates file to length 0, removes stat data and safe link.
  36622. + */
  36623. +int delete_object_unix_file(struct inode *inode)
  36624. +{
  36625. + struct unix_file_info *uf_info;
  36626. + int result;
  36627. +
  36628. + if (reiser4_inode_get_flag(inode, REISER4_NO_SD))
  36629. + return 0;
  36630. +
  36631. + /* truncate file body first */
  36632. + uf_info = unix_file_inode_data(inode);
  36633. + get_exclusive_access(uf_info);
  36634. + result = shorten_file(inode, 0 /* size */ );
  36635. + drop_exclusive_access(uf_info);
  36636. +
  36637. + if (result)
  36638. + warning("edward-1556",
  36639. + "failed to truncate file (%llu) on removal: %d",
  36640. + get_inode_oid(inode), result);
  36641. +
  36642. + /* remove stat data and safe link */
  36643. + return reiser4_delete_object_common(inode);
  36644. +}
  36645. +
  36646. +static int do_write_begin(struct file *file, struct page *page,
  36647. + loff_t pos, unsigned len)
  36648. +{
  36649. + int ret;
  36650. + if (len == PAGE_SIZE || PageUptodate(page))
  36651. + return 0;
  36652. +
  36653. + ret = readpage_unix_file(file, page);
  36654. + if (ret) {
  36655. + SetPageError(page);
  36656. + ClearPageUptodate(page);
  36657. + /* All reiser4 readpage() implementations should return the
  36658. + * page locked in case of error. */
  36659. + assert("nikita-3472", PageLocked(page));
  36660. + return ret;
  36661. + }
  36662. + /*
  36663. + * ->readpage() either:
  36664. + *
  36665. + * 1. starts IO against @page. @page is locked for IO in
  36666. + * this case.
  36667. + *
  36668. + * 2. doesn't start IO. @page is unlocked.
  36669. + *
  36670. + * In either case, page should be locked.
  36671. + */
  36672. + lock_page(page);
  36673. + /*
  36674. + * IO (if any) is completed at this point. Check for IO
  36675. + * errors.
  36676. + */
  36677. + if (!PageUptodate(page))
  36678. + return RETERR(-EIO);
  36679. + return ret;
  36680. +}
  36681. +
  36682. +/* plugin->write_begin() */
  36683. +int write_begin_unix_file(struct file *file, struct page *page,
  36684. + loff_t pos, unsigned len, void **fsdata)
  36685. +{
  36686. + int ret;
  36687. + struct inode * inode;
  36688. + struct unix_file_info *info;
  36689. +
  36690. + inode = file_inode(file);
  36691. + info = unix_file_inode_data(inode);
  36692. +
  36693. + ret = reiser4_grab_space_force(estimate_one_insert_into_item
  36694. + (reiser4_tree_by_inode(inode)),
  36695. + BA_CAN_COMMIT);
  36696. + if (ret)
  36697. + return ret;
  36698. + get_exclusive_access(info);
  36699. + ret = find_file_state(file_inode(file), info);
  36700. + if (unlikely(ret != 0)) {
  36701. + drop_exclusive_access(info);
  36702. + return ret;
  36703. + }
  36704. + if (info->container == UF_CONTAINER_TAILS) {
  36705. + ret = tail2extent(info);
  36706. + if (ret) {
  36707. + warning("edward-1575",
  36708. + "tail conversion failed: %d", ret);
  36709. + drop_exclusive_access(info);
  36710. + return ret;
  36711. + }
  36712. + }
  36713. + ret = do_write_begin(file, page, pos, len);
  36714. + if (unlikely(ret != 0))
  36715. + drop_exclusive_access(info);
  36716. + /* else exclusive access will be dropped in ->write_end() */
  36717. + return ret;
  36718. +}
  36719. +
  36720. +/* plugin->write_end() */
  36721. +int write_end_unix_file(struct file *file, struct page *page,
  36722. + loff_t pos, unsigned copied, void *fsdata)
  36723. +{
  36724. + int ret;
  36725. + struct inode *inode;
  36726. + struct unix_file_info *info;
  36727. +
  36728. + inode = file_inode(file);
  36729. + info = unix_file_inode_data(inode);
  36730. +
  36731. + unlock_page(page);
  36732. + ret = find_or_create_extent(page);
  36733. + if (ret) {
  36734. + SetPageError(page);
  36735. + goto exit;
  36736. + }
  36737. + if (pos + copied > inode->i_size) {
  36738. + INODE_SET_FIELD(inode, i_size, pos + copied);
  36739. + ret = reiser4_update_sd(inode);
  36740. + if (unlikely(ret != 0))
  36741. + warning("edward-1604",
  36742. + "Can not update stat-data: %i. FSCK?",
  36743. + ret);
  36744. + }
  36745. + exit:
  36746. + drop_exclusive_access(info);
  36747. + return ret;
  36748. +}
  36749. +
  36750. +/*
  36751. + * Local variables:
  36752. + * c-indentation-style: "K&R"
  36753. + * mode-name: "LC"
  36754. + * c-basic-offset: 8
  36755. + * tab-width: 8
  36756. + * fill-column: 79
  36757. + * scroll-step: 1
  36758. + * End:
  36759. + */
  36760. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/file/file_conversion.c linux-5.16.14/fs/reiser4/plugin/file/file_conversion.c
  36761. --- linux-5.16.14.orig/fs/reiser4/plugin/file/file_conversion.c 1970-01-01 01:00:00.000000000 +0100
  36762. +++ linux-5.16.14/fs/reiser4/plugin/file/file_conversion.c 2022-03-12 13:26:19.671892775 +0100
  36763. @@ -0,0 +1,762 @@
  36764. +/* Copyright 2001, 2002, 2003 by Hans Reiser,
  36765. + licensing governed by reiser4/README */
  36766. +
  36767. +/**
  36768. + * This file contains dispatching hooks, and conversion methods, which
  36769. + * implement transitions in the FILE interface.
  36770. + *
  36771. + * Dispatching hook makes a decision (at dispatching point) about the
  36772. + * most reasonable plugin. Such decision is made in accordance with some
  36773. + * O(1)-heuristic.
  36774. + *
  36775. + * We implement a transition CRYPTCOMPRESS -> UNIX_FILE for files with
  36776. + * incompressible data. Current heuristic to estimate compressibility is
  36777. + * very simple: if first complete logical cluster (64K by default) of a
  36778. + * file is incompressible, then we make a decision, that the whole file
  36779. + * is incompressible.
  36780. + *
  36781. + * To enable dispatching we install a special "magic" compression mode
  36782. + * plugin CONVX_COMPRESSION_MODE_ID at file creation time.
  36783. + *
  36784. + * Note, that we don't perform back conversion (UNIX_FILE->CRYPTCOMPRESS)
  36785. + * because of compatibility reasons.
  36786. + *
  36787. + * In conversion time we protect CS, the conversion set (file's (meta)data
  36788. + * and plugin table (pset)) via special per-inode rw-semaphore (conv_sem).
  36789. + * The methods which implement conversion are CS writers. The methods of FS
  36790. + * interface (file_operations, inode_operations, address_space_operations)
  36791. + * are CS readers.
  36792. + */
  36793. +
  36794. +#include <linux/uio.h>
  36795. +#include "../../inode.h"
  36796. +#include "../cluster.h"
  36797. +#include "file.h"
  36798. +
  36799. +#define conversion_enabled(inode) \
  36800. + (inode_compression_mode_plugin(inode) == \
  36801. + compression_mode_plugin_by_id(CONVX_COMPRESSION_MODE_ID))
  36802. +
  36803. +/**
  36804. + * Located sections (readers and writers of @pset) are not permanently
  36805. + * critical: cryptcompress file can be converted only if the conversion
  36806. + * is enabled (see the macro above). Also we don't perform back
  36807. + * conversion. The following helper macro is a sanity check to decide
  36808. + * if we need the protection (locks are always additional overheads).
  36809. + */
  36810. +#define should_protect(inode) \
  36811. + (inode_file_plugin(inode) == \
  36812. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID) && \
  36813. + conversion_enabled(inode))
  36814. +/**
  36815. + * To avoid confusion with read/write file operations, we'll speak about
  36816. + * "passive" protection for FCS readers and "active" protection for FCS
  36817. + * writers. All methods with active or passive protection have suffix
  36818. + * "careful".
  36819. + */
  36820. +/**
  36821. + * Macros for passive protection.
  36822. + *
  36823. + * Construct invariant operation to be supplied to VFS.
  36824. + * The macro accepts the following lexemes:
  36825. + * @type - type of the value represented by the compound statement;
  36826. + * @method - name of an operation to be supplied to VFS (reiser4 file
  36827. + * plugin also should contain a method with such name).
  36828. + */
  36829. +#define PROT_PASSIVE(type, method, args) \
  36830. +({ \
  36831. + type _result; \
  36832. + struct rw_semaphore * guard = \
  36833. + &reiser4_inode_data(inode)->conv_sem; \
  36834. + \
  36835. + if (should_protect(inode)) { \
  36836. + down_read(guard); \
  36837. + if (!should_protect(inode)) \
  36838. + up_read(guard); \
  36839. + } \
  36840. + _result = inode_file_plugin(inode)->method args; \
  36841. + if (should_protect(inode)) \
  36842. + up_read(guard); \
  36843. + _result; \
  36844. +})
  36845. +
  36846. +#define PROT_PASSIVE_VOID(method, args) \
  36847. +({ \
  36848. + struct rw_semaphore * guard = \
  36849. + &reiser4_inode_data(inode)->conv_sem; \
  36850. + \
  36851. + if (should_protect(inode)) { \
  36852. + down_read(guard); \
  36853. + if (!should_protect(inode)) \
  36854. + up_read(guard); \
  36855. + } \
  36856. + inode_file_plugin(inode)->method args; \
  36857. + \
  36858. + if (should_protect(inode)) \
  36859. + up_read(guard); \
  36860. +})
  36861. +
  36862. +/* Pass management to the unix-file plugin with "notail" policy */
  36863. +static int __cryptcompress2unixfile(struct file *file, struct inode * inode)
  36864. +{
  36865. + int result;
  36866. + reiser4_inode *info;
  36867. + struct unix_file_info * uf;
  36868. + info = reiser4_inode_data(inode);
  36869. +
  36870. + result = aset_set_unsafe(&info->pset,
  36871. + PSET_FILE,
  36872. + (reiser4_plugin *)
  36873. + file_plugin_by_id(UNIX_FILE_PLUGIN_ID));
  36874. + if (result)
  36875. + return result;
  36876. + result = aset_set_unsafe(&info->pset,
  36877. + PSET_FORMATTING,
  36878. + (reiser4_plugin *)
  36879. + formatting_plugin_by_id(NEVER_TAILS_FORMATTING_ID));
  36880. + if (result)
  36881. + return result;
  36882. + /* get rid of non-standard plugins */
  36883. + info->plugin_mask &= ~cryptcompress_mask;
  36884. + /* get rid of plugin stat-data extension */
  36885. + info->extmask &= ~(1 << PLUGIN_STAT);
  36886. +
  36887. + reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
  36888. +
  36889. + /* FIXME use init_inode_data_unix_file() instead,
  36890. + but avoid init_inode_ordering() */
  36891. + /* Init unix-file specific part of inode */
  36892. + uf = unix_file_inode_data(inode);
  36893. + uf->container = UF_CONTAINER_UNKNOWN;
  36894. + init_rwsem(&uf->latch);
  36895. + uf->tplug = inode_formatting_plugin(inode);
  36896. + uf->exclusive_use = 0;
  36897. +#if REISER4_DEBUG
  36898. + uf->ea_owner = NULL;
  36899. + atomic_set(&uf->nr_neas, 0);
  36900. +#endif
  36901. + /**
  36902. + * we were careful for file_ops, inode_ops and as_ops
  36903. + * to be invariant for plugin conversion, so there is
  36904. + * no need to update ones already installed in the
  36905. + * vfs's residence.
  36906. + */
  36907. + return 0;
  36908. +}
  36909. +
  36910. +#if REISER4_DEBUG
  36911. +static int disabled_conversion_inode_ok(struct inode * inode)
  36912. +{
  36913. + __u64 extmask = reiser4_inode_data(inode)->extmask;
  36914. + __u16 plugin_mask = reiser4_inode_data(inode)->plugin_mask;
  36915. +
  36916. + return ((extmask & (1 << LIGHT_WEIGHT_STAT)) &&
  36917. + (extmask & (1 << UNIX_STAT)) &&
  36918. + (extmask & (1 << LARGE_TIMES_STAT)) &&
  36919. + (extmask & (1 << PLUGIN_STAT)) &&
  36920. + (plugin_mask & (1 << PSET_COMPRESSION_MODE)));
  36921. +}
  36922. +#endif
  36923. +
  36924. +/**
  36925. + * Disable future attempts to schedule/convert file plugin.
  36926. + * This function is called by plugin schedule hooks.
  36927. + *
  36928. + * To disable conversion we assign any compression mode plugin id
  36929. + * different from CONVX_COMPRESSION_MODE_ID.
  36930. + */
  36931. +static int disable_conversion(struct inode * inode)
  36932. +{
  36933. + int result;
  36934. + result =
  36935. + force_plugin_pset(inode,
  36936. + PSET_COMPRESSION_MODE,
  36937. + (reiser4_plugin *)compression_mode_plugin_by_id
  36938. + (LATTD_COMPRESSION_MODE_ID));
  36939. + assert("edward-1500",
  36940. + ergo(!result, disabled_conversion_inode_ok(inode)));
  36941. + return result;
  36942. +}
  36943. +
  36944. +/**
  36945. + * Check if we really have achieved plugin scheduling point
  36946. + */
  36947. +static int check_dispatch_point(struct inode * inode,
  36948. + loff_t pos /* position in the
  36949. + file to write from */,
  36950. + struct cluster_handle * clust,
  36951. + struct dispatch_context * cont)
  36952. +{
  36953. + assert("edward-1505", conversion_enabled(inode));
  36954. + /*
  36955. + * if file size is more then cluster size, then compressible
  36956. + * status must be figured out (i.e. compression was disabled,
  36957. + * or file plugin was converted to unix_file)
  36958. + */
  36959. + assert("edward-1506", inode->i_size <= inode_cluster_size(inode));
  36960. +
  36961. + if (pos > inode->i_size)
  36962. + /* first logical cluster will contain a (partial) hole */
  36963. + return disable_conversion(inode);
  36964. + if (pos < inode_cluster_size(inode))
  36965. + /* writing to the first logical cluster */
  36966. + return 0;
  36967. + /*
  36968. + * here we have:
  36969. + * cluster_size <= pos <= i_size <= cluster_size,
  36970. + * and, hence, pos == i_size == cluster_size
  36971. + */
  36972. + assert("edward-1498",
  36973. + pos == inode->i_size &&
  36974. + pos == inode_cluster_size(inode));
  36975. + assert("edward-1539", cont != NULL);
  36976. + assert("edward-1540", cont->state == DISPATCH_INVAL_STATE);
  36977. +
  36978. + cont->state = DISPATCH_POINT;
  36979. + return 0;
  36980. +}
  36981. +
  36982. +static void start_check_compressibility(struct inode * inode,
  36983. + struct cluster_handle * clust,
  36984. + hint_t * hint)
  36985. +{
  36986. + assert("edward-1507", clust->index == 1);
  36987. + assert("edward-1508", !tfm_cluster_is_uptodate(&clust->tc));
  36988. + assert("edward-1509", cluster_get_tfm_act(&clust->tc) == TFMA_READ);
  36989. +
  36990. + hint_init_zero(hint);
  36991. + clust->hint = hint;
  36992. + clust->index --;
  36993. + clust->nr_pages = size_in_pages(lbytes(clust->index, inode));
  36994. +
  36995. + /* first logical cluster (of index #0) must be complete */
  36996. + assert("edward-1510", lbytes(clust->index, inode) ==
  36997. + inode_cluster_size(inode));
  36998. +}
  36999. +
  37000. +static void finish_check_compressibility(struct inode * inode,
  37001. + struct cluster_handle * clust,
  37002. + hint_t * hint)
  37003. +{
  37004. + reiser4_unset_hint(clust->hint);
  37005. + clust->hint = hint;
  37006. + clust->index ++;
  37007. +}
  37008. +
  37009. +#if REISER4_DEBUG
  37010. +static int prepped_dclust_ok(hint_t * hint)
  37011. +{
  37012. + reiser4_key key;
  37013. + coord_t * coord = &hint->ext_coord.coord;
  37014. +
  37015. + item_key_by_coord(coord, &key);
  37016. + return (item_id_by_coord(coord) == CTAIL_ID &&
  37017. + !coord_is_unprepped_ctail(coord) &&
  37018. + (get_key_offset(&key) + nr_units_ctail(coord) ==
  37019. + dclust_get_extension_dsize(hint)));
  37020. +}
  37021. +#endif
  37022. +
  37023. +#define fifty_persent(size) (size >> 1)
  37024. +/* evaluation of data compressibility */
  37025. +#define data_is_compressible(osize, isize) \
  37026. + (osize < fifty_persent(isize))
  37027. +
  37028. +/**
  37029. + * A simple O(1)-heuristic for compressibility.
  37030. + * This is called not more than one time per file's life.
  37031. + * Read first logical cluster (of index #0) and estimate its compressibility.
  37032. + * Save estimation result in @cont.
  37033. + */
  37034. +static int read_check_compressibility(struct inode * inode,
  37035. + struct cluster_handle * clust,
  37036. + struct dispatch_context * cont)
  37037. +{
  37038. + int i;
  37039. + int result;
  37040. + size_t dst_len;
  37041. + hint_t tmp_hint;
  37042. + hint_t * cur_hint = clust->hint;
  37043. + assert("edward-1541", cont->state == DISPATCH_POINT);
  37044. +
  37045. + start_check_compressibility(inode, clust, &tmp_hint);
  37046. +
  37047. + reset_cluster_pgset(clust, cluster_nrpages(inode));
  37048. + result = grab_page_cluster(inode, clust, READ_OP);
  37049. + if (result)
  37050. + return result;
  37051. + /* Read page cluster here */
  37052. + for (i = 0; i < clust->nr_pages; i++) {
  37053. + struct page *page = clust->pages[i];
  37054. + lock_page(page);
  37055. + result = do_readpage_ctail(inode, clust, page,
  37056. + ZNODE_READ_LOCK);
  37057. + unlock_page(page);
  37058. + if (result)
  37059. + goto error;
  37060. + }
  37061. + tfm_cluster_clr_uptodate(&clust->tc);
  37062. +
  37063. + cluster_set_tfm_act(&clust->tc, TFMA_WRITE);
  37064. +
  37065. + if (hint_is_valid(&tmp_hint) && !hint_is_unprepped_dclust(&tmp_hint)) {
  37066. + /* length of compressed data is known, no need to compress */
  37067. + assert("edward-1511",
  37068. + znode_is_any_locked(tmp_hint.lh.node));
  37069. + assert("edward-1512",
  37070. + WITH_DATA(tmp_hint.ext_coord.coord.node,
  37071. + prepped_dclust_ok(&tmp_hint)));
  37072. + dst_len = dclust_get_extension_dsize(&tmp_hint);
  37073. + }
  37074. + else {
  37075. + struct tfm_cluster * tc = &clust->tc;
  37076. + compression_plugin * cplug = inode_compression_plugin(inode);
  37077. + result = grab_tfm_stream(inode, tc, INPUT_STREAM);
  37078. + if (result)
  37079. + goto error;
  37080. + for (i = 0; i < clust->nr_pages; i++) {
  37081. + char *data;
  37082. + lock_page(clust->pages[i]);
  37083. + BUG_ON(!PageUptodate(clust->pages[i]));
  37084. + data = kmap(clust->pages[i]);
  37085. + memcpy(tfm_stream_data(tc, INPUT_STREAM) + pg_to_off(i),
  37086. + data, PAGE_SIZE);
  37087. + kunmap(clust->pages[i]);
  37088. + unlock_page(clust->pages[i]);
  37089. + }
  37090. + result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
  37091. + if (result)
  37092. + goto error;
  37093. + result = grab_coa(tc, cplug);
  37094. + if (result)
  37095. + goto error;
  37096. + tc->len = tc->lsize = lbytes(clust->index, inode);
  37097. + assert("edward-1513", tc->len == inode_cluster_size(inode));
  37098. + dst_len = tfm_stream_size(tc, OUTPUT_STREAM);
  37099. + cplug->compress(get_coa(tc, cplug->h.id, tc->act),
  37100. + tfm_input_data(clust), tc->len,
  37101. + tfm_output_data(clust), &dst_len);
  37102. + assert("edward-1514",
  37103. + dst_len <= tfm_stream_size(tc, OUTPUT_STREAM));
  37104. + }
  37105. + finish_check_compressibility(inode, clust, cur_hint);
  37106. + cont->state =
  37107. + (data_is_compressible(dst_len, inode_cluster_size(inode)) ?
  37108. + DISPATCH_REMAINS_OLD :
  37109. + DISPATCH_ASSIGNED_NEW);
  37110. + return 0;
  37111. + error:
  37112. + put_page_cluster(clust, inode, READ_OP);
  37113. + return result;
  37114. +}
  37115. +
  37116. +/* Cut disk cluster of index @idx */
  37117. +static int cut_disk_cluster(struct inode * inode, cloff_t idx)
  37118. +{
  37119. + reiser4_key from, to;
  37120. + assert("edward-1515", inode_file_plugin(inode) ==
  37121. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
  37122. + key_by_inode_cryptcompress(inode, clust_to_off(idx, inode), &from);
  37123. + to = from;
  37124. + set_key_offset(&to,
  37125. + get_key_offset(&from) + inode_cluster_size(inode) - 1);
  37126. + return reiser4_cut_tree(reiser4_tree_by_inode(inode),
  37127. + &from, &to, inode, 0);
  37128. +}
  37129. +
  37130. +static int reserve_cryptcompress2unixfile(struct inode *inode)
  37131. +{
  37132. + reiser4_block_nr unformatted_nodes;
  37133. + reiser4_tree *tree;
  37134. +
  37135. + tree = reiser4_tree_by_inode(inode);
  37136. +
  37137. + /* number of unformatted nodes which will be created */
  37138. + unformatted_nodes = cluster_nrpages(inode); /* N */
  37139. +
  37140. + /*
  37141. + * space required for one iteration of extent->tail conversion:
  37142. + *
  37143. + * 1. kill ctail items
  37144. + *
  37145. + * 2. insert N unformatted nodes
  37146. + *
  37147. + * 3. insert N (worst-case single-block
  37148. + * extents) extent units.
  37149. + *
  37150. + * 4. drilling to the leaf level by coord_by_key()
  37151. + *
  37152. + * 5. possible update of stat-data
  37153. + *
  37154. + */
  37155. + grab_space_enable();
  37156. + return reiser4_grab_space
  37157. + (2 * tree->height +
  37158. + unformatted_nodes +
  37159. + unformatted_nodes * estimate_one_insert_into_item(tree) +
  37160. + 1 + estimate_one_insert_item(tree) +
  37161. + inode_file_plugin(inode)->estimate.update(inode),
  37162. + BA_CAN_COMMIT);
  37163. +}
  37164. +
  37165. +/**
  37166. + * Convert cryptcompress file plugin to unix_file plugin.
  37167. + */
  37168. +static int cryptcompress2unixfile(struct file *file, struct inode *inode,
  37169. + struct dispatch_context *cont)
  37170. +{
  37171. + int i;
  37172. + int result = 0;
  37173. + struct cryptcompress_info *cr_info;
  37174. + struct unix_file_info *uf_info;
  37175. + assert("edward-1516", cont->pages[0]->index == 0);
  37176. +
  37177. + /* release all cryptcompress-specific resources */
  37178. + cr_info = cryptcompress_inode_data(inode);
  37179. + result = reserve_cryptcompress2unixfile(inode);
  37180. + if (result)
  37181. + goto out;
  37182. + /* tell kill_hook to not truncate pages */
  37183. + reiser4_inode_set_flag(inode, REISER4_FILE_CONV_IN_PROGRESS);
  37184. + result = cut_disk_cluster(inode, 0);
  37185. + if (result)
  37186. + goto out;
  37187. + /* captured jnode of cluster and associated resources (pages,
  37188. + reserved disk space) were released by ->kill_hook() method
  37189. + of the item plugin */
  37190. +
  37191. + result = __cryptcompress2unixfile(file, inode);
  37192. + if (result)
  37193. + goto out;
  37194. + /* At this point file is managed by unix file plugin */
  37195. +
  37196. + uf_info = unix_file_inode_data(inode);
  37197. +
  37198. + assert("edward-1518",
  37199. + ergo(jprivate(cont->pages[0]),
  37200. + !jnode_is_cluster_page(jprivate(cont->pages[0]))));
  37201. + for(i = 0; i < cont->nr_pages; i++) {
  37202. + assert("edward-1519", cont->pages[i]);
  37203. + assert("edward-1520", PageUptodate(cont->pages[i]));
  37204. +
  37205. + result = find_or_create_extent(cont->pages[i]);
  37206. + if (result)
  37207. + break;
  37208. + }
  37209. + if (unlikely(result))
  37210. + goto out;
  37211. + uf_info->container = UF_CONTAINER_EXTENTS;
  37212. + result = reiser4_update_sd(inode);
  37213. + out:
  37214. + all_grabbed2free();
  37215. + return result;
  37216. +}
  37217. +
  37218. +#define convert_file_plugin cryptcompress2unixfile
  37219. +
  37220. +/**
  37221. + * This is called by ->write() method of a cryptcompress file plugin.
  37222. + * Make a decision about the most reasonable file plugin id to manage
  37223. + * the file.
  37224. + */
  37225. +int write_dispatch_hook(struct file *file, struct inode *inode,
  37226. + loff_t pos, struct cluster_handle *clust,
  37227. + struct dispatch_context *cont)
  37228. +{
  37229. + int result;
  37230. + if (!conversion_enabled(inode))
  37231. + return 0;
  37232. + result = check_dispatch_point(inode, pos, clust, cont);
  37233. + if (result || cont->state != DISPATCH_POINT)
  37234. + return result;
  37235. + result = read_check_compressibility(inode, clust, cont);
  37236. + if (result)
  37237. + return result;
  37238. + if (cont->state == DISPATCH_REMAINS_OLD) {
  37239. + put_page_cluster(clust, inode, READ_OP);
  37240. + return disable_conversion(inode);
  37241. + }
  37242. + assert("edward-1543", cont->state == DISPATCH_ASSIGNED_NEW);
  37243. + /*
  37244. + * page cluster is grabbed and uptodate. It will be
  37245. + * released with a pgset after plugin conversion is
  37246. + * finished, see put_dispatch_context().
  37247. + */
  37248. + reiser4_unset_hint(clust->hint);
  37249. + move_cluster_pgset(clust, &cont->pages, &cont->nr_pages);
  37250. + return 0;
  37251. +}
  37252. +
  37253. +/**
  37254. + * This is called by ->setattr() method of cryptcompress file plugin.
  37255. + */
  37256. +int setattr_dispatch_hook(struct inode * inode)
  37257. +{
  37258. + if (conversion_enabled(inode))
  37259. + return disable_conversion(inode);
  37260. + return 0;
  37261. +}
  37262. +
  37263. +static inline void init_dispatch_context(struct dispatch_context * cont)
  37264. +{
  37265. + memset(cont, 0, sizeof(*cont));
  37266. +}
  37267. +
  37268. +static inline void done_dispatch_context(struct dispatch_context * cont,
  37269. + struct inode * inode)
  37270. +{
  37271. + if (cont->pages) {
  37272. + __put_page_cluster(0, cont->nr_pages, cont->pages, inode);
  37273. + kfree(cont->pages);
  37274. + }
  37275. +}
  37276. +
  37277. +static inline ssize_t reiser4_write_checks(struct file *file,
  37278. + const char __user *buf,
  37279. + size_t count, loff_t *off)
  37280. +{
  37281. + ssize_t result;
  37282. + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
  37283. + struct kiocb iocb;
  37284. + struct iov_iter iter;
  37285. +
  37286. + init_sync_kiocb(&iocb, file);
  37287. + iocb.ki_pos = *off;
  37288. + iov_iter_init(&iter, WRITE, &iov, 1, count);
  37289. +
  37290. + result = generic_write_checks(&iocb, &iter);
  37291. + *off = iocb.ki_pos;
  37292. + return result;
  37293. +}
  37294. +
  37295. +/*
  37296. + * ->write() VFS file operation
  37297. + *
  37298. + * performs "intelligent" conversion in the FILE interface.
  37299. + * Write a file in 3 steps (2nd and 3rd steps are optional).
  37300. + */
  37301. +ssize_t reiser4_write_dispatch(struct file *file, const char __user *buf,
  37302. + size_t count, loff_t *off)
  37303. +{
  37304. + ssize_t result;
  37305. + reiser4_context *ctx;
  37306. + ssize_t written_old = 0; /* bytes written with initial plugin */
  37307. + ssize_t written_new = 0; /* bytes written with new plugin */
  37308. + struct dispatch_context cont;
  37309. + struct inode * inode = file_inode(file);
  37310. +
  37311. + ctx = reiser4_init_context(inode->i_sb);
  37312. + if (IS_ERR(ctx))
  37313. + return PTR_ERR(ctx);
  37314. + current->backing_dev_info = inode_to_bdi(inode);
  37315. + init_dispatch_context(&cont);
  37316. + inode_lock(inode);
  37317. +
  37318. + result = reiser4_write_checks(file, buf, count, off);
  37319. + if (unlikely(result <= 0))
  37320. + goto exit;
  37321. + /**
  37322. + * First step.
  37323. + * Start write with initial file plugin.
  37324. + * Keep a plugin schedule status at @cont (if any).
  37325. + */
  37326. + written_old = inode_file_plugin(inode)->write(file,
  37327. + buf,
  37328. + count,
  37329. + off,
  37330. + &cont);
  37331. + if (cont.state != DISPATCH_ASSIGNED_NEW || written_old < 0)
  37332. + goto exit;
  37333. + /**
  37334. + * Second step.
  37335. + * New file plugin has been scheduled.
  37336. + * Commit respective atom and pass management to the new plugin.
  37337. + */
  37338. + assert("edward-181", cont.pages[0] != NULL);
  37339. + /*
  37340. + * this will commit the whole logical cluster
  37341. + * the file consists of
  37342. + */
  37343. + reiser4_sync_page(cont.pages[0]);
  37344. +
  37345. + down_read(&reiser4_inode_data(inode)->conv_sem);
  37346. + result = convert_file_plugin(file, inode, &cont);
  37347. + up_read(&reiser4_inode_data(inode)->conv_sem);
  37348. + if (result) {
  37349. + warning("edward-1544",
  37350. + "Inode %llu: file plugin conversion failed (%d)",
  37351. + (unsigned long long)get_inode_oid(inode),
  37352. + (int)result);
  37353. + goto exit;
  37354. + }
  37355. + reiser4_txn_restart(ctx);
  37356. + /**
  37357. + * Third step:
  37358. + * Finish write with the new file plugin.
  37359. + */
  37360. + assert("edward-1536",
  37361. + inode_file_plugin(inode) ==
  37362. + file_plugin_by_id(UNIX_FILE_PLUGIN_ID));
  37363. +
  37364. + written_new = inode_file_plugin(inode)->write(file,
  37365. + buf + written_old,
  37366. + count - written_old,
  37367. + off,
  37368. + NULL);
  37369. + exit:
  37370. + inode_unlock(inode);
  37371. + done_dispatch_context(&cont, inode);
  37372. + current->backing_dev_info = NULL;
  37373. + context_set_commit_async(ctx);
  37374. + reiser4_exit_context(ctx);
  37375. +
  37376. + return written_old + (written_new < 0 ? 0 : written_new);
  37377. +}
  37378. +
  37379. +/*
  37380. + * Dispatchers with "passive" protection for:
  37381. + *
  37382. + * ->open();
  37383. + * ->read();
  37384. + * ->ioctl();
  37385. + * ->mmap();
  37386. + * ->release();
  37387. + * ->bmap().
  37388. + */
  37389. +
  37390. +int reiser4_open_dispatch(struct inode *inode, struct file *file)
  37391. +{
  37392. + return PROT_PASSIVE(int, open, (inode, file));
  37393. +}
  37394. +
  37395. +ssize_t reiser4_read_dispatch(struct kiocb *iocb, struct iov_iter *iter)
  37396. +{
  37397. + struct inode * inode = file_inode(iocb->ki_filp);
  37398. + return PROT_PASSIVE(ssize_t, read, (iocb, iter));
  37399. +}
  37400. +
  37401. +long reiser4_ioctl_dispatch(struct file *filp, unsigned int cmd,
  37402. + unsigned long arg)
  37403. +{
  37404. + struct inode * inode = file_inode(filp);
  37405. + return PROT_PASSIVE(int, ioctl, (filp, cmd, arg));
  37406. +}
  37407. +
  37408. +int reiser4_mmap_dispatch(struct file *file, struct vm_area_struct *vma)
  37409. +{
  37410. + struct inode *inode = file_inode(file);
  37411. + return PROT_PASSIVE(int, mmap, (file, vma));
  37412. +}
  37413. +
  37414. +int reiser4_release_dispatch(struct inode *inode, struct file *file)
  37415. +{
  37416. + return PROT_PASSIVE(int, release, (inode, file));
  37417. +}
  37418. +
  37419. +sector_t reiser4_bmap_dispatch(struct address_space * mapping, sector_t lblock)
  37420. +{
  37421. + struct inode *inode = mapping->host;
  37422. + return PROT_PASSIVE(sector_t, bmap, (mapping, lblock));
  37423. +}
  37424. +
  37425. +/**
  37426. + * NOTE: The following two methods are
  37427. + * used only for loopback functionality.
  37428. + * reiser4_write_end() can not cope with
  37429. + * short writes for now.
  37430. + */
  37431. +int reiser4_write_begin_dispatch(struct file *file,
  37432. + struct address_space *mapping,
  37433. + loff_t pos,
  37434. + unsigned len,
  37435. + unsigned flags,
  37436. + struct page **pagep,
  37437. + void **fsdata)
  37438. +{
  37439. + int ret = 0;
  37440. + struct page *page;
  37441. + pgoff_t index;
  37442. + reiser4_context *ctx;
  37443. + struct inode * inode = file_inode(file);
  37444. +
  37445. + index = pos >> PAGE_SHIFT;
  37446. + page = grab_cache_page_write_begin(mapping, index,
  37447. + flags & AOP_FLAG_NOFS);
  37448. + *pagep = page;
  37449. + if (!page)
  37450. + return -ENOMEM;
  37451. +
  37452. + ctx = reiser4_init_context(file_inode(file)->i_sb);
  37453. + if (IS_ERR(ctx)) {
  37454. + ret = PTR_ERR(ctx);
  37455. + goto err2;
  37456. + }
  37457. + ret = reiser4_grab_space_force(/* for update_sd:
  37458. + * one when updating file size and
  37459. + * one when updating mtime/ctime */
  37460. + 2 * estimate_update_common(inode),
  37461. + BA_CAN_COMMIT);
  37462. + if (ret)
  37463. + goto err1;
  37464. + ret = PROT_PASSIVE(int, write_begin, (file, page, pos, len, fsdata));
  37465. + if (unlikely(ret))
  37466. + goto err1;
  37467. + /* Success. Resorces will be released in write_end_dispatch */
  37468. + return 0;
  37469. + err1:
  37470. + reiser4_exit_context(ctx);
  37471. + err2:
  37472. + unlock_page(page);
  37473. + put_page(page);
  37474. + return ret;
  37475. +}
  37476. +
  37477. +int reiser4_write_end_dispatch(struct file *file,
  37478. + struct address_space *mapping,
  37479. + loff_t pos,
  37480. + unsigned len,
  37481. + unsigned copied,
  37482. + struct page *page,
  37483. + void *fsdata)
  37484. +{
  37485. + int ret;
  37486. + reiser4_context *ctx;
  37487. + struct inode *inode = page->mapping->host;
  37488. +
  37489. + assert("umka-3101", file != NULL);
  37490. + assert("umka-3102", page != NULL);
  37491. + assert("umka-3093", PageLocked(page));
  37492. +
  37493. + ctx = get_current_context();
  37494. +
  37495. + SetPageUptodate(page);
  37496. + set_page_dirty_notag(page);
  37497. +
  37498. + ret = PROT_PASSIVE(int, write_end, (file, page, pos, copied, fsdata));
  37499. + put_page(page);
  37500. +
  37501. + /* don't commit transaction under inode semaphore */
  37502. + context_set_commit_async(ctx);
  37503. + reiser4_exit_context(ctx);
  37504. + return ret == 0 ? copied : ret;
  37505. +}
  37506. +
  37507. +/*
  37508. + * Dispatchers without protection
  37509. + */
  37510. +int reiser4_setattr_dispatch(struct user_namespace *mnt_userns,
  37511. + struct dentry *dentry, struct iattr *attr)
  37512. +{
  37513. + return inode_file_plugin(dentry->d_inode)->setattr(dentry, attr);
  37514. +}
  37515. +
  37516. +/*
  37517. + Local variables:
  37518. + c-indentation-style: "K&R"
  37519. + mode-name: "LC"
  37520. + c-basic-offset: 8
  37521. + tab-width: 8
  37522. + fill-column: 80
  37523. + scroll-step: 1
  37524. + End:
  37525. +*/
  37526. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/file/file.h linux-5.16.14/fs/reiser4/plugin/file/file.h
  37527. --- linux-5.16.14.orig/fs/reiser4/plugin/file/file.h 1970-01-01 01:00:00.000000000 +0100
  37528. +++ linux-5.16.14/fs/reiser4/plugin/file/file.h 2022-03-12 13:26:19.671892775 +0100
  37529. @@ -0,0 +1,321 @@
  37530. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  37531. + * reiser4/README */
  37532. +
  37533. +/* this file contains declarations of methods implementing
  37534. + file plugins (UNIX_FILE_PLUGIN_ID, CRYPTCOMPRESS_FILE_PLUGIN_ID
  37535. + and SYMLINK_FILE_PLUGIN_ID) */
  37536. +
  37537. +#if !defined( __REISER4_FILE_H__ )
  37538. +#define __REISER4_FILE_H__
  37539. +
  37540. +/* possible states in dispatching process */
  37541. +typedef enum {
  37542. + DISPATCH_INVAL_STATE, /* invalid state */
  37543. + DISPATCH_POINT, /* dispatching point has been achieved */
  37544. + DISPATCH_REMAINS_OLD, /* made a decision to manage by old plugin */
  37545. + DISPATCH_ASSIGNED_NEW /* a new plugin has been assigned */
  37546. +} dispatch_state;
  37547. +
  37548. +struct dispatch_context {
  37549. + int nr_pages;
  37550. + struct page **pages;
  37551. + dispatch_state state;
  37552. +};
  37553. +
  37554. +/*
  37555. + * Declarations of methods provided for VFS.
  37556. + */
  37557. +
  37558. +/* inode operations */
  37559. +int reiser4_setattr_dispatch(struct user_namespace *mnt_userns,
  37560. + struct dentry *, struct iattr *);
  37561. +
  37562. +/* file operations */
  37563. +ssize_t reiser4_read_dispatch(struct kiocb *iocb, struct iov_iter *iter);
  37564. +ssize_t reiser4_write_dispatch(struct file *, const char __user *buf,
  37565. + size_t count, loff_t * off);
  37566. +long reiser4_ioctl_dispatch(struct file *filp, unsigned int cmd,
  37567. + unsigned long arg);
  37568. +int reiser4_mmap_dispatch(struct file *, struct vm_area_struct *);
  37569. +int reiser4_open_dispatch(struct inode *inode, struct file *file);
  37570. +int reiser4_release_dispatch(struct inode *, struct file *);
  37571. +int reiser4_sync_file_common(struct file *, loff_t, loff_t, int datasync);
  37572. +int reiser4_sync_page(struct page *page);
  37573. +
  37574. +/* address space operations */
  37575. +int reiser4_readpage_dispatch(struct file *, struct page *);
  37576. +int reiser4_readpages_dispatch(struct file *, struct address_space *,
  37577. + struct list_head *, unsigned);
  37578. +int reiser4_writepages_dispatch(struct address_space *,
  37579. + struct writeback_control *);
  37580. +int reiser4_write_begin_dispatch(struct file *file,
  37581. + struct address_space *mapping,
  37582. + loff_t pos, unsigned len, unsigned flags,
  37583. + struct page **pagep, void **fsdata);
  37584. +int reiser4_write_end_dispatch(struct file *file,
  37585. + struct address_space *mapping,
  37586. + loff_t pos, unsigned len, unsigned copied,
  37587. + struct page *page, void *fsdata);
  37588. +sector_t reiser4_bmap_dispatch(struct address_space *, sector_t lblock);
  37589. +
  37590. +/*
  37591. + * Private methods of unix-file plugin
  37592. + * (UNIX_FILE_PLUGIN_ID)
  37593. + */
  37594. +
  37595. +/* private inode operations */
  37596. +int setattr_unix_file(struct dentry *, struct iattr *);
  37597. +
  37598. +/* private file operations */
  37599. +
  37600. +ssize_t read_unix_file(struct kiocb *iocb, struct iov_iter *iter);
  37601. +ssize_t write_unix_file(struct file *, const char __user *buf, size_t write_amount,
  37602. + loff_t * off, struct dispatch_context * cont);
  37603. +int ioctl_unix_file(struct file *, unsigned int cmd, unsigned long arg);
  37604. +int mmap_unix_file(struct file *, struct vm_area_struct *);
  37605. +int open_unix_file(struct inode *, struct file *);
  37606. +int release_unix_file(struct inode *, struct file *);
  37607. +
  37608. +/* private address space operations */
  37609. +int readpage_unix_file(struct file *, struct page *);
  37610. +int readpages_unix_file(struct file*, struct address_space*, struct list_head*,
  37611. + unsigned);
  37612. +int writepages_unix_file(struct address_space *, struct writeback_control *);
  37613. +int write_begin_unix_file(struct file *file, struct page *page,
  37614. + loff_t pos, unsigned len, void **fsdata);
  37615. +int write_end_unix_file(struct file *file, struct page *page,
  37616. + loff_t pos, unsigned copied, void *fsdata);
  37617. +sector_t bmap_unix_file(struct address_space *, sector_t lblock);
  37618. +
  37619. +/* other private methods */
  37620. +int delete_object_unix_file(struct inode *);
  37621. +int flow_by_inode_unix_file(struct inode *, const char __user *buf,
  37622. + int user, loff_t, loff_t, rw_op, flow_t *);
  37623. +int owns_item_unix_file(const struct inode *, const coord_t *);
  37624. +void init_inode_data_unix_file(struct inode *, reiser4_object_create_data *,
  37625. + int create);
  37626. +
  37627. +/*
  37628. + * Private methods of cryptcompress file plugin
  37629. + * (CRYPTCOMPRESS_FILE_PLUGIN_ID)
  37630. + */
  37631. +
  37632. +/* private inode operations */
  37633. +int setattr_cryptcompress(struct dentry *, struct iattr *);
  37634. +
  37635. +/* private file operations */
  37636. +ssize_t read_cryptcompress(struct kiocb *iocb, struct iov_iter *iter);
  37637. +ssize_t write_cryptcompress(struct file *, const char __user *buf,
  37638. + size_t count, loff_t * off,
  37639. + struct dispatch_context *cont);
  37640. +int ioctl_cryptcompress(struct file *, unsigned int cmd, unsigned long arg);
  37641. +int mmap_cryptcompress(struct file *, struct vm_area_struct *);
  37642. +int open_cryptcompress(struct inode *, struct file *);
  37643. +int release_cryptcompress(struct inode *, struct file *);
  37644. +
  37645. +/* private address space operations */
  37646. +int readpage_cryptcompress(struct file *, struct page *);
  37647. +int readpages_cryptcompress(struct file*, struct address_space*,
  37648. + struct list_head*, unsigned);
  37649. +int writepages_cryptcompress(struct address_space *,
  37650. + struct writeback_control *);
  37651. +int write_begin_cryptcompress(struct file *file, struct page *page,
  37652. + loff_t pos, unsigned len, void **fsdata);
  37653. +int write_end_cryptcompress(struct file *file, struct page *page,
  37654. + loff_t pos, unsigned copied, void *fsdata);
  37655. +sector_t bmap_cryptcompress(struct address_space *, sector_t lblock);
  37656. +
  37657. +/* other private methods */
  37658. +int flow_by_inode_cryptcompress(struct inode *, const char __user *buf,
  37659. + int user, loff_t, loff_t, rw_op, flow_t *);
  37660. +int key_by_inode_cryptcompress(struct inode *, loff_t off, reiser4_key *);
  37661. +int create_object_cryptcompress(struct inode *, struct inode *,
  37662. + reiser4_object_create_data *);
  37663. +int delete_object_cryptcompress(struct inode *);
  37664. +void init_inode_data_cryptcompress(struct inode *, reiser4_object_create_data *,
  37665. + int create);
  37666. +int cut_tree_worker_cryptcompress(tap_t *, const reiser4_key * from_key,
  37667. + const reiser4_key * to_key,
  37668. + reiser4_key * smallest_removed,
  37669. + struct inode *object, int truncate,
  37670. + int *progress);
  37671. +void destroy_inode_cryptcompress(struct inode *);
  37672. +
  37673. +/*
  37674. + * Private methods of symlink file plugin
  37675. + * (SYMLINK_FILE_PLUGIN_ID)
  37676. + */
  37677. +int reiser4_create_symlink(struct inode *symlink, struct inode *dir,
  37678. + reiser4_object_create_data *);
  37679. +void destroy_inode_symlink(struct inode *);
  37680. +
  37681. +/*
  37682. + * all the write into unix file is performed by item write method. Write method
  37683. + * of unix file plugin only decides which item plugin (extent or tail) and in
  37684. + * which mode (one from the enum below) to call
  37685. + */
  37686. +typedef enum {
  37687. + FIRST_ITEM = 1,
  37688. + APPEND_ITEM = 2,
  37689. + OVERWRITE_ITEM = 3
  37690. +} write_mode_t;
  37691. +
  37692. +/* unix file may be in one the following states */
  37693. +typedef enum {
  37694. + UF_CONTAINER_UNKNOWN = 0,
  37695. + UF_CONTAINER_TAILS = 1,
  37696. + UF_CONTAINER_EXTENTS = 2,
  37697. + UF_CONTAINER_EMPTY = 3
  37698. +} file_container_t;
  37699. +
  37700. +struct formatting_plugin;
  37701. +struct inode;
  37702. +
  37703. +/* unix file plugin specific part of reiser4 inode */
  37704. +struct unix_file_info {
  37705. + /*
  37706. + * this read-write lock protects file containerization change. Accesses
  37707. + * which do not change file containerization (see file_container_t)
  37708. + * (read, readpage, writepage, write (until tail conversion is
  37709. + * involved)) take read-lock. Accesses which modify file
  37710. + * containerization (truncate, conversion from tail to extent and back)
  37711. + * take write-lock.
  37712. + */
  37713. + struct rw_semaphore latch;
  37714. + /* this enum specifies which items are used to build the file */
  37715. + file_container_t container;
  37716. + /*
  37717. + * plugin which controls when file is to be converted to extents and
  37718. + * back to tail
  37719. + */
  37720. + struct formatting_plugin *tplug;
  37721. + /* if this is set, file is in exclusive use */
  37722. + int exclusive_use;
  37723. +#if REISER4_DEBUG
  37724. + /* pointer to task struct of thread owning exclusive access to file */
  37725. + void *ea_owner;
  37726. + atomic_t nr_neas;
  37727. + void *last_reader;
  37728. +#endif
  37729. +};
  37730. +
  37731. +struct unix_file_info *unix_file_inode_data(const struct inode *inode);
  37732. +void get_exclusive_access(struct unix_file_info *);
  37733. +void drop_exclusive_access(struct unix_file_info *);
  37734. +void get_nonexclusive_access(struct unix_file_info *);
  37735. +void drop_nonexclusive_access(struct unix_file_info *);
  37736. +int try_to_get_nonexclusive_access(struct unix_file_info *);
  37737. +int find_file_item(hint_t *, const reiser4_key *, znode_lock_mode,
  37738. + struct inode *);
  37739. +int find_file_item_nohint(coord_t *, lock_handle *,
  37740. + const reiser4_key *, znode_lock_mode,
  37741. + struct inode *);
  37742. +
  37743. +int load_file_hint(struct file *, hint_t *);
  37744. +void save_file_hint(struct file *, const hint_t *);
  37745. +
  37746. +#include "../item/extent.h"
  37747. +#include "../item/tail.h"
  37748. +#include "../item/ctail.h"
  37749. +
  37750. +struct uf_coord {
  37751. + coord_t coord;
  37752. + lock_handle *lh;
  37753. + int valid;
  37754. + union {
  37755. + struct extent_coord_extension extent;
  37756. + struct tail_coord_extension tail;
  37757. + struct ctail_coord_extension ctail;
  37758. + } extension;
  37759. +};
  37760. +
  37761. +#include "../../forward.h"
  37762. +#include "../../seal.h"
  37763. +#include "../../lock.h"
  37764. +
  37765. +/*
  37766. + * This structure is used to speed up file operations (reads and writes). A
  37767. + * hint is a suggestion about where a key resolved to last time. A seal
  37768. + * indicates whether a node has been modified since a hint was last recorded.
  37769. + * You check the seal, and if the seal is still valid, you can use the hint
  37770. + * without traversing the tree again.
  37771. + */
  37772. +struct hint {
  37773. + seal_t seal; /* a seal over last file item accessed */
  37774. + uf_coord_t ext_coord;
  37775. + loff_t offset;
  37776. + znode_lock_mode mode;
  37777. + lock_handle lh;
  37778. +};
  37779. +
  37780. +static inline int hint_is_valid(hint_t * hint)
  37781. +{
  37782. + return hint->ext_coord.valid;
  37783. +}
  37784. +
  37785. +static inline void hint_set_valid(hint_t * hint)
  37786. +{
  37787. + hint->ext_coord.valid = 1;
  37788. +}
  37789. +
  37790. +static inline void hint_clr_valid(hint_t * hint)
  37791. +{
  37792. + hint->ext_coord.valid = 0;
  37793. +}
  37794. +
  37795. +int load_file_hint(struct file *, hint_t *);
  37796. +void save_file_hint(struct file *, const hint_t *);
  37797. +void hint_init_zero(hint_t *);
  37798. +void reiser4_set_hint(hint_t *, const reiser4_key *, znode_lock_mode);
  37799. +int hint_is_set(const hint_t *);
  37800. +void reiser4_unset_hint(hint_t *);
  37801. +
  37802. +int reiser4_update_file_size(struct inode *, loff_t, int update_sd);
  37803. +int cut_file_items(struct inode *, loff_t new_size,
  37804. + int update_sd, loff_t cur_size,
  37805. + int (*update_actor) (struct inode *, loff_t, int));
  37806. +#if REISER4_DEBUG
  37807. +
  37808. +/* return 1 is exclusive access is obtained, 0 - otherwise */
  37809. +static inline int ea_obtained(struct unix_file_info * uf_info)
  37810. +{
  37811. + int ret;
  37812. +
  37813. + ret = down_read_trylock(&uf_info->latch);
  37814. + if (ret)
  37815. + up_read(&uf_info->latch);
  37816. + return !ret;
  37817. +}
  37818. +
  37819. +#endif
  37820. +
  37821. +#define WRITE_GRANULARITY 32
  37822. +
  37823. +int tail2extent(struct unix_file_info *);
  37824. +int extent2tail(struct file *, struct unix_file_info *);
  37825. +
  37826. +int goto_right_neighbor(coord_t *, lock_handle *);
  37827. +int find_or_create_extent(struct page *);
  37828. +int equal_to_ldk(znode *, const reiser4_key *);
  37829. +
  37830. +void init_uf_coord(uf_coord_t *uf_coord, lock_handle *lh);
  37831. +
  37832. +static inline int cbk_errored(int cbk_result)
  37833. +{
  37834. + return (cbk_result != CBK_COORD_NOTFOUND
  37835. + && cbk_result != CBK_COORD_FOUND);
  37836. +}
  37837. +
  37838. +/* __REISER4_FILE_H__ */
  37839. +#endif
  37840. +
  37841. +/*
  37842. + * Local variables:
  37843. + * c-indentation-style: "K&R"
  37844. + * mode-name: "LC"
  37845. + * c-basic-offset: 8
  37846. + * tab-width: 8
  37847. + * fill-column: 79
  37848. + * scroll-step: 1
  37849. + * End:
  37850. +*/
  37851. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/file/Makefile linux-5.16.14/fs/reiser4/plugin/file/Makefile
  37852. --- linux-5.16.14.orig/fs/reiser4/plugin/file/Makefile 1970-01-01 01:00:00.000000000 +0100
  37853. +++ linux-5.16.14/fs/reiser4/plugin/file/Makefile 2022-03-12 13:26:19.666892763 +0100
  37854. @@ -0,0 +1,10 @@
  37855. +
  37856. +MODULE := file_plugins
  37857. +
  37858. +obj-$(CONFIG_REISER4_FS) := $(MODULE).o
  37859. +
  37860. +$(MODULE)-objs += \
  37861. + file.o \
  37862. + tail_conversion.o \
  37863. + symlink.o \
  37864. + cryptcompress.o
  37865. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/file/symfile.c linux-5.16.14/fs/reiser4/plugin/file/symfile.c
  37866. --- linux-5.16.14.orig/fs/reiser4/plugin/file/symfile.c 1970-01-01 01:00:00.000000000 +0100
  37867. +++ linux-5.16.14/fs/reiser4/plugin/file/symfile.c 2022-03-12 13:26:19.671892775 +0100
  37868. @@ -0,0 +1,87 @@
  37869. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  37870. +
  37871. +/* Symfiles are a generalization of Unix symlinks.
  37872. +
  37873. + A symfile when read behaves as though you took its contents and
  37874. + substituted them into the reiser4 naming system as the right hand side
  37875. + of an assignment, and then read that which you had assigned to it.
  37876. +
  37877. + A key issue for symfiles is how to implement writes through to
  37878. + subfiles. In general, one must have some method of determining what
  37879. + of that which is written to the symfile is written to what subfile.
  37880. + This can be done by use of custom plugin methods written by users, or
  37881. + by using a few general methods we provide for those willing to endure
  37882. + the insertion of delimiters into what is read.
  37883. +
  37884. + Writing to symfiles without delimiters to denote what is written to
  37885. + what subfile is not supported by any plugins we provide in this
  37886. + release. Our most sophisticated support for writes is that embodied
  37887. + by the invert plugin (see invert.c).
  37888. +
  37889. + A read only version of the /etc/passwd file might be
  37890. + constructed as a symfile whose contents are as follows:
  37891. +
  37892. + /etc/passwd/userlines/*
  37893. +
  37894. + or
  37895. +
  37896. + /etc/passwd/userlines/demidov+/etc/passwd/userlines/edward+/etc/passwd/userlines/reiser+/etc/passwd/userlines/root
  37897. +
  37898. + or
  37899. +
  37900. + /etc/passwd/userlines/(demidov+edward+reiser+root)
  37901. +
  37902. + A symfile with contents
  37903. +
  37904. + /filenameA+"(some text stored in the uninvertable symfile)+/filenameB
  37905. +
  37906. + will return when read
  37907. +
  37908. + The contents of filenameAsome text stored in the uninvertable symfileThe contents of filenameB
  37909. +
  37910. + and write of what has been read will not be possible to implement as
  37911. + an identity operation because there are no delimiters denoting the
  37912. + boundaries of what is to be written to what subfile.
  37913. +
  37914. + Note that one could make this a read/write symfile if one specified
  37915. + delimiters, and the write method understood those delimiters delimited
  37916. + what was written to subfiles.
  37917. +
  37918. + So, specifying the symfile in a manner that allows writes:
  37919. +
  37920. + /etc/passwd/userlines/demidov+"(
  37921. + )+/etc/passwd/userlines/edward+"(
  37922. + )+/etc/passwd/userlines/reiser+"(
  37923. + )+/etc/passwd/userlines/root+"(
  37924. + )
  37925. +
  37926. + or
  37927. +
  37928. + /etc/passwd/userlines/(demidov+"(
  37929. + )+edward+"(
  37930. + )+reiser+"(
  37931. + )+root+"(
  37932. + ))
  37933. +
  37934. + and the file demidov might be specified as:
  37935. +
  37936. + /etc/passwd/userlines/demidov/username+"(:)+/etc/passwd/userlines/demidov/password+"(:)+/etc/passwd/userlines/demidov/userid+"(:)+/etc/passwd/userlines/demidov/groupid+"(:)+/etc/passwd/userlines/demidov/gecos+"(:)+/etc/passwd/userlines/demidov/home+"(:)+/etc/passwd/userlines/demidov/shell
  37937. +
  37938. + or
  37939. +
  37940. + /etc/passwd/userlines/demidov/(username+"(:)+password+"(:)+userid+"(:)+groupid+"(:)+gecos+"(:)+home+"(:)+shell)
  37941. +
  37942. + Notice that if the file demidov has a carriage return in it, the
  37943. + parsing fails, but then if you put carriage returns in the wrong place
  37944. + in a normal /etc/passwd file it breaks things also.
  37945. +
  37946. + Note that it is forbidden to have no text between two interpolations
  37947. + if one wants to be able to define what parts of a write go to what
  37948. + subfiles referenced in an interpolation.
  37949. +
  37950. + If one wants to be able to add new lines by writing to the file, one
  37951. + must either write a custom plugin for /etc/passwd that knows how to
  37952. + name an added line, or one must use an invert, or one must use a more
  37953. + sophisticated symfile syntax that we are not planning to write for
  37954. + version 4.0.
  37955. +*/
  37956. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/file/symlink.c linux-5.16.14/fs/reiser4/plugin/file/symlink.c
  37957. --- linux-5.16.14.orig/fs/reiser4/plugin/file/symlink.c 1970-01-01 01:00:00.000000000 +0100
  37958. +++ linux-5.16.14/fs/reiser4/plugin/file/symlink.c 2022-03-12 13:26:19.671892775 +0100
  37959. @@ -0,0 +1,95 @@
  37960. +/* Copyright 2002, 2003, 2005 by Hans Reiser, licensing governed by reiser4/README */
  37961. +
  37962. +#include "../../inode.h"
  37963. +
  37964. +#include <linux/types.h>
  37965. +#include <linux/fs.h>
  37966. +
  37967. +/* file plugin methods specific for symlink files
  37968. + (SYMLINK_FILE_PLUGIN_ID) */
  37969. +
  37970. +/* this is implementation of create_object method of file plugin for
  37971. + SYMLINK_FILE_PLUGIN_ID
  37972. + */
  37973. +
  37974. +/**
  37975. + * reiser4_create_symlink - create_object of file plugin for SYMLINK_FILE_PLUGIN_ID
  37976. + * @symlink: inode of symlink object
  37977. + * @dir: inode of parent directory
  37978. + * @info: parameters of new object
  37979. + *
  37980. + * Inserts stat data with symlink extension into the tree.
  37981. + */
  37982. +int reiser4_create_symlink(struct inode *symlink,
  37983. + struct inode *dir UNUSED_ARG,
  37984. + reiser4_object_create_data *data /* info passed to us
  37985. + * this is filled by
  37986. + * reiser4() syscall
  37987. + * in particular */)
  37988. +{
  37989. + int result;
  37990. +
  37991. + assert("nikita-680", symlink != NULL);
  37992. + assert("nikita-681", S_ISLNK(symlink->i_mode));
  37993. + assert("nikita-685", reiser4_inode_get_flag(symlink, REISER4_NO_SD));
  37994. + assert("nikita-682", dir != NULL);
  37995. + assert("nikita-684", data != NULL);
  37996. + assert("nikita-686", data->id == SYMLINK_FILE_PLUGIN_ID);
  37997. +
  37998. + /*
  37999. + * stat data of symlink has symlink extension in which we store
  38000. + * symlink content, that is, path symlink is pointing to.
  38001. + */
  38002. + reiser4_inode_data(symlink)->extmask |= (1 << SYMLINK_STAT);
  38003. +
  38004. + assert("vs-838", symlink->i_private == NULL);
  38005. + symlink->i_private = (void *)data->name;
  38006. +
  38007. + assert("vs-843", symlink->i_size == 0);
  38008. + INODE_SET_FIELD(symlink, i_size, strlen(data->name));
  38009. +
  38010. + /* insert stat data appended with data->name */
  38011. + result = inode_file_plugin(symlink)->write_sd_by_inode(symlink);
  38012. + if (result) {
  38013. + /* FIXME-VS: Make sure that symlink->i_private is not attached
  38014. + to kmalloced data */
  38015. + INODE_SET_FIELD(symlink, i_size, 0);
  38016. + } else {
  38017. + assert("vs-849", symlink->i_private
  38018. + && reiser4_inode_get_flag(symlink,
  38019. + REISER4_GENERIC_PTR_USED));
  38020. + assert("vs-850",
  38021. + !memcmp((char *)symlink->i_private, data->name,
  38022. + (size_t) symlink->i_size + 1));
  38023. + }
  38024. + return result;
  38025. +}
  38026. +
  38027. +/* this is implementation of destroy_inode method of file plugin for
  38028. + SYMLINK_FILE_PLUGIN_ID
  38029. + */
  38030. +void destroy_inode_symlink(struct inode *inode)
  38031. +{
  38032. + assert("edward-799",
  38033. + inode_file_plugin(inode) ==
  38034. + file_plugin_by_id(SYMLINK_FILE_PLUGIN_ID));
  38035. + assert("edward-800", !is_bad_inode(inode) && is_inode_loaded(inode));
  38036. + assert("edward-801", reiser4_inode_get_flag(inode,
  38037. + REISER4_GENERIC_PTR_USED));
  38038. + assert("vs-839", S_ISLNK(inode->i_mode));
  38039. +
  38040. + kfree(inode->i_private);
  38041. + inode->i_private = NULL;
  38042. + reiser4_inode_clr_flag(inode, REISER4_GENERIC_PTR_USED);
  38043. +}
  38044. +
  38045. +/*
  38046. + Local variables:
  38047. + c-indentation-style: "K&R"
  38048. + mode-name: "LC"
  38049. + c-basic-offset: 8
  38050. + tab-width: 8
  38051. + fill-column: 80
  38052. + scroll-step: 1
  38053. + End:
  38054. +*/
  38055. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/file/tail_conversion.c linux-5.16.14/fs/reiser4/plugin/file/tail_conversion.c
  38056. --- linux-5.16.14.orig/fs/reiser4/plugin/file/tail_conversion.c 1970-01-01 01:00:00.000000000 +0100
  38057. +++ linux-5.16.14/fs/reiser4/plugin/file/tail_conversion.c 2022-03-12 13:26:19.672892777 +0100
  38058. @@ -0,0 +1,763 @@
  38059. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  38060. +
  38061. +#include "../../inode.h"
  38062. +#include "../../super.h"
  38063. +#include "../../page_cache.h"
  38064. +#include "../../carry.h"
  38065. +#include "../../safe_link.h"
  38066. +#include "../../vfs_ops.h"
  38067. +
  38068. +#include <linux/writeback.h>
  38069. +
  38070. +/* this file contains:
  38071. + tail2extent and extent2tail */
  38072. +
  38073. +/* exclusive access to a file is acquired when file state changes: tail2extent, empty2tail, extent2tail, etc */
  38074. +void get_exclusive_access(struct unix_file_info * uf_info)
  38075. +{
  38076. + assert("nikita-3028", reiser4_schedulable());
  38077. + assert("nikita-3047", LOCK_CNT_NIL(inode_sem_w));
  38078. + assert("nikita-3048", LOCK_CNT_NIL(inode_sem_r));
  38079. + /*
  38080. + * "deadlock avoidance": sometimes we commit a transaction under
  38081. + * rw-semaphore on a file. Such commit can deadlock with another
  38082. + * thread that captured some block (hence preventing atom from being
  38083. + * committed) and waits on rw-semaphore.
  38084. + */
  38085. + reiser4_txn_restart_current();
  38086. + LOCK_CNT_INC(inode_sem_w);
  38087. + down_write(&uf_info->latch);
  38088. + uf_info->exclusive_use = 1;
  38089. + assert("vs-1713", uf_info->ea_owner == NULL);
  38090. + assert("vs-1713", atomic_read(&uf_info->nr_neas) == 0);
  38091. + ON_DEBUG(uf_info->ea_owner = current);
  38092. +}
  38093. +
  38094. +void drop_exclusive_access(struct unix_file_info * uf_info)
  38095. +{
  38096. + assert("vs-1714", uf_info->ea_owner == current);
  38097. + assert("vs-1715", atomic_read(&uf_info->nr_neas) == 0);
  38098. + ON_DEBUG(uf_info->ea_owner = NULL);
  38099. + uf_info->exclusive_use = 0;
  38100. + up_write(&uf_info->latch);
  38101. + assert("nikita-3049", LOCK_CNT_NIL(inode_sem_r));
  38102. + assert("nikita-3049", LOCK_CNT_GTZ(inode_sem_w));
  38103. + LOCK_CNT_DEC(inode_sem_w);
  38104. + reiser4_txn_restart_current();
  38105. +}
  38106. +
  38107. +/**
  38108. + * nea_grabbed - do something when file semaphore is down_read-ed
  38109. + * @uf_info:
  38110. + *
  38111. + * This is called when nonexclusive access is obtained on a file. All it does is
  38112. + * for debugging purposes.
  38113. + */
  38114. +static void nea_grabbed(struct unix_file_info *uf_info)
  38115. +{
  38116. +#if REISER4_DEBUG
  38117. + LOCK_CNT_INC(inode_sem_r);
  38118. + assert("vs-1716", uf_info->ea_owner == NULL);
  38119. + atomic_inc(&uf_info->nr_neas);
  38120. + uf_info->last_reader = current;
  38121. +#endif
  38122. +}
  38123. +
  38124. +/**
  38125. + * get_nonexclusive_access - get nonexclusive access to a file
  38126. + * @uf_info: unix file specific part of inode to obtain access to
  38127. + *
  38128. + * Nonexclusive access is obtained on a file before read, write, readpage.
  38129. + */
  38130. +void get_nonexclusive_access(struct unix_file_info *uf_info)
  38131. +{
  38132. + assert("nikita-3029", reiser4_schedulable());
  38133. + assert("nikita-3361", get_current_context()->trans->atom == NULL);
  38134. +
  38135. + down_read(&uf_info->latch);
  38136. + nea_grabbed(uf_info);
  38137. +}
  38138. +
  38139. +/**
  38140. + * try_to_get_nonexclusive_access - try to get nonexclusive access to a file
  38141. + * @uf_info: unix file specific part of inode to obtain access to
  38142. + *
  38143. + * Non-blocking version of nonexclusive access obtaining.
  38144. + */
  38145. +int try_to_get_nonexclusive_access(struct unix_file_info *uf_info)
  38146. +{
  38147. + int result;
  38148. +
  38149. + result = down_read_trylock(&uf_info->latch);
  38150. + if (result)
  38151. + nea_grabbed(uf_info);
  38152. + return result;
  38153. +}
  38154. +
  38155. +void drop_nonexclusive_access(struct unix_file_info * uf_info)
  38156. +{
  38157. + assert("vs-1718", uf_info->ea_owner == NULL);
  38158. + assert("vs-1719", atomic_read(&uf_info->nr_neas) > 0);
  38159. + ON_DEBUG(atomic_dec(&uf_info->nr_neas));
  38160. +
  38161. + up_read(&uf_info->latch);
  38162. +
  38163. + LOCK_CNT_DEC(inode_sem_r);
  38164. + reiser4_txn_restart_current();
  38165. +}
  38166. +
  38167. +/* part of tail2extent. Cut all items covering @count bytes starting from
  38168. + @offset */
  38169. +/* Audited by: green(2002.06.15) */
  38170. +static int cut_formatting_items(struct inode *inode, loff_t offset, int count)
  38171. +{
  38172. + reiser4_key from, to;
  38173. +
  38174. + /* AUDIT: How about putting an assertion here, what would check
  38175. + all provided range is covered by tail items only? */
  38176. + /* key of first byte in the range to be cut */
  38177. + inode_file_plugin(inode)->key_by_inode(inode, offset, &from);
  38178. +
  38179. + /* key of last byte in that range */
  38180. + to = from;
  38181. + set_key_offset(&to, (__u64) (offset + count - 1));
  38182. +
  38183. + /* cut everything between those keys */
  38184. + return reiser4_cut_tree(reiser4_tree_by_inode(inode), &from, &to,
  38185. + inode, 0);
  38186. +}
  38187. +
  38188. +static void release_all_pages(struct page **pages, unsigned nr_pages)
  38189. +{
  38190. + unsigned i;
  38191. +
  38192. + for (i = 0; i < nr_pages; i++) {
  38193. + if (pages[i] == NULL) {
  38194. +#if REISER4_DEBUG
  38195. + unsigned j;
  38196. + for (j = i + 1; j < nr_pages; j++)
  38197. + assert("vs-1620", pages[j] == NULL);
  38198. +#endif
  38199. + break;
  38200. + }
  38201. + put_page(pages[i]);
  38202. + pages[i] = NULL;
  38203. + }
  38204. +}
  38205. +
  38206. +/* part of tail2extent. replace tail items with extent one. Content of tail
  38207. + items (@count bytes) being cut are copied already into
  38208. + pages. extent_writepage method is called to create extents corresponding to
  38209. + those pages */
  38210. +static int replace(struct inode *inode, struct page **pages, unsigned nr_pages, int count)
  38211. +{
  38212. + int result;
  38213. + unsigned i;
  38214. + STORE_COUNTERS;
  38215. +
  38216. + if (nr_pages == 0)
  38217. + return 0;
  38218. +
  38219. + assert("vs-596", pages[0]);
  38220. +
  38221. + /* cut copied items */
  38222. + result = cut_formatting_items(inode, page_offset(pages[0]), count);
  38223. + if (result)
  38224. + return result;
  38225. +
  38226. + CHECK_COUNTERS;
  38227. +
  38228. + /* put into tree replacement for just removed items: extent item, namely */
  38229. + for (i = 0; i < nr_pages; i++) {
  38230. + result = add_to_page_cache_lru(pages[i], inode->i_mapping,
  38231. + pages[i]->index,
  38232. + mapping_gfp_mask(inode->
  38233. + i_mapping));
  38234. + if (result)
  38235. + break;
  38236. + SetPageUptodate(pages[i]);
  38237. + set_page_dirty_notag(pages[i]);
  38238. + unlock_page(pages[i]);
  38239. + result = find_or_create_extent(pages[i]);
  38240. + if (result) {
  38241. + /*
  38242. + * Failure at a critical point:
  38243. + * tail has been removed,
  38244. + * but extent hasn't been created
  38245. + */
  38246. + warning("edward-1572",
  38247. + "Report the error code %i to developers. Run FSCK",
  38248. + result);
  38249. + break;
  38250. + }
  38251. + }
  38252. + return result;
  38253. +}
  38254. +
  38255. +#define TAIL2EXTENT_PAGE_NUM 3 /* number of pages to fill before cutting tail
  38256. + * items */
  38257. +
  38258. +static int reserve_tail2extent_iteration(struct inode *inode)
  38259. +{
  38260. + reiser4_block_nr unformatted_nodes;
  38261. + reiser4_tree *tree;
  38262. +
  38263. + tree = reiser4_tree_by_inode(inode);
  38264. +
  38265. + /* number of unformatted nodes which will be created */
  38266. + unformatted_nodes = TAIL2EXTENT_PAGE_NUM;
  38267. +
  38268. + /*
  38269. + * space required for one iteration of extent->tail conversion:
  38270. + *
  38271. + * 1. kill N tail items
  38272. + *
  38273. + * 2. insert TAIL2EXTENT_PAGE_NUM unformatted nodes
  38274. + *
  38275. + * 3. insert TAIL2EXTENT_PAGE_NUM (worst-case single-block
  38276. + * extents) extent units.
  38277. + *
  38278. + * 4. drilling to the leaf level by coord_by_key()
  38279. + *
  38280. + * 5. possible update of stat-data
  38281. + *
  38282. + */
  38283. + grab_space_enable();
  38284. + return reiser4_grab_space
  38285. + (2 * tree->height +
  38286. + TAIL2EXTENT_PAGE_NUM +
  38287. + TAIL2EXTENT_PAGE_NUM * estimate_one_insert_into_item(tree) +
  38288. + 1 + estimate_one_insert_item(tree) +
  38289. + inode_file_plugin(inode)->estimate.update(inode), BA_CAN_COMMIT);
  38290. +}
  38291. +
  38292. +/* clear stat data's flag indicating that the file is being converted */
  38293. +static int complete_conversion(struct inode *inode)
  38294. +{
  38295. + int result;
  38296. +
  38297. + grab_space_enable();
  38298. + result =
  38299. + reiser4_grab_space(inode_file_plugin(inode)->estimate.update(inode),
  38300. + BA_CAN_COMMIT);
  38301. + if (result == 0) {
  38302. + reiser4_inode_clr_flag(inode, REISER4_PART_MIXED);
  38303. + result = reiser4_update_sd(inode);
  38304. + }
  38305. + if (result)
  38306. + warning("vs-1696", "Failed to clear converting bit of %llu: %i",
  38307. + (unsigned long long)get_inode_oid(inode), result);
  38308. + return 0;
  38309. +}
  38310. +
  38311. +/**
  38312. + * find_start
  38313. + * @inode:
  38314. + * @id:
  38315. + * @offset:
  38316. + *
  38317. + * this is used by tail2extent and extent2tail to detect where previous
  38318. + * uncompleted conversion stopped
  38319. + */
  38320. +static int find_start(struct inode *inode, reiser4_plugin_id id, __u64 *offset)
  38321. +{
  38322. + int result;
  38323. + lock_handle lh;
  38324. + coord_t coord;
  38325. + struct unix_file_info *ufo;
  38326. + int found;
  38327. + reiser4_key key;
  38328. +
  38329. + ufo = unix_file_inode_data(inode);
  38330. + init_lh(&lh);
  38331. + result = 0;
  38332. + found = 0;
  38333. + inode_file_plugin(inode)->key_by_inode(inode, *offset, &key);
  38334. + do {
  38335. + init_lh(&lh);
  38336. + result = find_file_item_nohint(&coord, &lh, &key,
  38337. + ZNODE_READ_LOCK, inode);
  38338. +
  38339. + if (result == CBK_COORD_FOUND) {
  38340. + if (coord.between == AT_UNIT) {
  38341. + /*coord_clear_iplug(&coord); */
  38342. + result = zload(coord.node);
  38343. + if (result == 0) {
  38344. + if (item_id_by_coord(&coord) == id)
  38345. + found = 1;
  38346. + else
  38347. + item_plugin_by_coord(&coord)->s.
  38348. + file.append_key(&coord,
  38349. + &key);
  38350. + zrelse(coord.node);
  38351. + }
  38352. + } else
  38353. + result = RETERR(-ENOENT);
  38354. + }
  38355. + done_lh(&lh);
  38356. + } while (result == 0 && !found);
  38357. + *offset = get_key_offset(&key);
  38358. + return result;
  38359. +}
  38360. +
  38361. +/**
  38362. + * tail2extent
  38363. + * @uf_info:
  38364. + *
  38365. + *
  38366. + */
  38367. +int tail2extent(struct unix_file_info *uf_info)
  38368. +{
  38369. + int result;
  38370. + reiser4_key key; /* key of next byte to be moved to page */
  38371. + char *p_data; /* data of page */
  38372. + unsigned page_off = 0, /* offset within the page where to copy data */
  38373. + count; /* number of bytes of item which can be
  38374. + * copied to page */
  38375. + struct page *pages[TAIL2EXTENT_PAGE_NUM];
  38376. + struct page *page;
  38377. + int done; /* set to 1 when all file is read */
  38378. + char *item;
  38379. + int i;
  38380. + struct inode *inode;
  38381. + int first_iteration;
  38382. + int bytes;
  38383. + __u64 offset;
  38384. +
  38385. + assert("nikita-3362", ea_obtained(uf_info));
  38386. + inode = unix_file_info_to_inode(uf_info);
  38387. + assert("nikita-3412", !IS_RDONLY(inode));
  38388. + assert("vs-1649", uf_info->container != UF_CONTAINER_EXTENTS);
  38389. + assert("", !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV));
  38390. +
  38391. + offset = 0;
  38392. + first_iteration = 1;
  38393. + result = 0;
  38394. + if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
  38395. + /*
  38396. + * file is marked on disk as there was a conversion which did
  38397. + * not complete due to either crash or some error. Find which
  38398. + * offset tail conversion stopped at
  38399. + */
  38400. + result = find_start(inode, FORMATTING_ID, &offset);
  38401. + if (result == -ENOENT) {
  38402. + /* no tail items found, everything is converted */
  38403. + uf_info->container = UF_CONTAINER_EXTENTS;
  38404. + complete_conversion(inode);
  38405. + return 0;
  38406. + } else if (result != 0)
  38407. + /* some other error */
  38408. + return result;
  38409. + first_iteration = 0;
  38410. + }
  38411. +
  38412. + reiser4_inode_set_flag(inode, REISER4_PART_IN_CONV);
  38413. +
  38414. + /* get key of first byte of a file */
  38415. + inode_file_plugin(inode)->key_by_inode(inode, offset, &key);
  38416. +
  38417. + done = 0;
  38418. + while (done == 0) {
  38419. + memset(pages, 0, sizeof(pages));
  38420. + result = reserve_tail2extent_iteration(inode);
  38421. + if (result != 0) {
  38422. + reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
  38423. + goto out;
  38424. + }
  38425. + if (first_iteration) {
  38426. + reiser4_inode_set_flag(inode, REISER4_PART_MIXED);
  38427. + reiser4_update_sd(inode);
  38428. + first_iteration = 0;
  38429. + }
  38430. + bytes = 0;
  38431. + for (i = 0; i < sizeof_array(pages) && done == 0; i++) {
  38432. + assert("vs-598",
  38433. + (get_key_offset(&key) & ~PAGE_MASK) == 0);
  38434. + page = alloc_page(reiser4_ctx_gfp_mask_get());
  38435. + if (!page) {
  38436. + result = RETERR(-ENOMEM);
  38437. + goto error;
  38438. + }
  38439. +
  38440. + page->index =
  38441. + (unsigned long)(get_key_offset(&key) >>
  38442. + PAGE_SHIFT);
  38443. + /*
  38444. + * usually when one is going to longterm lock znode (as
  38445. + * find_file_item does, for instance) he must not hold
  38446. + * locked pages. However, there is an exception for
  38447. + * case tail2extent. Pages appearing here are not
  38448. + * reachable to everyone else, they are clean, they do
  38449. + * not have jnodes attached so keeping them locked do
  38450. + * not risk deadlock appearance
  38451. + */
  38452. + assert("vs-983", !PagePrivate(page));
  38453. + reiser4_invalidate_pages(inode->i_mapping, page->index,
  38454. + 1, 0);
  38455. +
  38456. + for (page_off = 0; page_off < PAGE_SIZE;) {
  38457. + coord_t coord;
  38458. + lock_handle lh;
  38459. +
  38460. + /* get next item */
  38461. + /* FIXME: we might want to readahead here */
  38462. + init_lh(&lh);
  38463. + result =
  38464. + find_file_item_nohint(&coord, &lh, &key,
  38465. + ZNODE_READ_LOCK,
  38466. + inode);
  38467. + if (result != CBK_COORD_FOUND) {
  38468. + /*
  38469. + * error happened or no items of the file
  38470. + * were found
  38471. + */
  38472. + done_lh(&lh);
  38473. + put_page(page);
  38474. + goto error;
  38475. + }
  38476. +
  38477. + if (coord.between == AFTER_UNIT) {
  38478. + /*
  38479. + * end of file is reached. Pad page
  38480. + * with zeros
  38481. + */
  38482. + done_lh(&lh);
  38483. + done = 1;
  38484. + p_data = kmap_atomic(page);
  38485. + memset(p_data + page_off, 0,
  38486. + PAGE_SIZE - page_off);
  38487. + kunmap_atomic(p_data);
  38488. + break;
  38489. + }
  38490. +
  38491. + result = zload(coord.node);
  38492. + if (result) {
  38493. + put_page(page);
  38494. + done_lh(&lh);
  38495. + goto error;
  38496. + }
  38497. + assert("vs-856", coord.between == AT_UNIT);
  38498. + item = ((char *)item_body_by_coord(&coord)) +
  38499. + coord.unit_pos;
  38500. +
  38501. + /* how many bytes to copy */
  38502. + count =
  38503. + item_length_by_coord(&coord) -
  38504. + coord.unit_pos;
  38505. + /* limit length of copy to end of page */
  38506. + if (count > PAGE_SIZE - page_off)
  38507. + count = PAGE_SIZE - page_off;
  38508. +
  38509. + /*
  38510. + * copy item (as much as will fit starting from
  38511. + * the beginning of the item) into the page
  38512. + */
  38513. + p_data = kmap_atomic(page);
  38514. + memcpy(p_data + page_off, item, count);
  38515. + kunmap_atomic(p_data);
  38516. +
  38517. + page_off += count;
  38518. + bytes += count;
  38519. + set_key_offset(&key,
  38520. + get_key_offset(&key) + count);
  38521. +
  38522. + zrelse(coord.node);
  38523. + done_lh(&lh);
  38524. + } /* end of loop which fills one page by content of
  38525. + * formatting items */
  38526. +
  38527. + if (page_off) {
  38528. + /* something was copied into page */
  38529. + pages[i] = page;
  38530. + } else {
  38531. + put_page(page);
  38532. + assert("vs-1648", done == 1);
  38533. + break;
  38534. + }
  38535. + } /* end of loop through pages of one conversion iteration */
  38536. +
  38537. + if (i > 0) {
  38538. + result = replace(inode, pages, i, bytes);
  38539. + release_all_pages(pages, sizeof_array(pages));
  38540. + if (result)
  38541. + goto error;
  38542. + /*
  38543. + * We have to drop exclusive access to avoid deadlock
  38544. + * which may happen because called by reiser4_writepages
  38545. + * capture_unix_file requires to get non-exclusive
  38546. + * access to a file. It is safe to drop EA in the middle
  38547. + * of tail2extent conversion because write_unix_file,
  38548. + * setattr_unix_file(truncate), mmap_unix_file,
  38549. + * release_unix_file(extent2tail) checks if conversion
  38550. + * is not in progress (see comments before
  38551. + * get_exclusive_access_careful().
  38552. + * Other processes that acquire non-exclusive access
  38553. + * (read_unix_file, reiser4_writepages, etc) should work
  38554. + * on partially converted files.
  38555. + */
  38556. + drop_exclusive_access(uf_info);
  38557. + /* throttle the conversion */
  38558. + reiser4_throttle_write(inode);
  38559. + get_exclusive_access(uf_info);
  38560. +
  38561. + /*
  38562. + * nobody is allowed to complete conversion but a
  38563. + * process which started it
  38564. + */
  38565. + assert("", reiser4_inode_get_flag(inode,
  38566. + REISER4_PART_MIXED));
  38567. + }
  38568. + }
  38569. + if (result == 0) {
  38570. + /* file is converted to extent items */
  38571. + reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
  38572. + assert("vs-1697", reiser4_inode_get_flag(inode,
  38573. + REISER4_PART_MIXED));
  38574. +
  38575. + uf_info->container = UF_CONTAINER_EXTENTS;
  38576. + complete_conversion(inode);
  38577. + } else {
  38578. + /*
  38579. + * conversion is not complete. Inode was already marked as
  38580. + * REISER4_PART_MIXED and stat-data were updated at the first
  38581. + * iteration of the loop above.
  38582. + */
  38583. + error:
  38584. + release_all_pages(pages, sizeof_array(pages));
  38585. + reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
  38586. + warning("edward-1548", "Partial conversion of %llu: %i",
  38587. + (unsigned long long)get_inode_oid(inode), result);
  38588. + }
  38589. +
  38590. + out:
  38591. + /* this flag should be cleared, otherwise get_exclusive_access_careful()
  38592. + will fall into infinite loop */
  38593. + assert("edward-1549", !reiser4_inode_get_flag(inode,
  38594. + REISER4_PART_IN_CONV));
  38595. + return result;
  38596. +}
  38597. +
  38598. +static int reserve_extent2tail_iteration(struct inode *inode)
  38599. +{
  38600. + reiser4_tree *tree;
  38601. +
  38602. + tree = reiser4_tree_by_inode(inode);
  38603. + /*
  38604. + * reserve blocks for (in this order):
  38605. + *
  38606. + * 1. removal of extent item
  38607. + *
  38608. + * 2. insertion of tail by insert_flow()
  38609. + *
  38610. + * 3. drilling to the leaf level by coord_by_key()
  38611. + *
  38612. + * 4. possible update of stat-data
  38613. + */
  38614. + grab_space_enable();
  38615. + return reiser4_grab_space
  38616. + (estimate_one_item_removal(tree) +
  38617. + estimate_insert_flow(tree->height) +
  38618. + 1 + estimate_one_insert_item(tree) +
  38619. + inode_file_plugin(inode)->estimate.update(inode), BA_CAN_COMMIT);
  38620. +}
  38621. +
  38622. +/* for every page of file: read page, cut part of extent pointing to this page,
  38623. + put data of page tree by tail item */
  38624. +int extent2tail(struct file * file, struct unix_file_info *uf_info)
  38625. +{
  38626. + int result;
  38627. + struct inode *inode;
  38628. + struct page *page;
  38629. + unsigned long num_pages, i;
  38630. + unsigned long start_page;
  38631. + reiser4_key from;
  38632. + reiser4_key to;
  38633. + unsigned count;
  38634. + __u64 offset;
  38635. +
  38636. + assert("nikita-3362", ea_obtained(uf_info));
  38637. + inode = unix_file_info_to_inode(uf_info);
  38638. + assert("nikita-3412", !IS_RDONLY(inode));
  38639. + assert("vs-1649", uf_info->container != UF_CONTAINER_TAILS);
  38640. + assert("", !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV));
  38641. +
  38642. + offset = 0;
  38643. + if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
  38644. + /*
  38645. + * file is marked on disk as there was a conversion which did
  38646. + * not complete due to either crash or some error. Find which
  38647. + * offset tail conversion stopped at
  38648. + */
  38649. + result = find_start(inode, EXTENT_POINTER_ID, &offset);
  38650. + if (result == -ENOENT) {
  38651. + /* no extent found, everything is converted */
  38652. + uf_info->container = UF_CONTAINER_TAILS;
  38653. + complete_conversion(inode);
  38654. + return 0;
  38655. + } else if (result != 0)
  38656. + /* some other error */
  38657. + return result;
  38658. + }
  38659. + reiser4_inode_set_flag(inode, REISER4_PART_IN_CONV);
  38660. +
  38661. + /* number of pages in the file */
  38662. + num_pages =
  38663. + (inode->i_size + - offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
  38664. + start_page = offset >> PAGE_SHIFT;
  38665. +
  38666. + inode_file_plugin(inode)->key_by_inode(inode, offset, &from);
  38667. + to = from;
  38668. +
  38669. + result = 0;
  38670. + for (i = 0; i < num_pages; i++) {
  38671. + __u64 start_byte;
  38672. +
  38673. + result = reserve_extent2tail_iteration(inode);
  38674. + if (result != 0)
  38675. + break;
  38676. + if (i == 0 && offset == 0) {
  38677. + reiser4_inode_set_flag(inode, REISER4_PART_MIXED);
  38678. + reiser4_update_sd(inode);
  38679. + }
  38680. +
  38681. + page = read_mapping_page(inode->i_mapping,
  38682. + (unsigned)(i + start_page), NULL);
  38683. + if (IS_ERR(page)) {
  38684. + result = PTR_ERR(page);
  38685. + warning("edward-1569",
  38686. + "Can not read page %lu of %lu: %i",
  38687. + i, num_pages, result);
  38688. + break;
  38689. + }
  38690. +
  38691. + wait_on_page_locked(page);
  38692. +
  38693. + if (!PageUptodate(page)) {
  38694. + put_page(page);
  38695. + result = RETERR(-EIO);
  38696. + break;
  38697. + }
  38698. +
  38699. + /* cut part of file we have read */
  38700. + start_byte = (__u64) ((i + start_page) << PAGE_SHIFT);
  38701. + set_key_offset(&from, start_byte);
  38702. + set_key_offset(&to, start_byte + PAGE_SIZE - 1);
  38703. + /*
  38704. + * reiser4_cut_tree_object() returns -E_REPEAT to allow atom
  38705. + * commits during over-long truncates. But
  38706. + * extent->tail conversion should be performed in one
  38707. + * transaction.
  38708. + */
  38709. + result = reiser4_cut_tree(reiser4_tree_by_inode(inode), &from,
  38710. + &to, inode, 0);
  38711. +
  38712. + if (result) {
  38713. + put_page(page);
  38714. + warning("edward-1570",
  38715. + "Can not delete converted chunk: %i",
  38716. + result);
  38717. + break;
  38718. + }
  38719. +
  38720. + /* put page data into tree via tail_write */
  38721. + count = PAGE_SIZE;
  38722. + if ((i == (num_pages - 1)) &&
  38723. + (inode->i_size & ~PAGE_MASK))
  38724. + /* last page can be incomplete */
  38725. + count = (inode->i_size & ~PAGE_MASK);
  38726. + while (count) {
  38727. + loff_t pos = start_byte;
  38728. +
  38729. + assert("edward-1537",
  38730. + file != NULL && file->f_path.dentry != NULL);
  38731. + assert("edward-1538",
  38732. + file_inode(file) == inode);
  38733. +
  38734. + result = reiser4_write_tail_noreserve(file, inode,
  38735. + (char __user *)kmap(page),
  38736. + count, &pos);
  38737. + kunmap(page);
  38738. + /* FIXME:
  38739. + may be put_file_hint() instead ? */
  38740. + reiser4_free_file_fsdata(file);
  38741. + if (result <= 0) {
  38742. + /*
  38743. + * Unsuccess in critical place:
  38744. + * extent has been removed,
  38745. + * but tail hasn't been created
  38746. + */
  38747. + warning("edward-1571",
  38748. + "Report the error code %i to developers. Run FSCK",
  38749. + result);
  38750. + put_page(page);
  38751. + reiser4_inode_clr_flag(inode,
  38752. + REISER4_PART_IN_CONV);
  38753. + return result;
  38754. + }
  38755. + count -= result;
  38756. + }
  38757. +
  38758. + /* release page */
  38759. + lock_page(page);
  38760. + /* page is already detached from jnode and mapping. */
  38761. + assert("vs-1086", page->mapping == NULL);
  38762. + assert("nikita-2690",
  38763. + (!PagePrivate(page) && jprivate(page) == 0));
  38764. + /* waiting for writeback completion with page lock held is
  38765. + * perfectly valid. */
  38766. + wait_on_page_writeback(page);
  38767. + reiser4_drop_page(page);
  38768. + /* release reference taken by read_cache_page() above */
  38769. + put_page(page);
  38770. +
  38771. + drop_exclusive_access(uf_info);
  38772. + /* throttle the conversion */
  38773. + reiser4_throttle_write(inode);
  38774. + get_exclusive_access(uf_info);
  38775. + /*
  38776. + * nobody is allowed to complete conversion but a process which
  38777. + * started it
  38778. + */
  38779. + assert("", reiser4_inode_get_flag(inode, REISER4_PART_MIXED));
  38780. + }
  38781. +
  38782. + reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
  38783. +
  38784. + if (i == num_pages) {
  38785. + /* file is converted to formatted items */
  38786. + assert("vs-1698", reiser4_inode_get_flag(inode,
  38787. + REISER4_PART_MIXED));
  38788. + assert("vs-1260",
  38789. + inode_has_no_jnodes(reiser4_inode_data(inode)));
  38790. +
  38791. + uf_info->container = UF_CONTAINER_TAILS;
  38792. + complete_conversion(inode);
  38793. + return 0;
  38794. + }
  38795. + /*
  38796. + * conversion is not complete. Inode was already marked as
  38797. + * REISER4_PART_MIXED and stat-data were updated at the first
  38798. + * iteration of the loop above.
  38799. + */
  38800. + warning("nikita-2282",
  38801. + "Partial conversion of %llu: %lu of %lu: %i",
  38802. + (unsigned long long)get_inode_oid(inode), i,
  38803. + num_pages, result);
  38804. +
  38805. + /* this flag should be cleared, otherwise get_exclusive_access_careful()
  38806. + will fall into infinite loop */
  38807. + assert("edward-1550", !reiser4_inode_get_flag(inode,
  38808. + REISER4_PART_IN_CONV));
  38809. + return result;
  38810. +}
  38811. +
  38812. +/*
  38813. + * Local variables:
  38814. + * c-indentation-style: "K&R"
  38815. + * mode-name: "LC"
  38816. + * c-basic-offset: 8
  38817. + * tab-width: 8
  38818. + * fill-column: 79
  38819. + * scroll-step: 1
  38820. + * End:
  38821. + */
  38822. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/file_ops.c linux-5.16.14/fs/reiser4/plugin/file_ops.c
  38823. --- linux-5.16.14.orig/fs/reiser4/plugin/file_ops.c 1970-01-01 01:00:00.000000000 +0100
  38824. +++ linux-5.16.14/fs/reiser4/plugin/file_ops.c 2022-03-12 13:26:19.672892777 +0100
  38825. @@ -0,0 +1,119 @@
  38826. +/* Copyright 2005 by Hans Reiser, licensing governed by
  38827. + reiser4/README */
  38828. +
  38829. +/* this file contains typical implementations for some of methods of
  38830. + struct file_operations and of struct address_space_operations
  38831. +*/
  38832. +
  38833. +#include "../inode.h"
  38834. +#include "object.h"
  38835. +
  38836. +/* file operations */
  38837. +
  38838. +/* implementation of vfs's llseek method of struct file_operations for
  38839. + typical directory can be found in file_ops_readdir.c
  38840. +*/
  38841. +loff_t reiser4_llseek_dir_common(struct file *, loff_t, int origin);
  38842. +
  38843. +/* implementation of vfs's iterate method of struct file_operations for
  38844. + typical directory can be found in file_ops_readdir.c
  38845. +*/
  38846. +int reiser4_iterate_common(struct file *, struct dir_context *);
  38847. +
  38848. +/**
  38849. + * reiser4_release_dir_common - release of struct file_operations
  38850. + * @inode: inode of released file
  38851. + * @file: file to release
  38852. + *
  38853. + * Implementation of release method of struct file_operations for typical
  38854. + * directory. All it does is freeing of reiser4 specific file data.
  38855. +*/
  38856. +int reiser4_release_dir_common(struct inode *inode, struct file *file)
  38857. +{
  38858. + reiser4_context *ctx;
  38859. +
  38860. + ctx = reiser4_init_context(inode->i_sb);
  38861. + if (IS_ERR(ctx))
  38862. + return PTR_ERR(ctx);
  38863. + reiser4_free_file_fsdata(file);
  38864. + reiser4_exit_context(ctx);
  38865. + return 0;
  38866. +}
  38867. +
  38868. +/* this is common implementation of vfs's fsync method of struct
  38869. + file_operations
  38870. +*/
  38871. +int reiser4_sync_common(struct file *file, loff_t start,
  38872. + loff_t end, int datasync)
  38873. +{
  38874. + reiser4_context *ctx;
  38875. + int result;
  38876. + struct dentry *dentry = file->f_path.dentry;
  38877. +
  38878. + ctx = reiser4_init_context(dentry->d_inode->i_sb);
  38879. + if (IS_ERR(ctx))
  38880. + return PTR_ERR(ctx);
  38881. + result = txnmgr_force_commit_all(dentry->d_inode->i_sb, 0);
  38882. +
  38883. + context_set_commit_async(ctx);
  38884. + reiser4_exit_context(ctx);
  38885. + return result;
  38886. +}
  38887. +
  38888. +/*
  38889. + * common sync method for regular files.
  38890. + *
  38891. + * We are trying to be smart here. Instead of committing all atoms (original
  38892. + * solution), we scan dirty pages of this file and commit all atoms they are
  38893. + * part of.
  38894. + *
  38895. + * Situation is complicated by anonymous pages: i.e., extent-less pages
  38896. + * dirtied through mmap. Fortunately sys_fsync() first calls
  38897. + * filemap_fdatawrite() that will ultimately call reiser4_writepages_dispatch,
  38898. + * insert all missing extents and capture anonymous pages.
  38899. + */
  38900. +int reiser4_sync_file_common(struct file *file, loff_t start, loff_t end, int datasync)
  38901. +{
  38902. + reiser4_context *ctx;
  38903. + txn_atom *atom;
  38904. + reiser4_block_nr reserve;
  38905. + struct dentry *dentry = file->f_path.dentry;
  38906. + struct inode *inode = file->f_mapping->host;
  38907. +
  38908. + int err = filemap_write_and_wait_range(file->f_mapping->host->i_mapping, start, end);
  38909. + if (err)
  38910. + return err;
  38911. +
  38912. + ctx = reiser4_init_context(dentry->d_inode->i_sb);
  38913. + if (IS_ERR(ctx))
  38914. + return PTR_ERR(ctx);
  38915. +
  38916. + inode_lock(inode);
  38917. +
  38918. + reserve = estimate_update_common(dentry->d_inode);
  38919. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) {
  38920. + reiser4_exit_context(ctx);
  38921. + inode_unlock(inode);
  38922. + return RETERR(-ENOSPC);
  38923. + }
  38924. + write_sd_by_inode_common(dentry->d_inode);
  38925. +
  38926. + atom = get_current_atom_locked();
  38927. + spin_lock_txnh(ctx->trans);
  38928. + force_commit_atom(ctx->trans);
  38929. + reiser4_exit_context(ctx);
  38930. + inode_unlock(inode);
  38931. +
  38932. + return 0;
  38933. +}
  38934. +
  38935. +/*
  38936. + * Local variables:
  38937. + * c-indentation-style: "K&R"
  38938. + * mode-name: "LC"
  38939. + * c-basic-offset: 8
  38940. + * tab-width: 8
  38941. + * fill-column: 79
  38942. + * scroll-step: 1
  38943. + * End:
  38944. + */
  38945. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/file_ops_readdir.c linux-5.16.14/fs/reiser4/plugin/file_ops_readdir.c
  38946. --- linux-5.16.14.orig/fs/reiser4/plugin/file_ops_readdir.c 1970-01-01 01:00:00.000000000 +0100
  38947. +++ linux-5.16.14/fs/reiser4/plugin/file_ops_readdir.c 2022-03-12 13:26:19.673892780 +0100
  38948. @@ -0,0 +1,659 @@
  38949. +/* Copyright 2005 by Hans Reiser, licensing governed by
  38950. + * reiser4/README */
  38951. +
  38952. +#include <linux/iversion.h>
  38953. +#include "../inode.h"
  38954. +
  38955. +/* return true, iff @coord points to the valid directory item that is part of
  38956. + * @inode directory. */
  38957. +static int is_valid_dir_coord(struct inode *inode, coord_t *coord)
  38958. +{
  38959. + return plugin_of_group(item_plugin_by_coord(coord),
  38960. + DIR_ENTRY_ITEM_TYPE) &&
  38961. + inode_file_plugin(inode)->owns_item(inode, coord);
  38962. +}
  38963. +
  38964. +/* compare two logical positions within the same directory */
  38965. +static cmp_t dir_pos_cmp(const struct dir_pos *p1, const struct dir_pos *p2)
  38966. +{
  38967. + cmp_t result;
  38968. +
  38969. + assert("nikita-2534", p1 != NULL);
  38970. + assert("nikita-2535", p2 != NULL);
  38971. +
  38972. + result = de_id_cmp(&p1->dir_entry_key, &p2->dir_entry_key);
  38973. + if (result == EQUAL_TO) {
  38974. + int diff;
  38975. +
  38976. + diff = p1->pos - p2->pos;
  38977. + result =
  38978. + (diff < 0) ? LESS_THAN : (diff ? GREATER_THAN : EQUAL_TO);
  38979. + }
  38980. + return result;
  38981. +}
  38982. +
/* see comment before reiser4_readdir_common() for overview of why "adjustment"
 * is necessary.
 *
 * @dir:          file descriptor doing readdir (may be NULL, see assert below)
 * @readdir_spot: that descriptor's remembered readdir position, updated here
 * @mod_point:    position at which the directory was modified
 * @adj:          +1 for an added entry, -1 for a removed one
 *
 * Called under the directory inode's spin lock (see reiser4_adjust_dir_file),
 * so no blocking operations are allowed here.
 */
static void
adjust_dir_pos(struct file *dir, struct readdir_pos *readdir_spot,
	       const struct dir_pos *mod_point, int adj)
{
	struct dir_pos *pos;

	/*
	 * new directory entry was added (adj == +1) or removed (adj == -1) at
	 * the @mod_point. Directory file descriptor @dir is doing readdir and
	 * is currently positioned at @readdir_spot. Latter has to be updated
	 * to maintain stable readdir.
	 */
	/* directory is positioned to the beginning. */
	if (readdir_spot->entry_no == 0)
		return;

	pos = &readdir_spot->position;
	switch (dir_pos_cmp(mod_point, pos)) {
	case LESS_THAN:
		/* @mod_point is _before_ @readdir_spot, that is, entry was
		 * added/removed on the left (in key order) of current
		 * position. */
		/* logical number of directory entry readdir is "looking" at
		 * changes */
		readdir_spot->entry_no += adj;
		assert("nikita-2577",
		       ergo(dir != NULL,
			    reiser4_get_dir_fpos(dir, dir->f_pos) + adj >= 0));
		if (de_id_cmp(&pos->dir_entry_key,
			      &mod_point->dir_entry_key) == EQUAL_TO) {
			assert("nikita-2575", mod_point->pos < pos->pos);
			/*
			 * if entry added/removed has the same key as current
			 * for readdir, update counter of duplicate keys in
			 * @readdir_spot.
			 */
			pos->pos += adj;
		}
		break;
	case GREATER_THAN:
		/* directory is modified after @pos: nothing to do. */
		break;
	case EQUAL_TO:
		/* cannot insert an entry readdir is looking at, because it
		   already exists. */
		assert("nikita-2576", adj < 0);
		/* directory entry to which @pos points to is being
		   removed.

		   NOTE-NIKITA: Right thing to do is to update @pos to point
		   to the next entry. This is complex (we are under spin-lock
		   for one thing). Just rewind it to the beginning. Next
		   readdir will have to scan the beginning of
		   directory. Proper solution is to use semaphore in
		   spin lock's stead and use rewind_right() here.

		   NOTE-NIKITA: now, semaphore is used, so...
		 */
		memset(readdir_spot, 0, sizeof *readdir_spot);
	}
}
  39046. +
  39047. +/* scan all file-descriptors for this directory and adjust their
  39048. + positions respectively. Should be used by implementations of
  39049. + add_entry and rem_entry of dir plugin */
  39050. +void reiser4_adjust_dir_file(struct inode *dir, const struct dentry *de,
  39051. + int offset, int adj)
  39052. +{
  39053. + reiser4_file_fsdata *scan;
  39054. + struct dir_pos mod_point;
  39055. +
  39056. + assert("nikita-2536", dir != NULL);
  39057. + assert("nikita-2538", de != NULL);
  39058. + assert("nikita-2539", adj != 0);
  39059. +
  39060. + build_de_id(dir, &de->d_name, &mod_point.dir_entry_key);
  39061. + mod_point.pos = offset;
  39062. +
  39063. + spin_lock_inode(dir);
  39064. +
  39065. + /*
  39066. + * new entry was added/removed in directory @dir. Scan all file
  39067. + * descriptors for @dir that are currently involved into @readdir and
  39068. + * update them.
  39069. + */
  39070. +
  39071. + list_for_each_entry(scan, get_readdir_list(dir), dir.linkage)
  39072. + adjust_dir_pos(scan->back, &scan->dir.readdir, &mod_point, adj);
  39073. +
  39074. + spin_unlock_inode(dir);
  39075. +}
  39076. +
  39077. +/*
  39078. + * traverse tree to start/continue readdir from the readdir position @pos.
  39079. + */
  39080. +static int dir_go_to(struct file *dir, struct readdir_pos *pos, tap_t *tap)
  39081. +{
  39082. + reiser4_key key;
  39083. + int result;
  39084. + struct inode *inode;
  39085. +
  39086. + assert("nikita-2554", pos != NULL);
  39087. +
  39088. + inode = file_inode(dir);
  39089. + result = inode_dir_plugin(inode)->build_readdir_key(dir, &key);
  39090. + if (result != 0)
  39091. + return result;
  39092. + result = reiser4_object_lookup(inode,
  39093. + &key,
  39094. + tap->coord,
  39095. + tap->lh,
  39096. + tap->mode,
  39097. + FIND_EXACT,
  39098. + LEAF_LEVEL, LEAF_LEVEL,
  39099. + 0, &tap->ra_info);
  39100. + if (result == CBK_COORD_FOUND)
  39101. + result = rewind_right(tap, (int)pos->position.pos);
  39102. + else {
  39103. + tap->coord->node = NULL;
  39104. + done_lh(tap->lh);
  39105. + result = RETERR(-EIO);
  39106. + }
  39107. + return result;
  39108. +}
  39109. +
/*
 * handling of non-unique keys: calculate at what ordinal position within
 * sequence of directory items with identical keys @pos is.
 *
 * Scans backwards (go_prev_unit) from @tap's coordinate, counting entries
 * that share the same de_id, until the duplicate run ends or scanning fails.
 * On success pos->position.pos holds the ordinal and pos->position
 * .dir_entry_key the run's de_id. Returns 0, -ENOENT if @tap does not point
 * to a valid entry of @inode, -EINVAL on an invalid coord met while scanning,
 * or whatever error go_prev_unit() reported.
 */
static int set_pos(struct inode *inode, struct readdir_pos *pos, tap_t *tap)
{
	int result;
	coord_t coord;
	lock_handle lh;
	tap_t scan;          /* private tap so @tap itself is not disturbed */
	de_id *did;
	reiser4_key de_key;

	coord_init_zero(&coord);
	init_lh(&lh);
	reiser4_tap_init(&scan, &coord, &lh, ZNODE_READ_LOCK);
	reiser4_tap_copy(&scan, tap);
	reiser4_tap_load(&scan);
	pos->position.pos = 0;

	did = &pos->position.dir_entry_key;

	if (is_valid_dir_coord(inode, scan.coord)) {

		build_de_id_by_key(unit_key_by_coord(scan.coord, &de_key), did);

		while (1) {

			result = go_prev_unit(&scan);
			if (result != 0)
				break;

			if (!is_valid_dir_coord(inode, scan.coord)) {
				result = -EINVAL;
				break;
			}

			/* get key of directory entry */
			unit_key_by_coord(scan.coord, &de_key);
			if (de_id_key_cmp(did, &de_key) != EQUAL_TO) {
				/* duplicate-sequence is over */
				break;
			}
			pos->position.pos++;
		}
	} else
		result = RETERR(-ENOENT);
	/* release and tear down the private tap before returning */
	reiser4_tap_relse(&scan);
	reiser4_tap_done(&scan);
	return result;
}
  39161. +
/*
 * "rewind" directory to @offset, i.e., set @pos and @tap correspondingly.
 *
 * @dir:  directory being read
 * @fpos: effective file position (may differ from dir->f_pos, see
 *        dir_readdir_init)
 * @pos:  remembered readdir position; updated on success
 * @tap:  tree access point to position at the target entry
 *
 * Computes the signed distance between the requested position and the
 * remembered one and walks the tree left or right by that many entries.
 */
static int dir_rewind(struct file *dir, loff_t *fpos, struct readdir_pos *pos, tap_t *tap)
{
	__u64 destination;
	__s64 shift;
	int result;
	struct inode *inode;
	loff_t dirpos;

	assert("nikita-2553", dir != NULL);
	assert("nikita-2548", pos != NULL);
	assert("nikita-2551", tap->coord != NULL);
	assert("nikita-2552", tap->lh != NULL);

	dirpos = reiser4_get_dir_fpos(dir, *fpos);
	shift = dirpos - pos->fpos;
	/* this is logical directory entry within @dir which we are rewinding
	 * to */
	destination = pos->entry_no + shift;

	inode = file_inode(dir);
	if (dirpos < 0)
		return RETERR(-EINVAL);
	else if (destination == 0ll || dirpos == 0) {
		/* rewind to the beginning of directory */
		memset(pos, 0, sizeof *pos);
		return dir_go_to(dir, pos, tap);
	} else if (destination >= inode->i_size)
		/* position past the last entry */
		return RETERR(-ENOENT);

	if (shift < 0) {
		/* I am afraid of negative numbers */
		shift = -shift;
		/* rewinding to the left */
		if (shift <= (int)pos->position.pos) {
			/* destination is within sequence of entries with
			   duplicate keys. */
			result = dir_go_to(dir, pos, tap);
		} else {
			shift -= pos->position.pos;
			while (1) {
				/* repetitions: deadlock is possible when
				   going to the left. */
				result = dir_go_to(dir, pos, tap);
				if (result == 0) {
					result = rewind_left(tap, shift);
					if (result == -E_DEADLOCK) {
						/* retry from scratch after
						 * dropping the tap */
						reiser4_tap_done(tap);
						continue;
					}
				}
				break;
			}
		}
	} else {
		/* rewinding to the right */
		result = dir_go_to(dir, pos, tap);
		if (result == 0)
			result = rewind_right(tap, shift);
	}
	if (result == 0) {
		/* recompute the duplicate-key ordinal at the new position */
		result = set_pos(inode, pos, tap);
		if (result == 0) {
			/* update pos->position.pos */
			pos->entry_no = destination;
			pos->fpos = dirpos;
		}
	}
	return result;
}
  39234. +
  39235. +/*
  39236. + * Function that is called by common_readdir() on each directory entry while
  39237. + * doing readdir. ->filldir callback may block, so we had to release long term
  39238. + * lock while calling it. To avoid repeating tree traversal, seal is used. If
  39239. + * seal is broken, we return -E_REPEAT. Node is unlocked in this case.
  39240. + *
  39241. + * Whether node is unlocked in case of any other error is undefined. It is
  39242. + * guaranteed to be still locked if success (0) is returned.
  39243. + *
  39244. + * When ->filldir() wants no more, feed_entry() returns 1, and node is
  39245. + * unlocked.
  39246. + */
  39247. +static int
  39248. +feed_entry(tap_t *tap, struct dir_context *context)
  39249. +{
  39250. + item_plugin *iplug;
  39251. + char *name;
  39252. + reiser4_key sd_key;
  39253. + int result;
  39254. + char buf[DE_NAME_BUF_LEN];
  39255. + char name_buf[32];
  39256. + char *local_name;
  39257. + unsigned file_type;
  39258. + seal_t seal;
  39259. + coord_t *coord;
  39260. + reiser4_key entry_key;
  39261. +
  39262. + coord = tap->coord;
  39263. + iplug = item_plugin_by_coord(coord);
  39264. +
  39265. + /* pointer to name within the node */
  39266. + name = iplug->s.dir.extract_name(coord, buf);
  39267. + assert("nikita-1371", name != NULL);
  39268. +
  39269. + /* key of object the entry points to */
  39270. + if (iplug->s.dir.extract_key(coord, &sd_key) != 0)
  39271. + return RETERR(-EIO);
  39272. +
  39273. + /* we must release longterm znode lock before calling filldir to avoid
  39274. + deadlock which may happen if filldir causes page fault. So, copy
  39275. + name to intermediate buffer */
  39276. + if (strlen(name) + 1 > sizeof(name_buf)) {
  39277. + local_name = kmalloc(strlen(name) + 1,
  39278. + reiser4_ctx_gfp_mask_get());
  39279. + if (local_name == NULL)
  39280. + return RETERR(-ENOMEM);
  39281. + } else
  39282. + local_name = name_buf;
  39283. +
  39284. + strcpy(local_name, name);
  39285. + file_type = iplug->s.dir.extract_file_type(coord);
  39286. +
  39287. + unit_key_by_coord(coord, &entry_key);
  39288. + reiser4_seal_init(&seal, coord, &entry_key);
  39289. +
  39290. + longterm_unlock_znode(tap->lh);
  39291. +
  39292. + /*
  39293. + * send information about directory entry to the ->filldir() filler
  39294. + * supplied to us by caller (VFS).
  39295. + *
  39296. + * ->filldir is entitled to do weird things. For example, ->filldir
  39297. + * supplied by knfsd re-enters file system. Make sure no locks are
  39298. + * held.
  39299. + */
  39300. + assert("nikita-3436", lock_stack_isclean(get_current_lock_stack()));
  39301. +
  39302. + reiser4_txn_restart_current();
  39303. + if (!dir_emit(context, name, (int)strlen(name),
  39304. + /* inode number of object bounden by this entry */
  39305. + oid_to_uino(get_key_objectid(&sd_key)), file_type))
  39306. + /* ->filldir() is satisfied. (no space in buffer, IOW) */
  39307. + result = 1;
  39308. + else
  39309. + result = reiser4_seal_validate(&seal, coord, &entry_key,
  39310. + tap->lh, tap->mode,
  39311. + ZNODE_LOCK_HIPRI);
  39312. +
  39313. + if (local_name != name_buf)
  39314. + kfree(local_name);
  39315. +
  39316. + return result;
  39317. +}
  39318. +
  39319. +static void move_entry(struct readdir_pos *pos, coord_t *coord)
  39320. +{
  39321. + reiser4_key de_key;
  39322. + de_id *did;
  39323. +
  39324. + /* update @pos */
  39325. + ++pos->entry_no;
  39326. + did = &pos->position.dir_entry_key;
  39327. +
  39328. + /* get key of directory entry */
  39329. + unit_key_by_coord(coord, &de_key);
  39330. +
  39331. + if (de_id_key_cmp(did, &de_key) == EQUAL_TO)
  39332. + /* we are within sequence of directory entries
  39333. + with duplicate keys. */
  39334. + ++pos->position.pos;
  39335. + else {
  39336. + pos->position.pos = 0;
  39337. + build_de_id_by_key(&de_key, did);
  39338. + }
  39339. + ++pos->fpos;
  39340. +}
  39341. +
  39342. +/*
  39343. + * STATELESS READDIR
  39344. + *
  39345. + * readdir support in reiser4 relies on ability to update readdir_pos embedded
  39346. + * into reiser4_file_fsdata on each directory modification (name insertion and
  39347. + * removal), see reiser4_readdir_common() function below. This obviously doesn't
  39348. + * work when reiser4 is accessed over NFS, because NFS doesn't keep any state
  39349. + * across client READDIR requests for the same directory.
  39350. + *
  39351. + * To address this we maintain a "pool" of detached reiser4_file_fsdata
  39352. + * (d_cursor). Whenever NFS readdir request comes, we detect this, and try to
  39353. + * find detached reiser4_file_fsdata corresponding to previous readdir
  39354. + * request. In other words, additional state is maintained on the
  39355. + * server. (This is somewhat contrary to the design goals of NFS protocol.)
  39356. + *
  39357. + * To efficiently detect when our ->readdir() method is called by NFS server,
  39358. + * dentry is marked as "stateless" in reiser4_decode_fh() (this is checked by
  39359. + * file_is_stateless() function).
  39360. + *
  39361. + * To find out d_cursor in the pool, we encode client id (cid) in the highest
  39362. + * bits of NFS readdir cookie: when first readdir request comes to the given
  39363. + * directory from the given client, cookie is set to 0. This situation is
  39364. + * detected, global cid_counter is incremented, and stored in highest bits of
  39365. + * all direntry offsets returned to the client, including last one. As the
  39366. + * only valid readdir cookie is one obtained as direntry->offset, we are
  39367. + * guaranteed that next readdir request (continuing current one) will have
  39368. + * current cid in the highest bits of starting readdir cookie. All d_cursors
  39369. + * are hashed into per-super-block hash table by (oid, cid) key.
  39370. + *
  39371. + * In addition d_cursors are placed into per-super-block radix tree where they
  39372. + * are keyed by oid alone. This is necessary to efficiently remove them during
  39373. + * rmdir.
  39374. + *
  39375. + * At last, currently unused d_cursors are linked into special list. This list
  39376. + * is used by d_cursor_shrink to reclaim d_cursors on memory pressure.
  39377. + *
  39378. + */
  39379. +
  39380. +/*
  39381. + * prepare for readdir.
  39382. + *
  39383. + * NOTE: @f->f_pos may be out-of-date (iterate() vs readdir()).
  39384. + * @fpos is effective position.
  39385. + */
  39386. +static int dir_readdir_init(struct file *f, loff_t* fpos, tap_t *tap,
  39387. + struct readdir_pos **pos)
  39388. +{
  39389. + struct inode *inode;
  39390. + reiser4_file_fsdata *fsdata;
  39391. + int result;
  39392. +
  39393. + assert("nikita-1359", f != NULL);
  39394. + inode = file_inode(f);
  39395. + assert("nikita-1360", inode != NULL);
  39396. +
  39397. + if (!S_ISDIR(inode->i_mode))
  39398. + return RETERR(-ENOTDIR);
  39399. +
  39400. + /* try to find detached readdir state */
  39401. + result = reiser4_attach_fsdata(f, fpos, inode);
  39402. + if (result != 0)
  39403. + return result;
  39404. +
  39405. + fsdata = reiser4_get_file_fsdata(f);
  39406. + assert("nikita-2571", fsdata != NULL);
  39407. + if (IS_ERR(fsdata))
  39408. + return PTR_ERR(fsdata);
  39409. +
  39410. + /* add file descriptor to the readdir list hanging of directory
  39411. + * inode. This list is used to scan "readdirs-in-progress" while
  39412. + * inserting or removing names in the directory. */
  39413. + spin_lock_inode(inode);
  39414. + if (list_empty_careful(&fsdata->dir.linkage))
  39415. + list_add(&fsdata->dir.linkage, get_readdir_list(inode));
  39416. + *pos = &fsdata->dir.readdir;
  39417. + spin_unlock_inode(inode);
  39418. +
  39419. + /* move @tap to the current position */
  39420. + return dir_rewind(f, fpos, *pos, tap);
  39421. +}
  39422. +
/* this is implementation of vfs's llseek method of struct file_operations for
   typical directory
   See comment before reiser4_iterate_common() for explanation.

   Validates the new position by actually rewinding the directory to it
   (dir_readdir_init + dir_rewind) under the inode lock.
*/
loff_t reiser4_llseek_dir_common(struct file *file, loff_t off, int origin)
{
	reiser4_context *ctx;
	loff_t result;
	struct inode *inode;

	inode = file_inode(file);

	ctx = reiser4_init_context(inode->i_sb);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	inode_lock(inode);

	/* update ->f_pos */
	result = default_llseek_unlocked(file, off, origin);
	if (result >= 0) {
		int ff;
		coord_t coord;
		lock_handle lh;
		tap_t tap;
		struct readdir_pos *pos;

		coord_init_zero(&coord);
		init_lh(&lh);
		reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK);

		/* try to position at the new offset; an error here becomes
		 * the llseek result */
		ff = dir_readdir_init(file, &file->f_pos, &tap, &pos);
		reiser4_detach_fsdata(file);
		if (ff != 0)
			result = (loff_t) ff;
		reiser4_tap_done(&tap);
	}
	/* NOTE(review): reiser4_detach_fsdata() was already called inside
	 * the branch above; this second call looks redundant -- confirm the
	 * function is idempotent / needed on the result < 0 path. */
	reiser4_detach_fsdata(file);
	inode_unlock(inode);

	reiser4_exit_context(ctx);
	return result;
}
  39466. +
  39467. +/* this is common implementation of vfs's readdir method of struct
  39468. + file_operations
  39469. +
  39470. + readdir problems:
  39471. +
  39472. + readdir(2)/getdents(2) interface is based on implicit assumption that
  39473. + readdir can be restarted from any particular point by supplying file system
  39474. + with off_t-full of data. That is, file system fills ->d_off field in struct
  39475. + dirent and later user passes ->d_off to the seekdir(3), which is, actually,
  39476. + implemented by glibc as lseek(2) on directory.
  39477. +
  39478. + Reiser4 cannot restart readdir from 64 bits of data, because two last
  39479. + components of the key of directory entry are unknown, which given 128 bits:
  39480. + locality and type fields in the key of directory entry are always known, to
  39481. + start readdir() from given point objectid and offset fields have to be
  39482. + filled.
  39483. +
  39484. + Traditional UNIX API for scanning through directory
  39485. + (readdir/seekdir/telldir/opendir/closedir/rewinddir/getdents) is based on the
  39486. + assumption that directory is structured very much like regular file, in
  39487. + particular, it is implied that each name within given directory (directory
  39488. + entry) can be uniquely identified by scalar offset and that such offset is
  39489. + stable across the life-time of the name it identifies.
  39490. +
  39491. + This is manifestly not so for reiser4. In reiser4 the only stable unique
  39492. + identifies for the directory entry is its key that doesn't fit into
  39493. + seekdir/telldir API.
  39494. +
  39495. + solution:
  39496. +
  39497. + Within each file descriptor participating in readdir-ing of directory
  39498. + plugin/dir/dir.h:readdir_pos is maintained. This structure keeps track of
  39499. + the "current" directory entry that file descriptor looks at. It contains a
  39500. + key of directory entry (plus some additional info to deal with non-unique
  39501. + keys that we wouldn't dwell onto here) and a logical position of this
  39502. + directory entry starting from the beginning of the directory, that is
  39503. + ordinal number of this entry in the readdir order.
  39504. +
  39505. + Obviously this logical position is not stable in the face of directory
  39506. + modifications. To work around this, on each addition or removal of directory
  39507. + entry all file descriptors for directory inode are scanned and their
  39508. + readdir_pos are updated accordingly (adjust_dir_pos()).
  39509. +*/
int reiser4_iterate_common(struct file *f /* directory file being read */,
			   struct dir_context *context /* callback data passed to us by VFS */)
{
	reiser4_context *ctx;
	int result;
	struct inode *inode;
	coord_t coord;
	lock_handle lh;
	tap_t tap;
	struct readdir_pos *pos;

	assert("nikita-1359", f != NULL);
	inode = file_inode(f);
	assert("nikita-1360", inode != NULL);

	if (!S_ISDIR(inode->i_mode))
		return RETERR(-ENOTDIR);

	ctx = reiser4_init_context(inode->i_sb);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	coord_init_zero(&coord);
	init_lh(&lh);
	reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK);

	reiser4_readdir_readahead_init(inode, &tap);

	/* restart point: feed_entry() returns -E_REPEAT when its seal was
	 * broken while the znode lock was dropped, and the whole traversal
	 * has to be re-established from the remembered position */
repeat:
	result = dir_readdir_init(f, &context->pos, &tap, &pos);
	if (result == 0) {
		result = reiser4_tap_load(&tap);
		/* scan entries one by one feeding them to @filld */
		while (result == 0) {
			coord_t *coord;

			coord = tap.coord;
			assert("nikita-2572", coord_is_existing_unit(coord));
			assert("nikita-3227", is_valid_dir_coord(inode, coord));

			result = feed_entry(&tap, context);
			if (result > 0) {
				/* ->filldir() buffer is full: stop here */
				break;
			} else if (result == 0) {
				++context->pos;
				result = go_next_unit(&tap);
				if (result == -E_NO_NEIGHBOR ||
				    result == -ENOENT) {
					/* ran off the end of the directory */
					result = 0;
					break;
				} else if (result == 0) {
					if (is_valid_dir_coord(inode, coord))
						move_entry(pos, coord);
					else
						break;
				}
			} else if (result == -E_REPEAT) {
				/* feed_entry() had to restart. */
				++context->pos;
				reiser4_tap_relse(&tap);
				goto repeat;
			} else
				warning("vs-1617",
					"reiser4_readdir_common: unexpected error %d",
					result);
		}
		reiser4_tap_relse(&tap);

		if (result >= 0)
			f->f_version = inode_query_iversion(inode);
	} else if (result == -E_NO_NEIGHBOR || result == -ENOENT)
		/* an empty/finished directory is not an error for readdir */
		result = 0;
	reiser4_tap_done(&tap);
	reiser4_detach_fsdata(f);

	/* try to update directory's atime */
	if (reiser4_grab_space_force(inode_file_plugin(inode)->estimate.update(inode),
				     BA_CAN_COMMIT) != 0)
		warning("", "failed to update atime on readdir: %llu",
			get_inode_oid(inode));
	else
		file_accessed(f);

	context_set_commit_async(ctx);
	reiser4_exit_context(ctx);

	/* positive values (buffer full) are success from the VFS viewpoint */
	return (result <= 0) ? result : 0;
}
  39598. +
  39599. +/*
  39600. + * Local variables:
  39601. + * c-indentation-style: "K&R"
  39602. + * mode-name: "LC"
  39603. + * c-basic-offset: 8
  39604. + * tab-width: 8
  39605. + * fill-column: 79
  39606. + * End:
  39607. + */
  39608. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/file_plugin_common.c linux-5.16.14/fs/reiser4/plugin/file_plugin_common.c
  39609. --- linux-5.16.14.orig/fs/reiser4/plugin/file_plugin_common.c 1970-01-01 01:00:00.000000000 +0100
  39610. +++ linux-5.16.14/fs/reiser4/plugin/file_plugin_common.c 2022-03-12 13:26:19.673892780 +0100
  39611. @@ -0,0 +1,992 @@
  39612. +/* Copyright 2005 by Hans Reiser, licensing governed by
  39613. + reiser4/README */
  39614. +
  39615. +/* this file contains typical implementations for most of methods of
  39616. + file plugin
  39617. +*/
  39618. +
  39619. +#include "../inode.h"
  39620. +#include "object.h"
  39621. +#include "../safe_link.h"
  39622. +
  39623. +static int insert_new_sd(struct inode *inode);
  39624. +static int update_sd(struct inode *inode);
  39625. +
  39626. +/* this is common implementation of write_sd_by_inode method of file plugin
  39627. + either insert stat data or update it
  39628. + */
  39629. +int write_sd_by_inode_common(struct inode *inode/* object to save */)
  39630. +{
  39631. + int result;
  39632. +
  39633. + assert("nikita-730", inode != NULL);
  39634. +
  39635. + if (reiser4_inode_get_flag(inode, REISER4_NO_SD))
  39636. + /* object doesn't have stat-data yet */
  39637. + result = insert_new_sd(inode);
  39638. + else
  39639. + result = update_sd(inode);
  39640. + if (result != 0 && result != -ENAMETOOLONG && result != -ENOMEM)
  39641. + /* Don't issue warnings about "name is too long" */
  39642. + warning("nikita-2221", "Failed to save sd for %llu: %i",
  39643. + (unsigned long long)get_inode_oid(inode), result);
  39644. + return result;
  39645. +}
  39646. +
  39647. +/* this is common implementation of key_by_inode method of file plugin
  39648. + */
  39649. +int
  39650. +key_by_inode_and_offset_common(struct inode *inode, loff_t off,
  39651. + reiser4_key * key)
  39652. +{
  39653. + reiser4_key_init(key);
  39654. + set_key_locality(key, reiser4_inode_data(inode)->locality_id);
  39655. + set_key_ordering(key, get_inode_ordering(inode));
  39656. + set_key_objectid(key, get_inode_oid(inode)); /*FIXME: inode->i_ino */
  39657. + set_key_type(key, KEY_BODY_MINOR);
  39658. + set_key_offset(key, (__u64) off);
  39659. + return 0;
  39660. +}
  39661. +
  39662. +/* this is common implementation of set_plug_in_inode method of file plugin
  39663. + */
  39664. +int set_plug_in_inode_common(struct inode *object /* inode to set plugin on */ ,
  39665. + struct inode *parent /* parent object */ ,
  39666. + reiser4_object_create_data * data /* creational
  39667. + * data */ )
  39668. +{
  39669. + __u64 mask;
  39670. +
  39671. + object->i_mode = data->mode;
  39672. + /* this should be plugin decision */
  39673. + object->i_uid = current_fsuid();
  39674. + object->i_mtime = object->i_atime = object->i_ctime = current_time(object);
  39675. +
  39676. + /* support for BSD style group-id assignment. See mount's manual page
  39677. + description of bsdgroups ext2 mount options for more details */
  39678. + if (reiser4_is_set(object->i_sb, REISER4_BSD_GID))
  39679. + object->i_gid = parent->i_gid;
  39680. + else if (parent->i_mode & S_ISGID) {
  39681. + /* parent directory has setgid bit */
  39682. + object->i_gid = parent->i_gid;
  39683. + if (S_ISDIR(object->i_mode))
  39684. + /* setgid is inherited by sub-directories */
  39685. + object->i_mode |= S_ISGID;
  39686. + } else
  39687. + object->i_gid = current_fsgid();
  39688. +
  39689. + /* this object doesn't have stat-data yet */
  39690. + reiser4_inode_set_flag(object, REISER4_NO_SD);
  39691. +#if 0
  39692. + /* this is now called after all inode plugins are initialized:
  39693. + do_create_vfs_child after adjust_to_parent */
  39694. + /* setup inode and file-operations for this inode */
  39695. + setup_inode_ops(object, data);
  39696. +#endif
  39697. + reiser4_seal_init(&reiser4_inode_data(object)->sd_seal, NULL, NULL);
  39698. + mask = (1 << UNIX_STAT) | (1 << LIGHT_WEIGHT_STAT);
  39699. + if (!reiser4_is_set(object->i_sb, REISER4_32_BIT_TIMES))
  39700. + mask |= (1 << LARGE_TIMES_STAT);
  39701. +
  39702. + reiser4_inode_data(object)->extmask = mask;
  39703. + return 0;
  39704. +}
  39705. +
  39706. +/* this is common implementation of adjust_to_parent method of file plugin for
  39707. + regular files
  39708. + */
  39709. +int adjust_to_parent_common(struct inode *object /* new object */ ,
  39710. + struct inode *parent /* parent directory */ ,
  39711. + struct inode *root/* root directory */)
  39712. +{
  39713. + assert("nikita-2165", object != NULL);
  39714. + if (parent == NULL)
  39715. + parent = root;
  39716. + assert("nikita-2069", parent != NULL);
  39717. +
  39718. + /*
  39719. + * inherit missing plugins from parent
  39720. + */
  39721. +
  39722. + grab_plugin_pset(object, parent, PSET_FILE);
  39723. + grab_plugin_pset(object, parent, PSET_SD);
  39724. + grab_plugin_pset(object, parent, PSET_FORMATTING);
  39725. + grab_plugin_pset(object, parent, PSET_PERM);
  39726. + return 0;
  39727. +}
  39728. +
  39729. +/* this is common implementation of adjust_to_parent method of file plugin for
  39730. + typical directories
  39731. + */
  39732. +int adjust_to_parent_common_dir(struct inode *object /* new object */ ,
  39733. + struct inode *parent /* parent directory */ ,
  39734. + struct inode *root/* root directory */)
  39735. +{
  39736. + int result = 0;
  39737. + pset_member memb;
  39738. +
  39739. + assert("nikita-2166", object != NULL);
  39740. + if (parent == NULL)
  39741. + parent = root;
  39742. + assert("nikita-2167", parent != NULL);
  39743. +
  39744. + /*
  39745. + * inherit missing plugins from parent
  39746. + */
  39747. + for (memb = 0; memb < PSET_LAST; ++memb) {
  39748. + result = grab_plugin_pset(object, parent, memb);
  39749. + if (result != 0)
  39750. + break;
  39751. + }
  39752. + return result;
  39753. +}
  39754. +
  39755. +int adjust_to_parent_cryptcompress(struct inode *object /* new object */ ,
  39756. + struct inode *parent /* parent directory */,
  39757. + struct inode *root/* root directory */)
  39758. +{
  39759. + int result;
  39760. + result = adjust_to_parent_common(object, parent, root);
  39761. + if (result)
  39762. + return result;
  39763. + assert("edward-1416", parent != NULL);
  39764. +
  39765. + grab_plugin_pset(object, parent, PSET_CLUSTER);
  39766. + grab_plugin_pset(object, parent, PSET_CIPHER);
  39767. + grab_plugin_pset(object, parent, PSET_DIGEST);
  39768. + grab_plugin_pset(object, parent, PSET_COMPRESSION);
  39769. + grab_plugin_pset(object, parent, PSET_COMPRESSION_MODE);
  39770. +
  39771. + return 0;
  39772. +}
  39773. +
  39774. +/* this is common implementation of create_object method of file plugin
  39775. + */
  39776. +int reiser4_create_object_common(struct inode *object, struct inode *parent,
  39777. + reiser4_object_create_data * data)
  39778. +{
  39779. + reiser4_block_nr reserve;
  39780. + assert("nikita-744", object != NULL);
  39781. + assert("nikita-745", parent != NULL);
  39782. + assert("nikita-747", data != NULL);
  39783. + assert("nikita-748", reiser4_inode_get_flag(object, REISER4_NO_SD));
  39784. +
  39785. + reserve = estimate_create_common(object);
  39786. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT))
  39787. + return RETERR(-ENOSPC);
  39788. + return write_sd_by_inode_common(object);
  39789. +}
  39790. +
  39791. +static int common_object_delete_no_reserve(struct inode *inode);
  39792. +
  39793. +/**
  39794. + * reiser4_delete_object_common - delete_object of file_plugin
  39795. + * @inode: inode to be deleted
  39796. + *
  39797. + * This is common implementation of delete_object method of file_plugin. It
  39798. + * applies to object its deletion consists of removing two items - stat data
  39799. + * and safe-link.
  39800. + */
  39801. +int reiser4_delete_object_common(struct inode *inode)
  39802. +{
  39803. + int result;
  39804. +
  39805. + assert("nikita-1477", inode != NULL);
  39806. + /* FIXME: if file body deletion failed (i/o error, for instance),
  39807. + inode->i_size can be != 0 here */
  39808. + assert("nikita-3420", inode->i_size == 0 || S_ISLNK(inode->i_mode));
  39809. + assert("nikita-3421", inode->i_nlink == 0);
  39810. +
  39811. + if (!reiser4_inode_get_flag(inode, REISER4_NO_SD)) {
  39812. + reiser4_block_nr reserve;
  39813. +
  39814. + /* grab space which is needed to remove 2 items from the tree:
  39815. + stat data and safe-link */
  39816. + reserve = 2 *
  39817. + estimate_one_item_removal(reiser4_tree_by_inode(inode));
  39818. + if (reiser4_grab_space_force(reserve,
  39819. + BA_RESERVED | BA_CAN_COMMIT))
  39820. + return RETERR(-ENOSPC);
  39821. + result = common_object_delete_no_reserve(inode);
  39822. + } else
  39823. + result = 0;
  39824. + return result;
  39825. +}
  39826. +
  39827. +/**
  39828. + * reiser4_delete_dir_common - delete_object of file_plugin
  39829. + * @inode: inode to be deleted
  39830. + *
  39831. + * This is common implementation of delete_object method of file_plugin for
  39832. + * typical directory. It calls done method of dir_plugin to remove "." and
  39833. + * removes stat data and safe-link.
  39834. + */
  39835. +int reiser4_delete_dir_common(struct inode *inode)
  39836. +{
  39837. + int result;
  39838. + dir_plugin *dplug;
  39839. +
  39840. + assert("", (get_current_context() &&
  39841. + get_current_context()->trans->atom == NULL));
  39842. +
  39843. + dplug = inode_dir_plugin(inode);
  39844. + assert("vs-1101", dplug && dplug->done);
  39845. +
  39846. + /* kill cursors which might be attached to inode */
  39847. + reiser4_kill_cursors(inode);
  39848. +
  39849. + /* grab space enough for removing two items */
  39850. + if (reiser4_grab_space
  39851. + (2 * estimate_one_item_removal(reiser4_tree_by_inode(inode)),
  39852. + BA_RESERVED | BA_CAN_COMMIT))
  39853. + return RETERR(-ENOSPC);
  39854. +
  39855. + result = dplug->done(inode);
  39856. + if (!result)
  39857. + result = common_object_delete_no_reserve(inode);
  39858. + return result;
  39859. +}
  39860. +
  39861. +/* this is common implementation of add_link method of file plugin
  39862. + */
  39863. +int reiser4_add_link_common(struct inode *object, struct inode *parent)
  39864. +{
  39865. + /*
  39866. + * increment ->i_nlink and update ->i_ctime
  39867. + */
  39868. +
  39869. + INODE_INC_NLINK(object);
  39870. + object->i_ctime = current_time(object);
  39871. + return 0;
  39872. +}
  39873. +
  39874. +/* this is common implementation of rem_link method of file plugin
  39875. + */
  39876. +int reiser4_rem_link_common(struct inode *object, struct inode *parent)
  39877. +{
  39878. + assert("nikita-2021", object != NULL);
  39879. + assert("nikita-2163", object->i_nlink > 0);
  39880. +
  39881. + /*
  39882. + * decrement ->i_nlink and update ->i_ctime
  39883. + */
  39884. +
  39885. + INODE_DROP_NLINK(object);
  39886. + object->i_ctime = current_time(object);
  39887. + return 0;
  39888. +}
  39889. +
  39890. +/* this is common implementation of rem_link method of file plugin for typical
  39891. + directory
  39892. +*/
  39893. +int rem_link_common_dir(struct inode *object, struct inode *parent UNUSED_ARG)
  39894. +{
  39895. + assert("nikita-20211", object != NULL);
  39896. + assert("nikita-21631", object->i_nlink > 0);
  39897. +
  39898. + /*
  39899. + * decrement ->i_nlink and update ->i_ctime
  39900. + */
  39901. + if(object->i_nlink == 2)
  39902. + INODE_SET_NLINK(object, 0);
  39903. +
  39904. + else
  39905. + INODE_DROP_NLINK(object);
  39906. + object->i_ctime = current_time(object);
  39907. + return 0;
  39908. +}
  39909. +
  39910. +/* this is common implementation of owns_item method of file plugin
  39911. + compare objectids of keys in inode and coord */
  39912. +int owns_item_common(const struct inode *inode, /* object to check
  39913. + * against */
  39914. + const coord_t *coord/* coord to check */)
  39915. +{
  39916. + reiser4_key item_key;
  39917. + reiser4_key file_key;
  39918. +
  39919. + assert("nikita-760", inode != NULL);
  39920. + assert("nikita-761", coord != NULL);
  39921. +
  39922. + return coord_is_existing_item(coord) &&
  39923. + (get_key_objectid(build_sd_key(inode, &file_key)) ==
  39924. + get_key_objectid(item_key_by_coord(coord, &item_key)));
  39925. +}
  39926. +
  39927. +/* this is common implementation of owns_item method of file plugin
  39928. + for typical directory
  39929. +*/
  39930. +int owns_item_common_dir(const struct inode *inode,/* object to check against */
  39931. + const coord_t *coord/* coord of item to check */)
  39932. +{
  39933. + reiser4_key item_key;
  39934. +
  39935. + assert("nikita-1335", inode != NULL);
  39936. + assert("nikita-1334", coord != NULL);
  39937. +
  39938. + if (plugin_of_group(item_plugin_by_coord(coord), DIR_ENTRY_ITEM_TYPE))
  39939. + return get_key_locality(item_key_by_coord(coord, &item_key)) ==
  39940. + get_inode_oid(inode);
  39941. + else
  39942. + return owns_item_common(inode, coord);
  39943. +}
  39944. +
  39945. +/* this is common implementation of can_add_link method of file plugin
  39946. + checks whether yet another hard links to this object can be added
  39947. +*/
  39948. +int can_add_link_common(const struct inode *object/* object to check */)
  39949. +{
  39950. + assert("nikita-732", object != NULL);
  39951. +
  39952. + /* inode->i_nlink is unsigned int, so just check for integer
  39953. + overflow */
  39954. + return object->i_nlink + 1 != 0;
  39955. +}
  39956. +
  39957. +/* this is common implementation of can_rem_link method of file plugin for
  39958. + typical directory
  39959. +*/
  39960. +int can_rem_link_common_dir(const struct inode *inode)
  39961. +{
  39962. + /* is_dir_empty() returns 0 if dir is empty */
  39963. + return !is_dir_empty(inode);
  39964. +}
  39965. +
  39966. +/* this is common implementation of detach method of file plugin for typical
  39967. + directory
  39968. +*/
  39969. +int reiser4_detach_common_dir(struct inode *child, struct inode *parent)
  39970. +{
  39971. + dir_plugin *dplug;
  39972. +
  39973. + dplug = inode_dir_plugin(child);
  39974. + assert("nikita-2883", dplug != NULL);
  39975. + assert("nikita-2884", dplug->detach != NULL);
  39976. + return dplug->detach(child, parent);
  39977. +}
  39978. +
  39979. +static int process_truncate(struct inode *, __u64 size);
  39980. +
  39981. +/* this is common implementation of safelink method of file plugin
  39982. + */
  39983. +int safelink_common(struct inode *object, reiser4_safe_link_t link, __u64 value)
  39984. +{
  39985. + int result;
  39986. +
  39987. + assert("vs-1705", get_current_context()->trans->atom == NULL);
  39988. + if (link == SAFE_UNLINK)
  39989. + /* nothing to do. iput() in the caller (process_safelink) will
  39990. + * finish with file */
  39991. + result = 0;
  39992. + else if (link == SAFE_TRUNCATE)
  39993. + result = process_truncate(object, value);
  39994. + else {
  39995. + warning("nikita-3438", "Unrecognized safe-link type: %i", link);
  39996. + result = RETERR(-EIO);
  39997. + }
  39998. + return result;
  39999. +}
  40000. +
  40001. +/* this is common implementation of estimate.create method of file plugin
  40002. + can be used when object creation involves insertion of one item (usually stat
  40003. + data) into tree
  40004. +*/
  40005. +reiser4_block_nr estimate_create_common(const struct inode *object)
  40006. +{
  40007. + return estimate_one_insert_item(reiser4_tree_by_inode(object));
  40008. +}
  40009. +
  40010. +/* this is common implementation of estimate.create method of file plugin for
  40011. + typical directory
  40012. + can be used when directory creation involves insertion of two items (usually
  40013. + stat data and item containing "." and "..") into tree
  40014. +*/
  40015. +reiser4_block_nr estimate_create_common_dir(const struct inode *object)
  40016. +{
  40017. + return 2 * estimate_one_insert_item(reiser4_tree_by_inode(object));
  40018. +}
  40019. +
  40020. +/* this is common implementation of estimate.update method of file plugin
  40021. + can be used when stat data update does not do more than inserting a unit
  40022. + into a stat data item which is probably true for most cases
  40023. +*/
  40024. +reiser4_block_nr estimate_update_common(const struct inode *inode)
  40025. +{
  40026. + return estimate_one_insert_into_item(reiser4_tree_by_inode(inode));
  40027. +}
  40028. +
  40029. +/* this is common implementation of estimate.unlink method of file plugin
  40030. + */
  40031. +reiser4_block_nr
  40032. +estimate_unlink_common(const struct inode *object UNUSED_ARG,
  40033. + const struct inode *parent UNUSED_ARG)
  40034. +{
  40035. + return 0;
  40036. +}
  40037. +
  40038. +/* this is common implementation of estimate.unlink method of file plugin for
  40039. + typical directory
  40040. +*/
  40041. +reiser4_block_nr
  40042. +estimate_unlink_common_dir(const struct inode *object,
  40043. + const struct inode *parent)
  40044. +{
  40045. + dir_plugin *dplug;
  40046. +
  40047. + dplug = inode_dir_plugin(object);
  40048. + assert("nikita-2888", dplug != NULL);
  40049. + assert("nikita-2887", dplug->estimate.unlink != NULL);
  40050. + return dplug->estimate.unlink(object, parent);
  40051. +}
  40052. +
  40053. +char *wire_write_common(struct inode *inode, char *start)
  40054. +{
  40055. + return build_inode_onwire(inode, start);
  40056. +}
  40057. +
  40058. +char *wire_read_common(char *addr, reiser4_object_on_wire * obj)
  40059. +{
  40060. + if (!obj)
  40061. + return locate_obj_key_id_onwire(addr);
  40062. + return extract_obj_key_id_from_onwire(addr, &obj->u.std.key_id);
  40063. +}
  40064. +
  40065. +struct dentry *wire_get_common(struct super_block *sb,
  40066. + reiser4_object_on_wire * obj)
  40067. +{
  40068. + struct inode *inode;
  40069. + struct dentry *dentry;
  40070. + reiser4_key key;
  40071. +
  40072. + extract_key_from_id(&obj->u.std.key_id, &key);
  40073. + inode = reiser4_iget(sb, &key, 1);
  40074. + if (!IS_ERR(inode)) {
  40075. + reiser4_iget_complete(inode);
  40076. + dentry = d_obtain_alias(inode);
  40077. + if (!IS_ERR(dentry))
  40078. + dentry->d_op = &get_super_private(sb)->ops.dentry;
  40079. + } else if (PTR_ERR(inode) == -ENOENT)
  40080. + /*
  40081. + * inode wasn't found at the key encoded in the file
  40082. + * handle. Hence, file handle is stale.
  40083. + */
  40084. + dentry = ERR_PTR(RETERR(-ESTALE));
  40085. + else
  40086. + dentry = (void *)inode;
  40087. + return dentry;
  40088. +}
  40089. +
  40090. +int wire_size_common(struct inode *inode)
  40091. +{
  40092. + return inode_onwire_size(inode);
  40093. +}
  40094. +
  40095. +void wire_done_common(reiser4_object_on_wire * obj)
  40096. +{
  40097. + /* nothing to do */
  40098. +}
  40099. +
  40100. +/* helper function to print errors */
  40101. +static void key_warning(const reiser4_key * key /* key to print */ ,
  40102. + const struct inode *inode,
  40103. + int code/* error code to print */)
  40104. +{
  40105. + assert("nikita-716", key != NULL);
  40106. +
  40107. + if (code != -ENOMEM) {
  40108. + warning("nikita-717", "Error for inode %llu (%i)",
  40109. + (unsigned long long)get_key_objectid(key), code);
  40110. + reiser4_print_key("for key", key);
  40111. + }
  40112. +}
  40113. +
  40114. +/* NIKITA-FIXME-HANS: perhaps this function belongs in another file? */
  40115. +#if REISER4_DEBUG
  40116. +static void
  40117. +check_inode_seal(const struct inode *inode,
  40118. + const coord_t *coord, const reiser4_key * key)
  40119. +{
  40120. + reiser4_key unit_key;
  40121. +
  40122. + unit_key_by_coord(coord, &unit_key);
  40123. + assert("nikita-2752",
  40124. + WITH_DATA_RET(coord->node, 1, keyeq(key, &unit_key)));
  40125. + assert("nikita-2753", get_inode_oid(inode) == get_key_objectid(key));
  40126. +}
  40127. +
  40128. +static void check_sd_coord(coord_t *coord, const reiser4_key * key)
  40129. +{
  40130. + reiser4_key ukey;
  40131. +
  40132. + coord_clear_iplug(coord);
  40133. + if (zload(coord->node))
  40134. + return;
  40135. +
  40136. + if (!coord_is_existing_unit(coord) ||
  40137. + !item_plugin_by_coord(coord) ||
  40138. + !keyeq(unit_key_by_coord(coord, &ukey), key) ||
  40139. + (znode_get_level(coord->node) != LEAF_LEVEL) ||
  40140. + !item_is_statdata(coord)) {
  40141. + warning("nikita-1901", "Conspicuous seal");
  40142. + reiser4_print_key("key", key);
  40143. + print_coord("coord", coord, 1);
  40144. + impossible("nikita-2877", "no way");
  40145. + }
  40146. + zrelse(coord->node);
  40147. +}
  40148. +
  40149. +#else
  40150. +#define check_inode_seal(inode, coord, key) noop
  40151. +#define check_sd_coord(coord, key) noop
  40152. +#endif
  40153. +
  40154. +/* insert new stat-data into tree. Called with inode state
  40155. + locked. Return inode state locked. */
  40156. +static int insert_new_sd(struct inode *inode/* inode to create sd for */)
  40157. +{
  40158. + int result;
  40159. + reiser4_key key;
  40160. + coord_t coord;
  40161. + reiser4_item_data data;
  40162. + char *area;
  40163. + reiser4_inode *ref;
  40164. + lock_handle lh;
  40165. + oid_t oid;
  40166. +
  40167. + assert("nikita-723", inode != NULL);
  40168. + assert("nikita-3406", reiser4_inode_get_flag(inode, REISER4_NO_SD));
  40169. +
  40170. + ref = reiser4_inode_data(inode);
  40171. + spin_lock_inode(inode);
  40172. +
  40173. + if (ref->plugin_mask != 0)
  40174. + /* inode has non-standard plugins */
  40175. + inode_set_extension(inode, PLUGIN_STAT);
  40176. + /*
  40177. + * prepare specification of new item to be inserted
  40178. + */
  40179. +
  40180. + data.iplug = inode_sd_plugin(inode);
  40181. + data.length = data.iplug->s.sd.save_len(inode);
  40182. + spin_unlock_inode(inode);
  40183. +
  40184. + data.data = NULL;
  40185. + data.user = 0;
  40186. +/* could be optimized for case where there is only one node format in
  40187. + * use in the filesystem, probably there are lots of such
  40188. + * places we could optimize for only one node layout.... -Hans */
  40189. + if (data.length > reiser4_tree_by_inode(inode)->nplug->max_item_size()) {
  40190. + /* This is a silly check, but we don't know the actual node
  40191. + that the insertion will go into. */
  40192. + return RETERR(-ENAMETOOLONG);
  40193. + }
  40194. + oid = oid_allocate(inode->i_sb);
  40195. +/* NIKITA-FIXME-HANS: what is your opinion on whether this error check should be
  40196. + * encapsulated into oid_allocate? */
  40197. + if (oid == ABSOLUTE_MAX_OID)
  40198. + return RETERR(-EOVERFLOW);
  40199. +
  40200. + set_inode_oid(inode, oid);
  40201. +
  40202. + coord_init_zero(&coord);
  40203. + init_lh(&lh);
  40204. +
  40205. + result = insert_by_key(reiser4_tree_by_inode(inode),
  40206. + build_sd_key(inode, &key), &data, &coord, &lh,
  40207. + /* stat data lives on a leaf level */
  40208. + LEAF_LEVEL, CBK_UNIQUE);
  40209. +
  40210. + /* we don't want to re-check that somebody didn't insert
  40211. + stat-data while we were doing io, because if it did,
  40212. + insert_by_key() returned error. */
  40213. + /* but what _is_ possible is that plugin for inode's stat-data,
  40214. + list of non-standard plugins or their state would change
  40215. + during io, so that stat-data wouldn't fit into sd. To avoid
  40216. + this race we keep inode_state lock. This lock has to be
  40217. + taken each time you access inode in a way that would cause
  40218. + changes in sd size: changing plugins etc.
  40219. + */
  40220. +
  40221. + if (result == IBK_INSERT_OK) {
  40222. + coord_clear_iplug(&coord);
  40223. + result = zload(coord.node);
  40224. + if (result == 0) {
  40225. + /* have we really inserted stat data? */
  40226. + assert("nikita-725", item_is_statdata(&coord));
  40227. +
  40228. + /* inode was just created. It is inserted into hash
  40229. + table, but no directory entry was yet inserted into
  40230. + parent. So, inode is inaccessible through
  40231. + ->lookup(). All places that directly grab inode
  40232. + from hash-table (like old knfsd), should check
  40233. + IMMUTABLE flag that is set by common_create_child.
  40234. + */
  40235. + assert("nikita-3240", data.iplug != NULL);
  40236. + assert("nikita-3241", data.iplug->s.sd.save != NULL);
  40237. + area = item_body_by_coord(&coord);
  40238. + result = data.iplug->s.sd.save(inode, &area);
  40239. + znode_make_dirty(coord.node);
  40240. + if (result == 0) {
  40241. + /* object has stat-data now */
  40242. + reiser4_inode_clr_flag(inode, REISER4_NO_SD);
  40243. + reiser4_inode_set_flag(inode,
  40244. + REISER4_SDLEN_KNOWN);
  40245. + /* initialise stat-data seal */
  40246. + reiser4_seal_init(&ref->sd_seal, &coord, &key);
  40247. + ref->sd_coord = coord;
  40248. + check_inode_seal(inode, &coord, &key);
  40249. + } else if (result != -ENOMEM)
  40250. + /*
  40251. + * convert any other error code to -EIO to
  40252. + * avoid confusing user level with unexpected
  40253. + * errors.
  40254. + */
  40255. + result = RETERR(-EIO);
  40256. + zrelse(coord.node);
  40257. + }
  40258. + }
  40259. + done_lh(&lh);
  40260. +
  40261. + if (result != 0)
  40262. + key_warning(&key, inode, result);
  40263. + else
  40264. + oid_count_allocated();
  40265. +
  40266. + return result;
  40267. +}
  40268. +
  40269. +/* find sd of inode in a tree, deal with errors */
  40270. +int lookup_sd(struct inode *inode /* inode to look sd for */ ,
  40271. + znode_lock_mode lock_mode /* lock mode */ ,
  40272. + coord_t *coord /* resulting coord */ ,
  40273. + lock_handle * lh /* resulting lock handle */ ,
  40274. + const reiser4_key * key /* resulting key */ ,
  40275. + int silent)
  40276. +{
  40277. + int result;
  40278. + __u32 flags;
  40279. +
  40280. + assert("nikita-1692", inode != NULL);
  40281. + assert("nikita-1693", coord != NULL);
  40282. + assert("nikita-1694", key != NULL);
  40283. +
  40284. + /* look for the object's stat data in a tree.
  40285. + This returns in "node" pointer to a locked znode and in "pos"
  40286. + position of an item found in node. Both are only valid if
  40287. + coord_found is returned. */
  40288. + flags = (lock_mode == ZNODE_WRITE_LOCK) ? CBK_FOR_INSERT : 0;
  40289. + flags |= CBK_UNIQUE;
  40290. + /*
  40291. + * traverse tree to find stat data. We cannot use vroot here, because
  40292. + * it only covers _body_ of the file, and stat data don't belong
  40293. + * there.
  40294. + */
  40295. + result = coord_by_key(reiser4_tree_by_inode(inode),
  40296. + key,
  40297. + coord,
  40298. + lh,
  40299. + lock_mode,
  40300. + FIND_EXACT, LEAF_LEVEL, LEAF_LEVEL, flags, NULL);
  40301. + if (REISER4_DEBUG && result == 0)
  40302. + check_sd_coord(coord, key);
  40303. +
  40304. + if (result != 0 && !silent)
  40305. + key_warning(key, inode, result);
  40306. + return result;
  40307. +}
  40308. +
  40309. +static int
  40310. +locate_inode_sd(struct inode *inode,
  40311. + reiser4_key * key, coord_t *coord, lock_handle * lh)
  40312. +{
  40313. + reiser4_inode *state;
  40314. + seal_t seal;
  40315. + int result;
  40316. +
  40317. + assert("nikita-3483", inode != NULL);
  40318. +
  40319. + state = reiser4_inode_data(inode);
  40320. + spin_lock_inode(inode);
  40321. + *coord = state->sd_coord;
  40322. + coord_clear_iplug(coord);
  40323. + seal = state->sd_seal;
  40324. + spin_unlock_inode(inode);
  40325. +
  40326. + build_sd_key(inode, key);
  40327. + /* first, try to use seal */
  40328. + if (reiser4_seal_is_set(&seal)) {
  40329. + result = reiser4_seal_validate(&seal,
  40330. + coord,
  40331. + key,
  40332. + lh, ZNODE_WRITE_LOCK,
  40333. + ZNODE_LOCK_LOPRI);
  40334. + if (result == 0) {
  40335. + check_sd_coord(coord, key);
  40336. + return 0;
  40337. + }
  40338. + }
  40339. + /* hint is invalid,
  40340. + * so traverse tree
  40341. + */
  40342. + coord_init_zero(coord);
  40343. + return lookup_sd(inode, ZNODE_WRITE_LOCK, coord, lh, key, 0);
  40344. +}
  40345. +
  40346. +#if REISER4_DEBUG
  40347. +static int all_but_offset_key_eq(const reiser4_key * k1, const reiser4_key * k2)
  40348. +{
  40349. + return (get_key_locality(k1) == get_key_locality(k2) &&
  40350. + get_key_type(k1) == get_key_type(k2) &&
  40351. + get_key_band(k1) == get_key_band(k2) &&
  40352. + get_key_ordering(k1) == get_key_ordering(k2) &&
  40353. + get_key_objectid(k1) == get_key_objectid(k2));
  40354. +}
  40355. +
  40356. +#include "../tree_walk.h"
  40357. +
  40358. +/* make some checks before and after stat-data resize operation */
  40359. +static int check_sd_resize(struct inode *inode, coord_t *coord,
  40360. + int length, int progress/* 1 means after resize */)
  40361. +{
  40362. + int ret = 0;
  40363. + lock_handle left_lock;
  40364. + coord_t left_coord;
  40365. + reiser4_key left_key;
  40366. + reiser4_key key;
  40367. +
  40368. + if (inode_file_plugin(inode) !=
  40369. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID))
  40370. + return 0;
  40371. + if (!length)
  40372. + return 0;
  40373. + if (coord->item_pos != 0)
  40374. + return 0;
  40375. +
  40376. + init_lh(&left_lock);
  40377. + ret = reiser4_get_left_neighbor(&left_lock,
  40378. + coord->node,
  40379. + ZNODE_WRITE_LOCK,
  40380. + GN_CAN_USE_UPPER_LEVELS);
  40381. + if (ret == -E_REPEAT || ret == -E_NO_NEIGHBOR ||
  40382. + ret == -ENOENT || ret == -EINVAL
  40383. + || ret == -E_DEADLOCK) {
  40384. + ret = 0;
  40385. + goto exit;
  40386. + }
  40387. + ret = zload(left_lock.node);
  40388. + if (ret)
  40389. + goto exit;
  40390. + coord_init_last_unit(&left_coord, left_lock.node);
  40391. + item_key_by_coord(&left_coord, &left_key);
  40392. + item_key_by_coord(coord, &key);
  40393. +
  40394. + if (all_but_offset_key_eq(&key, &left_key))
  40395. + /* corruption occurred */
  40396. + ret = 1;
  40397. + zrelse(left_lock.node);
  40398. + exit:
  40399. + done_lh(&left_lock);
  40400. + return ret;
  40401. +}
  40402. +#endif
  40403. +
  40404. +/* update stat-data at @coord */
  40405. +static int
  40406. +update_sd_at(struct inode *inode, coord_t *coord, reiser4_key * key,
  40407. + lock_handle * lh)
  40408. +{
  40409. + int result;
  40410. + reiser4_item_data data;
  40411. + char *area;
  40412. + reiser4_inode *state;
  40413. + znode *loaded;
  40414. +
  40415. + state = reiser4_inode_data(inode);
  40416. +
  40417. + coord_clear_iplug(coord);
  40418. + result = zload(coord->node);
  40419. + if (result != 0)
  40420. + return result;
  40421. + loaded = coord->node;
  40422. +
  40423. + spin_lock_inode(inode);
  40424. + assert("nikita-728", inode_sd_plugin(inode) != NULL);
  40425. + data.iplug = inode_sd_plugin(inode);
  40426. +
  40427. + /* if inode has non-standard plugins, add appropriate stat data
  40428. + * extension */
  40429. + if (state->extmask & (1 << PLUGIN_STAT)) {
  40430. + if (state->plugin_mask == 0)
  40431. + inode_clr_extension(inode, PLUGIN_STAT);
  40432. + } else if (state->plugin_mask != 0)
  40433. + inode_set_extension(inode, PLUGIN_STAT);
  40434. +
  40435. + if (state->extmask & (1 << HEIR_STAT)) {
  40436. + if (state->heir_mask == 0)
  40437. + inode_clr_extension(inode, HEIR_STAT);
  40438. + } else if (state->heir_mask != 0)
  40439. + inode_set_extension(inode, HEIR_STAT);
  40440. +
  40441. + /* data.length is how much space to add to (or remove
  40442. + from if negative) sd */
  40443. + if (!reiser4_inode_get_flag(inode, REISER4_SDLEN_KNOWN)) {
  40444. + /* recalculate stat-data length */
  40445. + data.length =
  40446. + data.iplug->s.sd.save_len(inode) -
  40447. + item_length_by_coord(coord);
  40448. + reiser4_inode_set_flag(inode, REISER4_SDLEN_KNOWN);
  40449. + } else
  40450. + data.length = 0;
  40451. + spin_unlock_inode(inode);
  40452. +
  40453. + /* if on-disk stat data is of different length than required
  40454. + for this inode, resize it */
  40455. +
  40456. + if (data.length != 0) {
  40457. + data.data = NULL;
  40458. + data.user = 0;
  40459. +
  40460. + assert("edward-1441",
  40461. + !check_sd_resize(inode, coord,
  40462. + data.length, 0/* before resize */));
  40463. +
  40464. + /* insertion code requires that insertion point (coord) was
  40465. + * between units. */
  40466. + coord->between = AFTER_UNIT;
  40467. + result = reiser4_resize_item(coord, &data, key, lh,
  40468. + COPI_DONT_SHIFT_LEFT);
  40469. + if (result != 0) {
  40470. + key_warning(key, inode, result);
  40471. + zrelse(loaded);
  40472. + return result;
  40473. + }
  40474. + if (loaded != coord->node) {
  40475. + /* reiser4_resize_item moved coord to another node.
  40476. + Zload it */
  40477. + zrelse(loaded);
  40478. + coord_clear_iplug(coord);
  40479. + result = zload(coord->node);
  40480. + if (result != 0)
  40481. + return result;
  40482. + loaded = coord->node;
  40483. + }
  40484. + assert("edward-1442",
  40485. + !check_sd_resize(inode, coord,
  40486. + data.length, 1/* after resize */));
  40487. + }
  40488. + area = item_body_by_coord(coord);
  40489. + spin_lock_inode(inode);
  40490. + result = data.iplug->s.sd.save(inode, &area);
  40491. + znode_make_dirty(coord->node);
  40492. +
  40493. + /* re-initialise stat-data seal */
  40494. +
  40495. + /*
  40496. + * coord.between was possibly skewed from AT_UNIT when stat-data size
  40497. + * was changed and new extensions were pasted into item.
  40498. + */
  40499. + coord->between = AT_UNIT;
  40500. + reiser4_seal_init(&state->sd_seal, coord, key);
  40501. + state->sd_coord = *coord;
  40502. + spin_unlock_inode(inode);
  40503. + check_inode_seal(inode, coord, key);
  40504. + zrelse(loaded);
  40505. + return result;
  40506. +}
  40507. +
  40508. +/* Update existing stat-data in a tree. Called with inode state locked. Return
  40509. + inode state locked. */
  40510. +static int update_sd(struct inode *inode/* inode to update sd for */)
  40511. +{
  40512. + int result;
  40513. + reiser4_key key;
  40514. + coord_t coord;
  40515. + lock_handle lh;
  40516. +
  40517. + assert("nikita-726", inode != NULL);
  40518. +
  40519. + /* no stat-data, nothing to update?! */
  40520. + assert("nikita-3482", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
  40521. +
  40522. + init_lh(&lh);
  40523. +
  40524. + result = locate_inode_sd(inode, &key, &coord, &lh);
  40525. + if (result == 0)
  40526. + result = update_sd_at(inode, &coord, &key, &lh);
  40527. + done_lh(&lh);
  40528. +
  40529. + return result;
  40530. +}
  40531. +
  40532. +/* helper for reiser4_delete_object_common and reiser4_delete_dir_common.
  40533. + Remove object stat data. Space for that must be reserved by caller before
  40534. +*/
  40535. +static int
  40536. +common_object_delete_no_reserve(struct inode *inode/* object to remove */)
  40537. +{
  40538. + int result;
  40539. +
  40540. + assert("nikita-1477", inode != NULL);
  40541. +
  40542. + if (!reiser4_inode_get_flag(inode, REISER4_NO_SD)) {
  40543. + reiser4_key sd_key;
  40544. +
  40545. + build_sd_key(inode, &sd_key);
  40546. + result =
  40547. + reiser4_cut_tree(reiser4_tree_by_inode(inode),
  40548. + &sd_key, &sd_key, NULL, 0);
  40549. + if (result == 0) {
  40550. + reiser4_inode_set_flag(inode, REISER4_NO_SD);
  40551. + result = oid_release(inode->i_sb, get_inode_oid(inode));
  40552. + if (result == 0) {
  40553. + oid_count_released();
  40554. +
  40555. + result = safe_link_del(reiser4_tree_by_inode(inode),
  40556. + get_inode_oid(inode),
  40557. + SAFE_UNLINK);
  40558. + }
  40559. + }
  40560. + } else
  40561. + result = 0;
  40562. + return result;
  40563. +}
  40564. +
  40565. +/* helper for safelink_common */
  40566. +static int process_truncate(struct inode *inode, __u64 size)
  40567. +{
  40568. + int result;
  40569. + struct iattr attr;
  40570. + file_plugin *fplug;
  40571. + reiser4_context *ctx;
  40572. + struct dentry dentry;
  40573. +
  40574. + assert("vs-21", is_in_reiser4_context());
  40575. + ctx = reiser4_init_context(inode->i_sb);
  40576. + assert("vs-22", !IS_ERR(ctx));
  40577. +
  40578. + attr.ia_size = size;
  40579. + attr.ia_valid = ATTR_SIZE | ATTR_CTIME;
  40580. + fplug = inode_file_plugin(inode);
  40581. +
  40582. + inode_lock(inode);
  40583. + assert("vs-1704", get_current_context()->trans->atom == NULL);
  40584. + dentry.d_inode = inode;
  40585. + result = inode->i_op->setattr(&init_user_ns, &dentry, &attr);
  40586. + inode_unlock(inode);
  40587. +
  40588. + context_set_commit_async(ctx);
  40589. + reiser4_exit_context(ctx);
  40590. +
  40591. + return result;
  40592. +}
  40593. +
  40594. +/*
  40595. + Local variables:
  40596. + c-indentation-style: "K&R"
  40597. + mode-name: "LC"
  40598. + c-basic-offset: 8
  40599. + tab-width: 8
  40600. + fill-column: 80
  40601. + scroll-step: 1
  40602. + End:
  40603. +*/
  40604. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/hash.c linux-5.16.14/fs/reiser4/plugin/hash.c
  40605. --- linux-5.16.14.orig/fs/reiser4/plugin/hash.c 1970-01-01 01:00:00.000000000 +0100
  40606. +++ linux-5.16.14/fs/reiser4/plugin/hash.c 2022-03-12 13:26:19.674892782 +0100
  40607. @@ -0,0 +1,339 @@
  40608. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  40609. + * reiser4/README */
  40610. +
  40611. +/* Hash functions */
  40612. +
  40613. +#include "../debug.h"
  40614. +#include "plugin_header.h"
  40615. +#include "plugin.h"
  40616. +#include "../super.h"
  40617. +#include "../inode.h"
  40618. +
  40619. +#include <linux/types.h>
  40620. +
  40621. +/* old rupasov (yura) hash */
  40622. +static __u64 hash_rupasov(const unsigned char *name /* name to hash */ ,
  40623. + int len/* @name's length */)
  40624. +{
  40625. + int i;
  40626. + int j;
  40627. + int pow;
  40628. + __u64 a;
  40629. + __u64 c;
  40630. +
  40631. + assert("nikita-672", name != NULL);
  40632. + assert("nikita-673", len >= 0);
  40633. +
  40634. + for (pow = 1, i = 1; i < len; ++i)
  40635. + pow = pow * 10;
  40636. +
  40637. + if (len == 1)
  40638. + a = name[0] - 48;
  40639. + else
  40640. + a = (name[0] - 48) * pow;
  40641. +
  40642. + for (i = 1; i < len; ++i) {
  40643. + c = name[i] - 48;
  40644. + for (pow = 1, j = i; j < len - 1; ++j)
  40645. + pow = pow * 10;
  40646. + a = a + c * pow;
  40647. + }
  40648. + for (; i < 40; ++i) {
  40649. + c = '0' - 48;
  40650. + for (pow = 1, j = i; j < len - 1; ++j)
  40651. + pow = pow * 10;
  40652. + a = a + c * pow;
  40653. + }
  40654. +
  40655. + for (; i < 256; ++i) {
  40656. + c = i;
  40657. + for (pow = 1, j = i; j < len - 1; ++j)
  40658. + pow = pow * 10;
  40659. + a = a + c * pow;
  40660. + }
  40661. +
  40662. + a = a << 7;
  40663. + return a;
  40664. +}
  40665. +
  40666. +/* r5 hash */
  40667. +static __u64 hash_r5(const unsigned char *name /* name to hash */ ,
  40668. + int len UNUSED_ARG/* @name's length */)
  40669. +{
  40670. + __u64 a = 0;
  40671. +
  40672. + assert("nikita-674", name != NULL);
  40673. + assert("nikita-675", len >= 0);
  40674. +
  40675. + while (*name) {
  40676. + a += *name << 4;
  40677. + a += *name >> 4;
  40678. + a *= 11;
  40679. + name++;
  40680. + }
  40681. + return a;
  40682. +}
  40683. +
  40684. +/* Keyed 32-bit hash function using TEA in a Davies-Meyer function
  40685. + H0 = Key
  40686. + Hi = E Mi(Hi-1) + Hi-1
  40687. +
  40688. + (see Applied Cryptography, 2nd edition, p448).
  40689. +
  40690. + Jeremy Fitzhardinge <jeremy@zip.com.au> 1998
  40691. +
  40692. + Jeremy has agreed to the contents of reiserfs/README. -Hans
  40693. +
  40694. + This code was blindly upgraded to __u64 by s/__u32/__u64/g.
  40695. +*/
  40696. +static __u64 hash_tea(const unsigned char *name /* name to hash */ ,
  40697. + int len/* @name's length */)
  40698. +{
  40699. + __u64 k[] = { 0x9464a485u, 0x542e1a94u, 0x3e846bffu, 0xb75bcfc3u };
  40700. +
  40701. + __u64 h0 = k[0], h1 = k[1];
  40702. + __u64 a, b, c, d;
  40703. + __u64 pad;
  40704. + int i;
  40705. +
  40706. + assert("nikita-676", name != NULL);
  40707. + assert("nikita-677", len >= 0);
  40708. +
  40709. +#define DELTA 0x9E3779B9u
  40710. +#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */
  40711. +#define PARTROUNDS 6 /* 6 gets complete mixing */
  40712. +
  40713. +/* a, b, c, d - data; h0, h1 - accumulated hash */
  40714. +#define TEACORE(rounds) \
  40715. + do { \
  40716. + __u64 sum = 0; \
  40717. + int n = rounds; \
  40718. + __u64 b0, b1; \
  40719. + \
  40720. + b0 = h0; \
  40721. + b1 = h1; \
  40722. + \
  40723. + do { \
  40724. + sum += DELTA; \
  40725. + b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); \
  40726. + b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); \
  40727. + } while (--n); \
  40728. + \
  40729. + h0 += b0; \
  40730. + h1 += b1; \
  40731. + } while (0)
  40732. +
  40733. + pad = (__u64) len | ((__u64) len << 8);
  40734. + pad |= pad << 16;
  40735. +
  40736. + while (len >= 16) {
  40737. + a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
  40738. + 16 | (__u64) name[3] << 24;
  40739. + b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] <<
  40740. + 16 | (__u64) name[7] << 24;
  40741. + c = (__u64) name[8] | (__u64) name[9] << 8 | (__u64) name[10] <<
  40742. + 16 | (__u64) name[11] << 24;
  40743. + d = (__u64) name[12] | (__u64) name[13] << 8 | (__u64) name[14]
  40744. + << 16 | (__u64) name[15] << 24;
  40745. +
  40746. + TEACORE(PARTROUNDS);
  40747. +
  40748. + len -= 16;
  40749. + name += 16;
  40750. + }
  40751. +
  40752. + if (len >= 12) {
  40753. + a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
  40754. + 16 | (__u64) name[3] << 24;
  40755. + b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] <<
  40756. + 16 | (__u64) name[7] << 24;
  40757. + c = (__u64) name[8] | (__u64) name[9] << 8 | (__u64) name[10] <<
  40758. + 16 | (__u64) name[11] << 24;
  40759. +
  40760. + d = pad;
  40761. + for (i = 12; i < len; i++) {
  40762. + d <<= 8;
  40763. + d |= name[i];
  40764. + }
  40765. + } else if (len >= 8) {
  40766. + a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
  40767. + 16 | (__u64) name[3] << 24;
  40768. + b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] <<
  40769. + 16 | (__u64) name[7] << 24;
  40770. +
  40771. + c = d = pad;
  40772. + for (i = 8; i < len; i++) {
  40773. + c <<= 8;
  40774. + c |= name[i];
  40775. + }
  40776. + } else if (len >= 4) {
  40777. + a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
  40778. + 16 | (__u64) name[3] << 24;
  40779. +
  40780. + b = c = d = pad;
  40781. + for (i = 4; i < len; i++) {
  40782. + b <<= 8;
  40783. + b |= name[i];
  40784. + }
  40785. + } else {
  40786. + a = b = c = d = pad;
  40787. + for (i = 0; i < len; i++) {
  40788. + a <<= 8;
  40789. + a |= name[i];
  40790. + }
  40791. + }
  40792. +
  40793. + TEACORE(FULLROUNDS);
  40794. +
  40795. +/* return 0;*/
  40796. + return h0 ^ h1;
  40797. +
  40798. +}
  40799. +
  40800. +/* classical 64 bit Fowler/Noll/Vo-1 (FNV-1) hash.
  40801. +
  40802. + See http://www.isthe.com/chongo/tech/comp/fnv/ for details.
  40803. +
  40804. + Excerpts:
  40805. +
  40806. + FNV hashes are designed to be fast while maintaining a low collision
  40807. + rate.
  40808. +
  40809. + [This version also seems to preserve lexicographical order locally.]
  40810. +
  40811. + FNV hash algorithms and source code have been released into the public
  40812. + domain.
  40813. +
  40814. +*/
  40815. +static __u64 hash_fnv1(const unsigned char *name /* name to hash */ ,
  40816. + int len UNUSED_ARG/* @name's length */)
  40817. +{
  40818. + unsigned long long a = 0xcbf29ce484222325ull;
  40819. + const unsigned long long fnv_64_prime = 0x100000001b3ull;
  40820. +
  40821. + assert("nikita-678", name != NULL);
  40822. + assert("nikita-679", len >= 0);
  40823. +
  40824. + /* FNV-1 hash each octet in the buffer */
  40825. + for (; *name; ++name) {
  40826. + /* multiply by the 32 bit FNV magic prime mod 2^64 */
  40827. + a *= fnv_64_prime;
  40828. + /* xor the bottom with the current octet */
  40829. + a ^= (unsigned long long)(*name);
  40830. + }
  40831. + /* return our new hash value */
  40832. + return a;
  40833. +}
  40834. +
  40835. +/* degenerate hash function used to simplify testing of non-unique key
  40836. + handling */
  40837. +static __u64 hash_deg(const unsigned char *name UNUSED_ARG /* name to hash */ ,
  40838. + int len UNUSED_ARG/* @name's length */)
  40839. +{
  40840. + return 0xc0c0c0c010101010ull;
  40841. +}
  40842. +
  40843. +static int change_hash(struct inode *inode,
  40844. + reiser4_plugin * plugin,
  40845. + pset_member memb)
  40846. +{
  40847. + int result;
  40848. +
  40849. + assert("nikita-3503", inode != NULL);
  40850. + assert("nikita-3504", plugin != NULL);
  40851. +
  40852. + assert("nikita-3505", is_reiser4_inode(inode));
  40853. + assert("nikita-3507", plugin->h.type_id == REISER4_HASH_PLUGIN_TYPE);
  40854. +
  40855. + if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE))
  40856. + return RETERR(-EINVAL);
  40857. +
  40858. + result = 0;
  40859. + if (inode_hash_plugin(inode) == NULL ||
  40860. + inode_hash_plugin(inode)->h.id != plugin->h.id) {
  40861. + if (is_dir_empty(inode) == 0)
  40862. + result = aset_set_unsafe(&reiser4_inode_data(inode)->pset,
  40863. + PSET_HASH, plugin);
  40864. + else
  40865. + result = RETERR(-ENOTEMPTY);
  40866. +
  40867. + }
  40868. + return result;
  40869. +}
  40870. +
  40871. +static reiser4_plugin_ops hash_plugin_ops = {
  40872. + .init = NULL,
  40873. + .load = NULL,
  40874. + .save_len = NULL,
  40875. + .save = NULL,
  40876. + .change = change_hash
  40877. +};
  40878. +
  40879. +/* hash plugins */
  40880. +hash_plugin hash_plugins[LAST_HASH_ID] = {
  40881. + [RUPASOV_HASH_ID] = {
  40882. + .h = {
  40883. + .type_id = REISER4_HASH_PLUGIN_TYPE,
  40884. + .id = RUPASOV_HASH_ID,
  40885. + .pops = &hash_plugin_ops,
  40886. + .label = "rupasov",
  40887. + .desc = "Original Yura's hash",
  40888. + .linkage = {NULL, NULL}
  40889. + },
  40890. + .hash = hash_rupasov
  40891. + },
  40892. + [R5_HASH_ID] = {
  40893. + .h = {
  40894. + .type_id = REISER4_HASH_PLUGIN_TYPE,
  40895. + .id = R5_HASH_ID,
  40896. + .pops = &hash_plugin_ops,
  40897. + .label = "r5",
  40898. + .desc = "r5 hash",
  40899. + .linkage = {NULL, NULL}
  40900. + },
  40901. + .hash = hash_r5
  40902. + },
  40903. + [TEA_HASH_ID] = {
  40904. + .h = {
  40905. + .type_id = REISER4_HASH_PLUGIN_TYPE,
  40906. + .id = TEA_HASH_ID,
  40907. + .pops = &hash_plugin_ops,
  40908. + .label = "tea",
  40909. + .desc = "tea hash",
  40910. + .linkage = {NULL, NULL}
  40911. + },
  40912. + .hash = hash_tea
  40913. + },
  40914. + [FNV1_HASH_ID] = {
  40915. + .h = {
  40916. + .type_id = REISER4_HASH_PLUGIN_TYPE,
  40917. + .id = FNV1_HASH_ID,
  40918. + .pops = &hash_plugin_ops,
  40919. + .label = "fnv1",
  40920. + .desc = "fnv1 hash",
  40921. + .linkage = {NULL, NULL}
  40922. + },
  40923. + .hash = hash_fnv1
  40924. + },
  40925. + [DEGENERATE_HASH_ID] = {
  40926. + .h = {
  40927. + .type_id = REISER4_HASH_PLUGIN_TYPE,
  40928. + .id = DEGENERATE_HASH_ID,
  40929. + .pops = &hash_plugin_ops,
  40930. + .label = "degenerate hash",
  40931. + .desc = "Degenerate hash: only for testing",
  40932. + .linkage = {NULL, NULL}
  40933. + },
  40934. + .hash = hash_deg
  40935. + }
  40936. +};
  40937. +
  40938. +/* Make Linus happy.
  40939. + Local variables:
  40940. + c-indentation-style: "K&R"
  40941. + mode-name: "LC"
  40942. + c-basic-offset: 8
  40943. + tab-width: 8
  40944. + fill-column: 120
  40945. + End:
  40946. +*/
  40947. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/inode_ops.c linux-5.16.14/fs/reiser4/plugin/inode_ops.c
  40948. --- linux-5.16.14.orig/fs/reiser4/plugin/inode_ops.c 1970-01-01 01:00:00.000000000 +0100
  40949. +++ linux-5.16.14/fs/reiser4/plugin/inode_ops.c 2022-03-12 13:26:19.674892782 +0100
  40950. @@ -0,0 +1,898 @@
  40951. +/*
  40952. + * Copyright 2005 by Hans Reiser, licensing governed by reiser4/README
  40953. + */
  40954. +
  40955. +/*
  40956. + * this file contains typical implementations for most of methods of struct
  40957. + * inode_operations
  40958. + */
  40959. +
  40960. +#include "../inode.h"
  40961. +#include "../safe_link.h"
  40962. +
  40963. +#include <linux/namei.h>
  40964. +
  40965. +static int create_vfs_object(struct inode *parent, struct dentry *dentry,
  40966. + reiser4_object_create_data *data);
  40967. +
  40968. +/**
  40969. + * reiser4_create_common - create of inode operations
  40970. + * @parent: inode of parent directory
  40971. + * @dentry: dentry of new object to create
  40972. + * @mode: the permissions to use
  40973. + * @exclusive:
  40974. + *
  40975. + * This is common implementation of vfs's create method of struct
  40976. + * inode_operations.
  40977. + * Creates regular file using file plugin from parent directory plugin set.
  40978. + */
  40979. +int reiser4_create_common(struct user_namespace *mnt_userns,
  40980. + struct inode *parent, struct dentry *dentry,
  40981. + umode_t mode, bool exclusive)
  40982. +{
  40983. + reiser4_object_create_data data;
  40984. + file_plugin *fplug;
  40985. +
  40986. + memset(&data, 0, sizeof data);
  40987. + data.mode = S_IFREG | mode;
  40988. + fplug = child_create_plugin(parent) ? : inode_create_plugin(parent);
  40989. + if (!plugin_of_group(fplug, REISER4_REGULAR_FILE)) {
  40990. + warning("vpf-1900", "'%s' is not a regular file plugin.",
  40991. + fplug->h.label);
  40992. + return RETERR(-EIO);
  40993. + }
  40994. + data.id = fplug->h.id;
  40995. + return create_vfs_object(parent, dentry, &data);
  40996. +}
  40997. +
  40998. +int reiser4_lookup_name(struct inode *dir, struct dentry *, reiser4_key *);
  40999. +void check_light_weight(struct inode *inode, struct inode *parent);
  41000. +
  41001. +/**
  41002. + * reiser4_lookup_common - lookup of inode operations
  41003. + * @parent: inode of directory to lookup into
  41004. + * @dentry: name to look for
  41005. + * @flags:
  41006. + *
  41007. + * This is common implementation of vfs's lookup method of struct
  41008. + * inode_operations.
  41009. + */
  41010. +struct dentry *reiser4_lookup_common(struct inode *parent,
  41011. + struct dentry *dentry,
  41012. + unsigned int flags)
  41013. +{
  41014. + reiser4_context *ctx;
  41015. + int result;
  41016. + struct dentry *new;
  41017. + struct inode *inode;
  41018. + reiser4_dir_entry_desc entry;
  41019. +
  41020. + ctx = reiser4_init_context(parent->i_sb);
  41021. + if (IS_ERR(ctx))
  41022. + return (struct dentry *)ctx;
  41023. +
  41024. + /* set up operations on dentry. */
  41025. + dentry->d_op = &get_super_private(parent->i_sb)->ops.dentry;
  41026. +
  41027. + result = reiser4_lookup_name(parent, dentry, &entry.key);
  41028. + if (result) {
  41029. + context_set_commit_async(ctx);
  41030. + reiser4_exit_context(ctx);
  41031. + if (result == -ENOENT) {
  41032. + /* object not found */
  41033. + if (!IS_DEADDIR(parent))
  41034. + d_add(dentry, NULL);
  41035. + return NULL;
  41036. + }
  41037. + return ERR_PTR(result);
  41038. + }
  41039. +
  41040. + inode = reiser4_iget(parent->i_sb, &entry.key, 0);
  41041. + if (IS_ERR(inode)) {
  41042. + context_set_commit_async(ctx);
  41043. + reiser4_exit_context(ctx);
  41044. + return ERR_PTR(PTR_ERR(inode));
  41045. + }
  41046. +
  41047. + /* success */
  41048. + check_light_weight(inode, parent);
  41049. + new = d_splice_alias(inode, dentry);
  41050. + reiser4_iget_complete(inode);
  41051. +
  41052. + /* prevent balance_dirty_pages() from being called: we don't want to
  41053. + * do this under directory i_mutex. */
  41054. + context_set_commit_async(ctx);
  41055. + reiser4_exit_context(ctx);
  41056. + return new;
  41057. +}
  41058. +
  41059. +static reiser4_block_nr common_estimate_link(struct inode *parent,
  41060. + struct inode *object);
  41061. +int reiser4_update_dir(struct inode *);
  41062. +
  41063. +static inline void reiser4_check_immutable(struct inode *inode)
  41064. +{
  41065. + do {
  41066. + if (!reiser4_inode_get_flag(inode, REISER4_IMMUTABLE))
  41067. + break;
  41068. + yield();
  41069. + } while (1);
  41070. +}
  41071. +
  41072. +/**
  41073. + * reiser4_link_common - link of inode operations
  41074. + * @existing: dentry of object which is to get new name
  41075. + * @parent: directory where new name is to be created
  41076. + * @newname: new name
  41077. + *
  41078. + * This is common implementation of vfs's link method of struct
  41079. + * inode_operations.
  41080. + */
  41081. +int reiser4_link_common(struct dentry *existing, struct inode *parent,
  41082. + struct dentry *newname)
  41083. +{
  41084. + reiser4_context *ctx;
  41085. + int result;
  41086. + struct inode *object;
  41087. + dir_plugin *parent_dplug;
  41088. + reiser4_dir_entry_desc entry;
  41089. + reiser4_object_create_data data;
  41090. + reiser4_block_nr reserve;
  41091. +
  41092. + ctx = reiser4_init_context(parent->i_sb);
  41093. + if (IS_ERR(ctx))
  41094. + return PTR_ERR(ctx);
  41095. +
  41096. + assert("nikita-1431", existing != NULL);
  41097. + assert("nikita-1432", parent != NULL);
  41098. + assert("nikita-1433", newname != NULL);
  41099. +
  41100. + object = existing->d_inode;
  41101. + assert("nikita-1434", object != NULL);
  41102. +
  41103. + /* check for race with create_object() */
  41104. + reiser4_check_immutable(object);
  41105. +
  41106. + parent_dplug = inode_dir_plugin(parent);
  41107. +
  41108. + memset(&entry, 0, sizeof entry);
  41109. + entry.obj = object;
  41110. +
  41111. + data.mode = object->i_mode;
  41112. + data.id = inode_file_plugin(object)->h.id;
  41113. +
  41114. + reserve = common_estimate_link(parent, existing->d_inode);
  41115. + if ((__s64) reserve < 0) {
  41116. + context_set_commit_async(ctx);
  41117. + reiser4_exit_context(ctx);
  41118. + return reserve;
  41119. + }
  41120. +
  41121. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) {
  41122. + context_set_commit_async(ctx);
  41123. + reiser4_exit_context(ctx);
  41124. + return RETERR(-ENOSPC);
  41125. + }
  41126. +
  41127. + /*
  41128. + * Subtle race handling: sys_link() doesn't take i_mutex on @parent. It
  41129. + * means that link(2) can race against unlink(2) or rename(2), and
  41130. + * inode is dead (->i_nlink == 0) when reiser4_link() is entered.
  41131. + *
  41132. + * For such inode we have to undo special processing done in
  41133. + * reiser4_unlink() viz. creation of safe-link.
  41134. + */
  41135. + if (unlikely(object->i_nlink == 0)) {
  41136. + result = safe_link_del(reiser4_tree_by_inode(object),
  41137. + get_inode_oid(object), SAFE_UNLINK);
  41138. + if (result != 0) {
  41139. + context_set_commit_async(ctx);
  41140. + reiser4_exit_context(ctx);
  41141. + return result;
  41142. + }
  41143. + }
  41144. +
  41145. + /* increment nlink of @existing and update its stat data */
  41146. + result = reiser4_add_nlink(object, parent, 1);
  41147. + if (result == 0) {
  41148. + /* add entry to the parent */
  41149. + result =
  41150. + parent_dplug->add_entry(parent, newname, &data, &entry);
  41151. + if (result != 0) {
  41152. + /* failed to add entry to the parent, decrement nlink
  41153. + of @existing */
  41154. + reiser4_del_nlink(object, parent, 1);
  41155. + /*
  41156. + * now, if that failed, we have a file with too big
  41157. + * nlink---space leak, much better than directory
  41158. + * entry pointing to nowhere
  41159. + */
  41160. + }
  41161. + }
  41162. + if (result == 0) {
  41163. + atomic_inc(&object->i_count);
  41164. + /*
  41165. + * Upon successful completion, link() shall mark for update
  41166. + * the st_ctime field of the file. Also, the st_ctime and
  41167. + * st_mtime fields of the directory that contains the new
  41168. + * entry shall be marked for update. --SUS
  41169. + */
  41170. + result = reiser4_update_dir(parent);
  41171. + }
  41172. + if (result == 0)
  41173. + d_instantiate(newname, existing->d_inode);
  41174. +
  41175. + context_set_commit_async(ctx);
  41176. + reiser4_exit_context(ctx);
  41177. + return result;
  41178. +}
  41179. +
  41180. +static int unlink_check_and_grab(struct inode *parent, struct dentry *victim);
  41181. +
  41182. +/**
  41183. + * reiser4_unlink_common - unlink of inode operations
  41184. + * @parent: inode of directory to remove name from
  41185. + * @victim: name to be removed
  41186. + *
  41187. + * This is common implementation of vfs's unlink method of struct
  41188. + * inode_operations.
  41189. + */
  41190. +int reiser4_unlink_common(struct inode *parent, struct dentry *victim)
  41191. +{
  41192. + reiser4_context *ctx;
  41193. + int result;
  41194. + struct inode *object;
  41195. + file_plugin *fplug;
  41196. +
  41197. + ctx = reiser4_init_context(parent->i_sb);
  41198. + if (IS_ERR(ctx))
  41199. + return PTR_ERR(ctx);
  41200. +
  41201. + object = victim->d_inode;
  41202. + fplug = inode_file_plugin(object);
  41203. +
  41204. + result = unlink_check_and_grab(parent, victim);
  41205. + if (result != 0) {
  41206. + context_set_commit_async(ctx);
  41207. + reiser4_exit_context(ctx);
  41208. + return result;
  41209. + }
  41210. +
  41211. + if (fplug->detach)
  41212. + result = fplug->detach(object, parent);
  41213. + if (result == 0) {
  41214. + dir_plugin *parent_dplug;
  41215. + reiser4_dir_entry_desc entry;
  41216. +
  41217. + parent_dplug = inode_dir_plugin(parent);
  41218. + memset(&entry, 0, sizeof entry);
  41219. +
  41220. + /* first, delete directory entry */
  41221. + result = parent_dplug->rem_entry(parent, victim, &entry);
  41222. + if (result == 0) {
  41223. + /*
  41224. + * if name was removed successfully, we _have_ to
  41225. + * return 0 from this function, because upper level
  41226. + * caller (vfs_{rmdir,unlink}) expect this.
  41227. + *
  41228. + * now that directory entry is removed, update
  41229. + * stat-data
  41230. + */
  41231. + reiser4_del_nlink(object, parent, 1);
  41232. + /*
  41233. + * Upon successful completion, unlink() shall mark for
  41234. + * update the st_ctime and st_mtime fields of the
  41235. + * parent directory. Also, if the file's link count is
  41236. + * not 0, the st_ctime field of the file shall be
  41237. + * marked for update. --SUS
  41238. + */
  41239. + reiser4_update_dir(parent);
  41240. + /* add safe-link for this file */
  41241. + if (object->i_nlink == 0)
  41242. + safe_link_add(object, SAFE_UNLINK);
  41243. + }
  41244. + }
  41245. +
  41246. + if (unlikely(result != 0)) {
  41247. + if (result != -ENOMEM)
  41248. + warning("nikita-3398", "Cannot unlink %llu (%i)",
  41249. + (unsigned long long)get_inode_oid(object),
  41250. + result);
  41251. + /* if operation failed commit pending inode modifications to
  41252. + * the stat-data */
  41253. + reiser4_update_sd(object);
  41254. + reiser4_update_sd(parent);
  41255. + }
  41256. +
  41257. + reiser4_release_reserved(object->i_sb);
  41258. +
  41259. + /* @object's i_ctime was updated by ->rem_link() method(). */
  41260. +
  41261. + /* @victim can be already removed from the disk by this time. Inode is
  41262. + then marked so that iput() wouldn't try to remove stat data. But
  41263. + inode itself is still there.
  41264. + */
  41265. +
  41266. + /*
  41267. + * we cannot release directory semaphore here, because name has
  41268. + * already been deleted, but dentry (@victim) still exists. Prevent
  41269. + * balance_dirty_pages() from being called on exiting this context: we
  41270. + * don't want to do this under directory i_mutex.
  41271. + */
  41272. + context_set_commit_async(ctx);
  41273. + reiser4_exit_context(ctx);
  41274. + return result;
  41275. +}
  41276. +
  41277. +/**
  41278. + * reiser4_symlink_common - symlink of inode operations
  41279. + * @parent: inode of parent directory
  41280. + * @dentry: dentry of object to be created
  41281. + * @linkname: string symlink is to contain
  41282. + *
  41283. + * This is common implementation of vfs's symlink method of struct
  41284. + * inode_operations.
  41285. + * Creates object using file plugin SYMLINK_FILE_PLUGIN_ID.
  41286. + */
  41287. +int reiser4_symlink_common(struct user_namespace *mnt_userns,
  41288. + struct inode *parent, struct dentry *dentry,
  41289. + const char *linkname)
  41290. +{
  41291. + reiser4_object_create_data data;
  41292. +
  41293. + memset(&data, 0, sizeof data);
  41294. + data.name = linkname;
  41295. + data.id = SYMLINK_FILE_PLUGIN_ID;
  41296. + data.mode = S_IFLNK | S_IRWXUGO;
  41297. + return create_vfs_object(parent, dentry, &data);
  41298. +}
  41299. +
  41300. +/**
  41301. + * reiser4_mkdir_common - mkdir of inode operations
  41302. + * @parent: inode of parent directory
  41303. + * @dentry: dentry of object to be created
  41304. + * @mode: the permissions to use
  41305. + *
  41306. + * This is common implementation of vfs's mkdir method of struct
  41307. + * inode_operations.
  41308. + * Creates object using file plugin DIRECTORY_FILE_PLUGIN_ID.
  41309. + */
  41310. +int reiser4_mkdir_common(struct user_namespace *mnt_userns,
  41311. + struct inode *parent, struct dentry *dentry, umode_t mode)
  41312. +{
  41313. + reiser4_object_create_data data;
  41314. +
  41315. + memset(&data, 0, sizeof data);
  41316. + data.mode = S_IFDIR | mode;
  41317. + data.id = DIRECTORY_FILE_PLUGIN_ID;
  41318. + return create_vfs_object(parent, dentry, &data);
  41319. +}
  41320. +
  41321. +/**
  41322. + * reiser4_mknod_common - mknod of inode operations
  41323. + * @parent: inode of parent directory
  41324. + * @dentry: dentry of object to be created
  41325. + * @mode: the permissions to use and file type
  41326. + * @rdev: minor and major of new device file
  41327. + *
  41328. + * This is common implementation of vfs's mknod method of struct
  41329. + * inode_operations.
  41330. + * Creates object using file plugin SPECIAL_FILE_PLUGIN_ID.
  41331. + */
  41332. +int reiser4_mknod_common(struct user_namespace *mnt_userns,
  41333. + struct inode *parent, struct dentry *dentry,
  41334. + umode_t mode, dev_t rdev)
  41335. +{
  41336. + reiser4_object_create_data data;
  41337. +
  41338. + memset(&data, 0, sizeof data);
  41339. + data.mode = mode;
  41340. + data.rdev = rdev;
  41341. + data.id = SPECIAL_FILE_PLUGIN_ID;
  41342. + return create_vfs_object(parent, dentry, &data);
  41343. +}
  41344. +
  41345. +/*
  41346. + * implementation of vfs's rename method of struct inode_operations for typical
  41347. + * directory is in inode_ops_rename.c
  41348. + */
  41349. +
  41350. +/**
  41351. + * reiser4_get_link_common: ->get_link() of inode_operations
  41352. + * @dentry: dentry of symlink
  41353. + *
  41354. + * Assumes that inode's i_private points to the content of symbolic link.
  41355. + */
  41356. +const char *reiser4_get_link_common(struct dentry *dentry,
  41357. + struct inode *inode,
  41358. + struct delayed_call *done)
  41359. +{
  41360. + if (!dentry)
  41361. + return ERR_PTR(-ECHILD);
  41362. +
  41363. + assert("vs-851", S_ISLNK(dentry->d_inode->i_mode));
  41364. +
  41365. + if (!dentry->d_inode->i_private ||
  41366. + !reiser4_inode_get_flag(dentry->d_inode, REISER4_GENERIC_PTR_USED))
  41367. + return ERR_PTR(RETERR(-EINVAL));
  41368. +
  41369. + return dentry->d_inode->i_private;
  41370. +}
  41371. +
  41372. +/**
  41373. + * reiser4_permission_common - permission of inode operations
  41374. + * @inode: inode to check permissions for
  41375. + * @mask: mode bits to check permissions for
  41376. + * @flags:
  41377. + *
  41378. + * Uses generic function to check for rwx permissions.
  41379. + */
  41380. +int reiser4_permission_common(struct user_namespace *mnt_userns,
  41381. + struct inode *inode, int mask)
  41382. +{
  41383. + // generic_permission() says that it's rcu-aware...
  41384. +#if 0
  41385. + if (mask & MAY_NOT_BLOCK)
  41386. + return -ECHILD;
  41387. +#endif
  41388. + return generic_permission(&init_user_ns, inode, mask);
  41389. +}
  41390. +
  41391. +static int setattr_reserve(reiser4_tree *);
  41392. +
  41393. +/* this is common implementation of vfs's setattr method of struct
  41394. + inode_operations
  41395. +*/
  41396. +int reiser4_setattr_common(struct user_namespace *mnt_userns,
  41397. + struct dentry *dentry, struct iattr *attr)
  41398. +{
  41399. + reiser4_context *ctx;
  41400. + struct inode *inode;
  41401. + int result;
  41402. +
  41403. + inode = dentry->d_inode;
  41404. + result = setattr_prepare(&init_user_ns, dentry, attr);
  41405. + if (result)
  41406. + return result;
  41407. +
  41408. + ctx = reiser4_init_context(inode->i_sb);
  41409. + if (IS_ERR(ctx))
  41410. + return PTR_ERR(ctx);
  41411. +
  41412. + assert("nikita-3119", !(attr->ia_valid & ATTR_SIZE));
  41413. +
  41414. + /*
  41415. + * grab disk space and call standard
  41416. + * setattr_copy();
  41417. + * mark_inode_dirty().
  41418. + */
  41419. + result = setattr_reserve(reiser4_tree_by_inode(inode));
  41420. + if (!result) {
  41421. + setattr_copy(&init_user_ns, inode, attr);
  41422. + mark_inode_dirty(inode);
  41423. + result = reiser4_update_sd(inode);
  41424. + }
  41425. + context_set_commit_async(ctx);
  41426. + reiser4_exit_context(ctx);
  41427. + return result;
  41428. +}
  41429. +
  41430. +/* this is common implementation of vfs's getattr method of struct
  41431. + inode_operations
  41432. +*/
  41433. +int reiser4_getattr_common(struct user_namespace *mnt_userns,
  41434. + const struct path *path, struct kstat *stat,
  41435. + u32 request_mask, unsigned int flags)
  41436. +{
  41437. + struct inode *obj;
  41438. +
  41439. + assert("nikita-2298", path != NULL);
  41440. + assert("nikita-2299", stat != NULL);
  41441. +
  41442. + obj = d_inode(path->dentry);
  41443. +
  41444. + stat->dev = obj->i_sb->s_dev;
  41445. + stat->ino = oid_to_uino(get_inode_oid(obj));
  41446. + stat->mode = obj->i_mode;
  41447. + /* don't confuse userland with huge nlink. This is not entirely
  41448. + * correct, because nlink_t is not necessarily 16-bit signed. */
  41449. + stat->nlink = min(obj->i_nlink, (typeof(obj->i_nlink)) 0x7fff);
  41450. + stat->uid = obj->i_uid;
  41451. + stat->gid = obj->i_gid;
  41452. + stat->rdev = obj->i_rdev;
  41453. + stat->atime = obj->i_atime;
  41454. + stat->mtime = obj->i_mtime;
  41455. + stat->ctime = obj->i_ctime;
  41456. + stat->size = obj->i_size;
  41457. + stat->blocks =
  41458. + (inode_get_bytes(obj) + VFS_BLKSIZE - 1) >> VFS_BLKSIZE_BITS;
  41459. + /* "preferred" blocksize for efficient file system I/O */
  41460. + stat->blksize = get_super_private(obj->i_sb)->optimal_io_size;
  41461. +
  41462. + return 0;
  41463. +}
  41464. +
  41465. +/* Estimate the maximum amount of nodes which might be allocated or changed on
  41466. + typical new object creation. Typical creation consists of calling create
  41467. + method of file plugin, adding directory entry to parent and update parent
  41468. + directory's stat data.
  41469. +*/
  41470. +static reiser4_block_nr estimate_create_vfs_object(struct inode *parent,
  41471. + /* parent object */
  41472. + struct inode *object
  41473. + /* object */)
  41474. +{
  41475. + assert("vpf-309", parent != NULL);
  41476. + assert("vpf-307", object != NULL);
  41477. +
  41478. + return
  41479. + /* object creation estimation */
  41480. + inode_file_plugin(object)->estimate.create(object) +
  41481. + /* stat data of parent directory estimation */
  41482. + inode_file_plugin(parent)->estimate.update(parent) +
  41483. + /* adding entry estimation */
  41484. + inode_dir_plugin(parent)->estimate.add_entry(parent) +
  41485. + /* to undo in the case of failure */
  41486. + inode_dir_plugin(parent)->estimate.rem_entry(parent);
  41487. +}
  41488. +
  41489. +/* Create child in directory.
  41490. +
  41491. + . get object's plugin
  41492. + . get fresh inode
  41493. + . initialize inode
  41494. + . add object's stat-data
  41495. + . initialize object's directory
  41496. + . add entry to the parent
  41497. + . instantiate dentry
  41498. +
  41499. +*/
  41500. +static int do_create_vfs_child(reiser4_object_create_data * data,/* parameters
  41501. + of new
  41502. + object */
  41503. + struct inode **retobj)
  41504. +{
  41505. + int result;
  41506. +
  41507. + struct dentry *dentry; /* new name */
  41508. + struct inode *parent; /* parent object */
  41509. +
  41510. + dir_plugin *par_dir; /* directory plugin on the parent */
  41511. + dir_plugin *obj_dir; /* directory plugin on the new object */
  41512. + file_plugin *obj_plug; /* object plugin on the new object */
  41513. + struct inode *object; /* new object */
  41514. + reiser4_block_nr reserve;
  41515. +
  41516. + reiser4_dir_entry_desc entry; /* new directory entry */
  41517. +
  41518. + assert("nikita-1420", data != NULL);
  41519. + parent = data->parent;
  41520. + dentry = data->dentry;
  41521. +
  41522. + assert("nikita-1418", parent != NULL);
  41523. + assert("nikita-1419", dentry != NULL);
  41524. +
  41525. + /* check, that name is acceptable for parent */
  41526. + par_dir = inode_dir_plugin(parent);
  41527. + if (par_dir->is_name_acceptable &&
  41528. + !par_dir->is_name_acceptable(parent,
  41529. + dentry->d_name.name,
  41530. + (int)dentry->d_name.len))
  41531. + return RETERR(-ENAMETOOLONG);
  41532. +
  41533. + result = 0;
  41534. + obj_plug = file_plugin_by_id((int)data->id);
  41535. + if (obj_plug == NULL) {
  41536. + warning("nikita-430", "Cannot find plugin %i", data->id);
  41537. + return RETERR(-ENOENT);
  41538. + }
  41539. + object = new_inode(parent->i_sb);
  41540. + if (object == NULL)
  41541. + return RETERR(-ENOMEM);
  41542. + /* new_inode() initializes i_ino to "arbitrary" value. Reset it to 0,
  41543. + * to simplify error handling: if some error occurs before i_ino is
  41544. + * initialized with oid, i_ino should already be set to some
  41545. + * distinguished value. */
  41546. + object->i_ino = 0;
  41547. +
  41548. + /* So that on error iput will be called. */
  41549. + *retobj = object;
  41550. +
  41551. + memset(&entry, 0, sizeof entry);
  41552. + entry.obj = object;
  41553. +
  41554. + set_plugin(&reiser4_inode_data(object)->pset, PSET_FILE,
  41555. + file_plugin_to_plugin(obj_plug));
  41556. + result = obj_plug->set_plug_in_inode(object, parent, data);
  41557. + if (result) {
  41558. + warning("nikita-431", "Cannot install plugin %i on %llx",
  41559. + data->id, (unsigned long long)get_inode_oid(object));
  41560. + return result;
  41561. + }
  41562. +
  41563. + /* reget plugin after installation */
  41564. + obj_plug = inode_file_plugin(object);
  41565. +
  41566. + if (obj_plug->create_object == NULL) {
  41567. + return RETERR(-EPERM);
  41568. + }
  41569. +
  41570. + /* if any of hash, tail, sd or permission plugins for newly created
  41571. + object are not set yet set them here inheriting them from parent
  41572. + directory
  41573. + */
  41574. + assert("nikita-2070", obj_plug->adjust_to_parent != NULL);
  41575. + result = obj_plug->adjust_to_parent(object,
  41576. + parent,
  41577. + object->i_sb->s_root->d_inode);
  41578. + if (result == 0)
  41579. + result = finish_pset(object);
  41580. + if (result != 0) {
  41581. + warning("nikita-432", "Cannot inherit from %llx to %llx",
  41582. + (unsigned long long)get_inode_oid(parent),
  41583. + (unsigned long long)get_inode_oid(object));
  41584. + return result;
  41585. + }
  41586. +
  41587. + /* setup inode and file-operations for this inode */
  41588. + setup_inode_ops(object, data);
  41589. +
  41590. + /* call file plugin's method to initialize plugin specific part of
  41591. + * inode */
  41592. + if (obj_plug->init_inode_data)
  41593. + obj_plug->init_inode_data(object, data, 1/*create */);
  41594. +
  41595. + /* obtain directory plugin (if any) for new object. */
  41596. + obj_dir = inode_dir_plugin(object);
  41597. + if (obj_dir != NULL && obj_dir->init == NULL) {
  41598. + return RETERR(-EPERM);
  41599. + }
  41600. +
  41601. + reiser4_inode_data(object)->locality_id = get_inode_oid(parent);
  41602. +
  41603. + reserve = estimate_create_vfs_object(parent, object);
  41604. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) {
  41605. + return RETERR(-ENOSPC);
  41606. + }
  41607. +
  41608. + /* mark inode `immutable'. We disable changes to the file being
  41609. + created until valid directory entry for it is inserted. Otherwise,
  41610. + if file were expanded and insertion of directory entry fails, we
  41611. + have to remove file, but we only allotted enough space in
  41612. + transaction to remove _empty_ file. 3.x code used to remove stat
  41613. + data in different transaction thus possibly leaking disk space on
  41614. + crash. This all only matters if it's possible to access file
  41615. + without name, for example, by inode number
  41616. + */
  41617. + reiser4_inode_set_flag(object, REISER4_IMMUTABLE);
  41618. +
  41619. + /* create empty object, this includes allocation of new objectid. For
  41620. + directories this implies creation of dot and dotdot */
  41621. + assert("nikita-2265", reiser4_inode_get_flag(object, REISER4_NO_SD));
  41622. +
  41623. + /* mark inode as `loaded'. From this point onward
  41624. + reiser4_delete_inode() will try to remove its stat-data. */
  41625. + reiser4_inode_set_flag(object, REISER4_LOADED);
  41626. +
  41627. + result = obj_plug->create_object(object, parent, data);
  41628. + if (result != 0) {
  41629. + reiser4_inode_clr_flag(object, REISER4_IMMUTABLE);
  41630. + if (result != -ENAMETOOLONG && result != -ENOMEM)
  41631. + warning("nikita-2219",
  41632. + "Failed to create sd for %llu",
  41633. + (unsigned long long)get_inode_oid(object));
  41634. + return result;
  41635. + }
  41636. +
  41637. + if (obj_dir != NULL)
  41638. + result = obj_dir->init(object, parent, data);
  41639. + if (result == 0) {
  41640. + assert("nikita-434", !reiser4_inode_get_flag(object,
  41641. + REISER4_NO_SD));
  41642. + /* insert inode into VFS hash table */
  41643. + insert_inode_hash(object);
  41644. + /* create entry */
  41645. + result = par_dir->add_entry(parent, dentry, data, &entry);
  41646. + if (result == 0) {
  41647. + /* If O_CREAT is set and the file did not previously
  41648. + exist, upon successful completion, open() shall
  41649. + mark for update the st_atime, st_ctime, and
  41650. + st_mtime fields of the file and the st_ctime and
  41651. + st_mtime fields of the parent directory. --SUS
  41652. + */
  41653. + object->i_ctime = current_time(object);
  41654. + reiser4_update_dir(parent);
  41655. + }
  41656. + if (result != 0 && obj_plug->detach)
  41657. + /* cleanup failure to add entry */
  41658. + obj_plug->detach(object, parent);
  41659. + } else if (result != -ENOMEM)
  41660. + warning("nikita-2219", "Failed to initialize dir for %llu: %i",
  41661. + (unsigned long long)get_inode_oid(object), result);
  41662. +
  41663. + /*
  41664. + * update stat-data, committing all pending modifications to the inode
  41665. + * fields.
  41666. + */
  41667. + reiser4_update_sd(object);
  41668. + if (result != 0) {
  41669. + /* if everything was ok (result == 0), parent stat-data is
  41670. + * already updated above (update_parent_dir()) */
  41671. + reiser4_update_sd(parent);
  41672. + /* failure to create entry, remove object */
  41673. + obj_plug->delete_object(object);
  41674. + }
  41675. +
  41676. + /* file has name now, clear immutable flag */
  41677. + reiser4_inode_clr_flag(object, REISER4_IMMUTABLE);
  41678. +
  41679. + /* on error, iput() will call ->delete_inode(). We should keep track
  41680. + of the existence of stat-data for this inode and avoid attempt to
  41681. + remove it in reiser4_delete_inode(). This is accomplished through
  41682. + REISER4_NO_SD bit in inode.u.reiser4_i.plugin.flags
  41683. + */
  41684. + return result;
  41685. +}
  41686. +
  41687. +/* this is helper for common implementations of reiser4_mkdir, reiser4_create,
  41688. + reiser4_mknod and reiser4_symlink
  41689. +*/
  41690. +static int
  41691. +create_vfs_object(struct inode *parent,
  41692. + struct dentry *dentry, reiser4_object_create_data * data)
  41693. +{
  41694. + reiser4_context *ctx;
  41695. + int result;
  41696. + struct inode *child;
  41697. +
  41698. + ctx = reiser4_init_context(parent->i_sb);
  41699. + if (IS_ERR(ctx))
  41700. + return PTR_ERR(ctx);
  41701. + context_set_commit_async(ctx);
  41702. +
  41703. + data->parent = parent;
  41704. + data->dentry = dentry;
  41705. + child = NULL;
  41706. + result = do_create_vfs_child(data, &child);
  41707. + if (unlikely(result != 0)) {
  41708. + if (child != NULL) {
  41709. + /* for unlinked inode accounting in iput() */
  41710. + clear_nlink(child);
  41711. + reiser4_make_bad_inode(child);
  41712. + iput(child);
  41713. + }
  41714. + } else
  41715. + d_instantiate(dentry, child);
  41716. +
  41717. + reiser4_exit_context(ctx);
  41718. + return result;
  41719. +}
  41720. +
  41721. +/**
  41722. + * helper for link_common. Estimate disk space necessary to add a link
  41723. + * from @parent to @object
  41724. + */
  41725. +static reiser4_block_nr common_estimate_link(struct inode *parent /* parent
  41726. + * directory
  41727. + */,
  41728. + struct inode *object /* object to
  41729. + * which new
  41730. + * link is
  41731. + * being
  41732. + * created */)
  41733. +{
  41734. + reiser4_block_nr res = 0;
  41735. + file_plugin *fplug;
  41736. + dir_plugin *dplug;
  41737. +
  41738. + assert("vpf-317", object != NULL);
  41739. + assert("vpf-318", parent != NULL);
  41740. +
  41741. + fplug = inode_file_plugin(object);
  41742. + dplug = inode_dir_plugin(parent);
  41743. + /* VS-FIXME-HANS: why do we do fplug->estimate.update(object) twice
  41744. + * instead of multiplying by 2? */
  41745. + /* reiser4_add_nlink(object) */
  41746. + res += fplug->estimate.update(object);
  41747. + /* add_entry(parent) */
  41748. + res += dplug->estimate.add_entry(parent);
  41749. + /* reiser4_del_nlink(object) */
  41750. + res += fplug->estimate.update(object);
  41751. + /* update_dir(parent) */
  41752. + res += inode_file_plugin(parent)->estimate.update(parent);
  41753. + /* safe-link */
  41754. + res += estimate_one_item_removal(reiser4_tree_by_inode(object));
  41755. +
  41756. + return res;
  41757. +}
  41758. +
  41759. +/* Estimate disk space necessary to remove a link between @parent and
  41760. + @object.
  41761. +*/
  41762. +static reiser4_block_nr estimate_unlink(struct inode *parent /* parent
  41763. + * directory */,
  41764. + struct inode *object /* object whose
  41765. + * link is
  41766. + * being removed
  41767. + */)
  41768. +{
  41769. + reiser4_block_nr res = 0;
  41770. + file_plugin *fplug;
  41771. + dir_plugin *dplug;
  41772. +
  41773. + assert("vpf-317", object != NULL);
  41774. + assert("vpf-318", parent != NULL);
  41775. +
  41776. + fplug = inode_file_plugin(object);
  41777. + dplug = inode_dir_plugin(parent);
  41778. +
  41779. + /* rem_entry(parent) */
  41780. + res += dplug->estimate.rem_entry(parent);
  41781. + /* reiser4_del_nlink(object) */
  41782. + res += fplug->estimate.update(object);
  41783. + /* update_dir(parent) */
  41784. + res += inode_file_plugin(parent)->estimate.update(parent);
  41785. + /* fplug->unlink */
  41786. + res += fplug->estimate.unlink(object, parent);
  41787. + /* safe-link */
  41788. + res += estimate_one_insert_item(reiser4_tree_by_inode(object));
  41789. +
  41790. + return res;
  41791. +}
  41792. +
  41793. +/* helper for reiser4_unlink_common. Estimate and grab space for unlink. */
  41794. +static int unlink_check_and_grab(struct inode *parent, struct dentry *victim)
  41795. +{
  41796. + file_plugin *fplug;
  41797. + struct inode *child;
  41798. + int result;
  41799. +
  41800. + result = 0;
  41801. + child = victim->d_inode;
  41802. + fplug = inode_file_plugin(child);
  41803. +
  41804. + /* check for race with create_object() */
  41805. + reiser4_check_immutable(child);
  41806. +
  41807. + /* object being deleted should have stat data */
  41808. + assert("vs-949", !reiser4_inode_get_flag(child, REISER4_NO_SD));
  41809. +
  41810. + /* ask object plugin */
  41811. + if (fplug->can_rem_link != NULL && !fplug->can_rem_link(child))
  41812. + return RETERR(-ENOTEMPTY);
  41813. +
  41814. + result = (int)estimate_unlink(parent, child);
  41815. + if (result < 0)
  41816. + return result;
  41817. +
  41818. + return reiser4_grab_reserved(child->i_sb, result, BA_CAN_COMMIT);
  41819. +}
  41820. +
  41821. +/* helper for reiser4_setattr_common */
  41822. +static int setattr_reserve(reiser4_tree * tree)
  41823. +{
  41824. + assert("vs-1096", is_grab_enabled(get_current_context()));
  41825. + return reiser4_grab_space(estimate_one_insert_into_item(tree),
  41826. + BA_CAN_COMMIT);
  41827. +}
  41828. +
  41829. +/* helper function. Standards require that for many file-system operations
  41830. + on success ctime and mtime of parent directory is to be updated. */
  41831. +int reiser4_update_dir(struct inode *dir)
  41832. +{
  41833. + assert("nikita-2525", dir != NULL);
  41834. +
  41835. + dir->i_ctime = dir->i_mtime = current_time(dir);
  41836. + return reiser4_update_sd(dir);
  41837. +}
  41838. +
  41839. +/*
  41840. + Local variables:
  41841. + c-indentation-style: "K&R"
  41842. + mode-name: "LC"
  41843. + c-basic-offset: 8
  41844. + tab-width: 8
  41845. + fill-column: 80
  41846. + scroll-step: 1
  41847. + End:
  41848. +*/
  41849. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/inode_ops_rename.c linux-5.16.14/fs/reiser4/plugin/inode_ops_rename.c
  41850. --- linux-5.16.14.orig/fs/reiser4/plugin/inode_ops_rename.c 1970-01-01 01:00:00.000000000 +0100
  41851. +++ linux-5.16.14/fs/reiser4/plugin/inode_ops_rename.c 2022-03-12 13:26:19.675892784 +0100
  41852. @@ -0,0 +1,960 @@
  41853. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  41854. + * reiser4/README */
  41855. +
  41856. +#include "../inode.h"
  41857. +#include "../safe_link.h"
  41858. +
  41859. +static const char *possible_leak = "Possible disk space leak.";
  41860. +
  41861. +/* re-bind existing name at @from_coord in @from_dir to point to @to_inode.
  41862. +
  41863. + Helper function called from hashed_rename() */
  41864. +static int replace_name(struct inode *to_inode, /* inode where @from_coord is
  41865. + * to be re-targeted at */
  41866. + struct inode *from_dir, /* directory where @from_coord
  41867. + * lives */
  41868. + struct inode *from_inode, /* inode @from_coord
  41869. + * originally points to */
  41870. + coord_t *from_coord, /* where directory entry is in
  41871. + * the tree */
  41872. + lock_handle * from_lh/* lock handle on @from_coord */)
  41873. +{
  41874. + item_plugin *from_item;
  41875. + int result;
  41876. + znode *node;
  41877. +
  41878. + coord_clear_iplug(from_coord);
  41879. + node = from_coord->node;
  41880. + result = zload(node);
  41881. + if (result != 0)
  41882. + return result;
  41883. + from_item = item_plugin_by_coord(from_coord);
  41884. + if (plugin_of_group(item_plugin_by_coord(from_coord),
  41885. + DIR_ENTRY_ITEM_TYPE)) {
  41886. + reiser4_key to_key;
  41887. +
  41888. + build_sd_key(to_inode, &to_key);
  41889. +
  41890. + /* everything is found and prepared to change directory entry
  41891. + at @from_coord to point to @to_inode.
  41892. +
  41893. + @to_inode is just about to get new name, so bump its link
  41894. + counter.
  41895. +
  41896. + */
  41897. + result = reiser4_add_nlink(to_inode, from_dir, 0);
  41898. + if (result != 0) {
  41899. + /* Don't issue warning: this may be plain -EMLINK */
  41900. + zrelse(node);
  41901. + return result;
  41902. + }
  41903. +
  41904. + result =
  41905. + from_item->s.dir.update_key(from_coord, &to_key, from_lh);
  41906. + if (result != 0) {
  41907. + reiser4_del_nlink(to_inode, from_dir, 0);
  41908. + zrelse(node);
  41909. + return result;
  41910. + }
  41911. +
  41912. + /* @from_inode just lost its name, he-he.
  41913. +
  41914. + If @from_inode was directory, it contained dotdot pointing
  41915. + to @from_dir. @from_dir i_nlink will be decreased when
  41916. + iput() will be called on @from_inode.
  41917. +
  41918. + If file-system is not ADG (hard-links are
  41919. + supported on directories), iput(from_inode) will not remove
  41920. + @from_inode, and thus above is incorrect, but hard-links on
  41921. + directories are problematic in many other respects.
  41922. + */
  41923. + result = reiser4_del_nlink(from_inode, from_dir, 0);
  41924. + if (result != 0) {
  41925. + warning("nikita-2330",
  41926. + "Cannot remove link from source: %i. %s",
  41927. + result, possible_leak);
  41928. + }
  41929. + /* Has to return success, because entry is already
  41930. + * modified. */
  41931. + result = 0;
  41932. +
  41933. + /* NOTE-NIKITA consider calling plugin method instead of
  41934. + accessing inode fields directly. */
  41935. + from_dir->i_mtime = current_time(from_dir);
  41936. + } else {
  41937. + warning("nikita-2326", "Unexpected item type");
  41938. + result = RETERR(-EIO);
  41939. + }
  41940. + zrelse(node);
  41941. + return result;
  41942. +}
  41943. +
  41944. +/* add new entry pointing to @inode into @dir at @coord, locked by @lh
  41945. +
  41946. + Helper function used by hashed_rename(). */
  41947. +static int add_name(struct inode *inode, /* inode where @coord is to be
  41948. + * re-targeted at */
  41949. + struct inode *dir, /* directory where @coord lives */
  41950. + struct dentry *name, /* new name */
  41951. + coord_t *coord, /* where directory entry is in the tree
  41952. + */
  41953. + lock_handle * lh, /* lock handle on @coord */
  41954. + int is_dir/* true, if @inode is directory */)
  41955. +{
  41956. + int result;
  41957. + reiser4_dir_entry_desc entry;
  41958. +
  41959. + assert("nikita-2333", lh->node == coord->node);
  41960. + assert("nikita-2334", is_dir == S_ISDIR(inode->i_mode));
  41961. +
  41962. + memset(&entry, 0, sizeof entry);
  41963. + entry.obj = inode;
  41964. + /* build key of directory entry description */
  41965. + inode_dir_plugin(dir)->build_entry_key(dir, &name->d_name, &entry.key);
  41966. +
  41967. + /* ext2 does this in different order: first inserts new entry,
  41968. + then increases directory nlink. We don't want do this,
  41969. + because reiser4_add_nlink() calls ->add_link() plugin
  41970. + method that can fail for whatever reason, leaving as with
  41971. + cleanup problems.
  41972. + */
  41973. + /* @inode is getting new name */
  41974. + reiser4_add_nlink(inode, dir, 0);
  41975. + /* create @new_name in @new_dir pointing to
  41976. + @old_inode */
  41977. + result = WITH_COORD(coord,
  41978. + inode_dir_item_plugin(dir)->s.dir.add_entry(dir,
  41979. + coord,
  41980. + lh,
  41981. + name,
  41982. + &entry));
  41983. + if (result != 0) {
  41984. + int result2;
  41985. + result2 = reiser4_del_nlink(inode, dir, 0);
  41986. + if (result2 != 0) {
  41987. + warning("nikita-2327",
  41988. + "Cannot drop link on %lli %i. %s",
  41989. + (unsigned long long)get_inode_oid(inode),
  41990. + result2, possible_leak);
  41991. + }
  41992. + } else
  41993. + INODE_INC_FIELD(dir, i_size);
  41994. + return result;
  41995. +}
  41996. +
  41997. +static reiser4_block_nr estimate_rename(struct inode *old_dir, /* directory
  41998. + * where @old is
  41999. + * located */
  42000. + struct dentry *old_name,/* old name */
  42001. + struct inode *new_dir, /* directory
  42002. + * where @new is
  42003. + * located */
  42004. + struct dentry *new_name /* new name */)
  42005. +{
  42006. + reiser4_block_nr res1, res2;
  42007. + dir_plugin * p_parent_old, *p_parent_new;
  42008. + file_plugin * p_child_old, *p_child_new;
  42009. +
  42010. + assert("vpf-311", old_dir != NULL);
  42011. + assert("vpf-312", new_dir != NULL);
  42012. + assert("vpf-313", old_name != NULL);
  42013. + assert("vpf-314", new_name != NULL);
  42014. +
  42015. + p_parent_old = inode_dir_plugin(old_dir);
  42016. + p_parent_new = inode_dir_plugin(new_dir);
  42017. + p_child_old = inode_file_plugin(old_name->d_inode);
  42018. + if (new_name->d_inode)
  42019. + p_child_new = inode_file_plugin(new_name->d_inode);
  42020. + else
  42021. + p_child_new = NULL;
  42022. +
  42023. + /* find_entry - can insert one leaf. */
  42024. + res1 = res2 = 1;
  42025. +
  42026. + /* replace_name */
  42027. + {
  42028. + /* reiser4_add_nlink(p_child_old) and
  42029. + * reiser4_del_nlink(p_child_old) */
  42030. + res1 += 2 * p_child_old->estimate.update(old_name->d_inode);
  42031. + /* update key */
  42032. + res1 += 1;
  42033. + /* reiser4_del_nlink(p_child_new) */
  42034. + if (p_child_new)
  42035. + res1 += p_child_new->estimate.update(new_name->d_inode);
  42036. + }
  42037. +
  42038. + /* else add_name */
  42039. + {
  42040. + /* reiser4_add_nlink(p_parent_new) and
  42041. + * reiser4_del_nlink(p_parent_new) */
  42042. + res2 +=
  42043. + 2 * inode_file_plugin(new_dir)->estimate.update(new_dir);
  42044. + /* reiser4_add_nlink(p_parent_old) */
  42045. + res2 += p_child_old->estimate.update(old_name->d_inode);
  42046. + /* add_entry(p_parent_new) */
  42047. + res2 += p_parent_new->estimate.add_entry(new_dir);
  42048. + /* reiser4_del_nlink(p_parent_old) */
  42049. + res2 += p_child_old->estimate.update(old_name->d_inode);
  42050. + }
  42051. +
  42052. + res1 = res1 < res2 ? res2 : res1;
  42053. +
  42054. + /* reiser4_write_sd(p_parent_new) */
  42055. + res1 += inode_file_plugin(new_dir)->estimate.update(new_dir);
  42056. +
  42057. + /* reiser4_write_sd(p_child_new) */
  42058. + if (p_child_new)
  42059. + res1 += p_child_new->estimate.update(new_name->d_inode);
  42060. +
  42061. + /* hashed_rem_entry(p_parent_old) */
  42062. + res1 += p_parent_old->estimate.rem_entry(old_dir);
  42063. +
  42064. + /* reiser4_del_nlink(p_child_old) */
  42065. + res1 += p_child_old->estimate.update(old_name->d_inode);
  42066. +
  42067. + /* replace_name */
  42068. + {
  42069. + /* reiser4_add_nlink(p_parent_dir_new) */
  42070. + res1 += inode_file_plugin(new_dir)->estimate.update(new_dir);
  42071. + /* update_key */
  42072. + res1 += 1;
  42073. + /* reiser4_del_nlink(p_parent_new) */
  42074. + res1 += inode_file_plugin(new_dir)->estimate.update(new_dir);
  42075. + /* reiser4_del_nlink(p_parent_old) */
  42076. + res1 += inode_file_plugin(old_dir)->estimate.update(old_dir);
  42077. + }
  42078. +
  42079. + /* reiser4_write_sd(p_parent_old) */
  42080. + res1 += inode_file_plugin(old_dir)->estimate.update(old_dir);
  42081. +
  42082. + /* reiser4_write_sd(p_child_old) */
  42083. + res1 += p_child_old->estimate.update(old_name->d_inode);
  42084. +
  42085. + return res1;
  42086. +}
  42087. +
  42088. +static int hashed_rename_estimate_and_grab(struct inode *old_dir, /* directory
  42089. + * where @old
  42090. + * is located
  42091. + */
  42092. + struct dentry *old_name,/* old name
  42093. + */
  42094. + struct inode *new_dir, /* directory
  42095. + * where @new
  42096. + * is located
  42097. + */
  42098. + struct dentry *new_name /* new name
  42099. + */)
  42100. +{
  42101. + reiser4_block_nr reserve;
  42102. +
  42103. + reserve = estimate_rename(old_dir, old_name, new_dir, new_name);
  42104. +
  42105. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT))
  42106. + return RETERR(-ENOSPC);
  42107. +
  42108. + return 0;
  42109. +}
  42110. +
  42111. +/* check whether @old_inode and @new_inode can be moved within file system
  42112. + * tree. This singles out attempts to rename pseudo-files, for example. */
  42113. +static int can_rename(struct inode *old_dir, struct inode *old_inode,
  42114. + struct inode *new_dir, struct inode *new_inode)
  42115. +{
  42116. + file_plugin *fplug;
  42117. + dir_plugin *dplug;
  42118. +
  42119. + assert("nikita-3370", old_inode != NULL);
  42120. +
  42121. + dplug = inode_dir_plugin(new_dir);
  42122. + fplug = inode_file_plugin(old_inode);
  42123. +
  42124. + if (dplug == NULL)
  42125. + return RETERR(-ENOTDIR);
  42126. + else if (new_dir->i_op->create == NULL)
  42127. + return RETERR(-EPERM);
  42128. + else if (!fplug->can_add_link(old_inode))
  42129. + return RETERR(-EMLINK);
  42130. + else if (new_inode != NULL) {
  42131. + fplug = inode_file_plugin(new_inode);
  42132. + if (fplug->can_rem_link != NULL &&
  42133. + !fplug->can_rem_link(new_inode))
  42134. + return RETERR(-EBUSY);
  42135. + }
  42136. + return 0;
  42137. +}
  42138. +
  42139. +int reiser4_find_entry(struct inode *, struct dentry *, lock_handle * ,
  42140. + znode_lock_mode, reiser4_dir_entry_desc *);
  42141. +int reiser4_update_dir(struct inode *);
  42142. +
  42143. +/* this is common implementation of vfs's rename2 method of struct
  42144. + inode_operations
  42145. + See comments in the body.
  42146. +
  42147. + It is arguable that this function can be made generic so, that it
  42148. + will be applicable to any kind of directory plugin that deals with
  42149. + directories composed out of directory entries. The only obstacle
  42150. + here is that we don't have any data-type to represent directory
  42151. + entry. This should be re-considered when more than one different
  42152. + directory plugin will be implemented.
  42153. +*/
  42154. +int reiser4_rename2_common(struct user_namespace *mnt_userns,
  42155. + struct inode *old_dir /* directory where @old
  42156. + * is located */ ,
  42157. + struct dentry *old_name /* old name */ ,
  42158. + struct inode *new_dir /* directory where @new
  42159. + * is located */ ,
  42160. + struct dentry *new_name /* new name */ ,
  42161. + unsigned flags /* specific flags */)
  42162. +{
  42163. + /* From `The Open Group Base Specifications Issue 6'
  42164. +
  42165. + If either the old or new argument names a symbolic link, rename()
  42166. + shall operate on the symbolic link itself, and shall not resolve
  42167. + the last component of the argument. If the old argument and the new
  42168. + argument resolve to the same existing file, rename() shall return
  42169. + successfully and perform no other action.
  42170. +
  42171. + [this is done by VFS: vfs_rename()]
  42172. +
  42173. + If the old argument points to the pathname of a file that is not a
  42174. + directory, the new argument shall not point to the pathname of a
  42175. + directory.
  42176. +
  42177. + [checked by VFS: vfs_rename->may_delete()]
  42178. +
  42179. + If the link named by the new argument exists, it shall
  42180. + be removed and old renamed to new. In this case, a link named new
  42181. + shall remain visible to other processes throughout the renaming
  42182. + operation and refer either to the file referred to by new or old
  42183. + before the operation began.
  42184. +
  42185. + [we should assure this]
  42186. +
  42187. + Write access permission is required for
  42188. + both the directory containing old and the directory containing new.
  42189. +
  42190. + [checked by VFS: vfs_rename->may_delete(), may_create()]
  42191. +
  42192. + If the old argument points to the pathname of a directory, the new
  42193. + argument shall not point to the pathname of a file that is not a
  42194. + directory.
  42195. +
  42196. + [checked by VFS: vfs_rename->may_delete()]
  42197. +
  42198. + If the directory named by the new argument exists, it
  42199. + shall be removed and old renamed to new. In this case, a link named
  42200. + new shall exist throughout the renaming operation and shall refer
  42201. + either to the directory referred to by new or old before the
  42202. + operation began.
  42203. +
  42204. + [we should assure this]
  42205. +
  42206. + If new names an existing directory, it shall be
  42207. + required to be an empty directory.
  42208. +
  42209. + [we should check this]
  42210. +
  42211. + If the old argument points to a pathname of a symbolic link, the
  42212. + symbolic link shall be renamed. If the new argument points to a
  42213. + pathname of a symbolic link, the symbolic link shall be removed.
  42214. +
  42215. + The new pathname shall not contain a path prefix that names
  42216. + old. Write access permission is required for the directory
  42217. + containing old and the directory containing new. If the old
  42218. + argument points to the pathname of a directory, write access
  42219. + permission may be required for the directory named by old, and, if
  42220. + it exists, the directory named by new.
  42221. +
  42222. + [checked by VFS: vfs_rename(), vfs_rename_dir()]
  42223. +
  42224. + If the link named by the new argument exists and the file's link
  42225. + count becomes 0 when it is removed and no process has the file
  42226. + open, the space occupied by the file shall be freed and the file
  42227. + shall no longer be accessible. If one or more processes have the
  42228. + file open when the last link is removed, the link shall be removed
  42229. + before rename() returns, but the removal of the file contents shall
  42230. + be postponed until all references to the file are closed.
  42231. +
  42232. + [iput() handles this, but we can do this manually, a la
  42233. + reiser4_unlink()]
  42234. +
  42235. + Upon successful completion, rename() shall mark for update the
  42236. + st_ctime and st_mtime fields of the parent directory of each file.
  42237. +
  42238. + [N/A]
  42239. +
  42240. + */
  42241. +
  42242. + /* From Documentation/filesystems/vfs.txt:
  42243. +
  42244. + rename2: this has an additional flags argument compared to rename.
  42245. + f no flags are supported by the filesystem then this method
  42246. + need not be implemented. If some flags are supported then the
  42247. + filesystem must return -EINVAL for any unsupported or unknown
  42248. + flags. Currently the following flags are implemented:
  42249. + (1) RENAME_NOREPLACE: this flag indicates that if the target
  42250. + of the rename exists the rename should fail with -EEXIST
  42251. + instead of replacing the target. The VFS already checks for
  42252. + existence, so for local filesystems the RENAME_NOREPLACE
  42253. + implementation is equivalent to plain rename.
  42254. + (2) RENAME_EXCHANGE: exchange source and target. Both must
  42255. + exist; this is checked by the VFS. Unlike plain rename,
  42256. + source and target may be of different type.
  42257. + */
  42258. +
  42259. + static const unsigned supported_flags = RENAME_NOREPLACE;
  42260. +
  42261. + reiser4_context *ctx;
  42262. + int result;
  42263. + int is_dir; /* is @old_name directory */
  42264. +
  42265. + struct inode *old_inode;
  42266. + struct inode *new_inode;
  42267. + coord_t *new_coord;
  42268. +
  42269. + struct reiser4_dentry_fsdata *new_fsdata;
  42270. + dir_plugin *dplug;
  42271. + file_plugin *fplug;
  42272. +
  42273. + reiser4_dir_entry_desc *old_entry, *new_entry, *dotdot_entry;
  42274. + lock_handle * new_lh, *dotdot_lh;
  42275. + struct dentry *dotdot_name;
  42276. + struct reiser4_dentry_fsdata *dataonstack;
  42277. +
  42278. + ctx = reiser4_init_context(old_dir->i_sb);
  42279. + if (IS_ERR(ctx))
  42280. + return PTR_ERR(ctx);
  42281. +
  42282. + /*
  42283. + * Check rename2() flags.
  42284. + *
  42285. + * "If some flags are supported then the filesystem must return
  42286. + * -EINVAL for any unsupported or unknown flags."
  42287. + *
  42288. + * We support:
  42289. + * - RENAME_NOREPLACE (no-op)
  42290. + */
  42291. + if ((flags & supported_flags) != flags)
  42292. + return RETERR(-EINVAL);
  42293. +
  42294. + old_entry = kzalloc(3 * sizeof(*old_entry) + 2 * sizeof(*new_lh) +
  42295. + sizeof(*dotdot_name) + sizeof(*dataonstack),
  42296. + reiser4_ctx_gfp_mask_get());
  42297. + if (!old_entry) {
  42298. + context_set_commit_async(ctx);
  42299. + reiser4_exit_context(ctx);
  42300. + return RETERR(-ENOMEM);
  42301. + }
  42302. +
  42303. + new_entry = old_entry + 1;
  42304. + dotdot_entry = old_entry + 2;
  42305. + new_lh = (lock_handle *)(old_entry + 3);
  42306. + dotdot_lh = new_lh + 1;
  42307. + dotdot_name = (struct dentry *)(new_lh + 2);
  42308. + dataonstack = (struct reiser4_dentry_fsdata *)(dotdot_name + 1);
  42309. +
  42310. + assert("nikita-2318", old_dir != NULL);
  42311. + assert("nikita-2319", new_dir != NULL);
  42312. + assert("nikita-2320", old_name != NULL);
  42313. + assert("nikita-2321", new_name != NULL);
  42314. +
  42315. + old_inode = old_name->d_inode;
  42316. + new_inode = new_name->d_inode;
  42317. +
  42318. + dplug = inode_dir_plugin(old_dir);
  42319. + fplug = NULL;
  42320. +
  42321. + new_fsdata = reiser4_get_dentry_fsdata(new_name);
  42322. + if (IS_ERR(new_fsdata)) {
  42323. + kfree(old_entry);
  42324. + context_set_commit_async(ctx);
  42325. + reiser4_exit_context(ctx);
  42326. + return PTR_ERR(new_fsdata);
  42327. + }
  42328. +
  42329. + new_coord = &new_fsdata->dec.entry_coord;
  42330. + coord_clear_iplug(new_coord);
  42331. +
  42332. + is_dir = S_ISDIR(old_inode->i_mode);
  42333. +
  42334. + assert("nikita-3461", old_inode->i_nlink >= 1 + !!is_dir);
  42335. +
  42336. + /* if target is existing directory and it's not empty---return error.
  42337. +
  42338. + This check is done specifically, because is_dir_empty() requires
  42339. + tree traversal and have to be done before locks are taken.
  42340. + */
  42341. + if (is_dir && new_inode != NULL && is_dir_empty(new_inode) != 0) {
  42342. + kfree(old_entry);
  42343. + context_set_commit_async(ctx);
  42344. + reiser4_exit_context(ctx);
  42345. + return RETERR(-ENOTEMPTY);
  42346. + }
  42347. +
  42348. + result = can_rename(old_dir, old_inode, new_dir, new_inode);
  42349. + if (result != 0) {
  42350. + kfree(old_entry);
  42351. + context_set_commit_async(ctx);
  42352. + reiser4_exit_context(ctx);
  42353. + return result;
  42354. + }
  42355. +
  42356. + result = hashed_rename_estimate_and_grab(old_dir, old_name,
  42357. + new_dir, new_name);
  42358. + if (result != 0) {
  42359. + kfree(old_entry);
  42360. + context_set_commit_async(ctx);
  42361. + reiser4_exit_context(ctx);
  42362. + return result;
  42363. + }
  42364. +
  42365. + init_lh(new_lh);
  42366. +
  42367. + /* find entry for @new_name */
  42368. + result = reiser4_find_entry(new_dir, new_name, new_lh, ZNODE_WRITE_LOCK,
  42369. + new_entry);
  42370. +
  42371. + if (IS_CBKERR(result)) {
  42372. + done_lh(new_lh);
  42373. + kfree(old_entry);
  42374. + context_set_commit_async(ctx);
  42375. + reiser4_exit_context(ctx);
  42376. + return result;
  42377. + }
  42378. +
  42379. + reiser4_seal_done(&new_fsdata->dec.entry_seal);
  42380. +
  42381. + /* add or replace name for @old_inode as @new_name */
  42382. + if (new_inode != NULL) {
  42383. + /* target (@new_name) exists. */
  42384. + /* Not clear what to do with objects that are
  42385. + both directories and files at the same time. */
  42386. + if (result == CBK_COORD_FOUND) {
  42387. + result = replace_name(old_inode,
  42388. + new_dir,
  42389. + new_inode, new_coord, new_lh);
  42390. + if (result == 0)
  42391. + fplug = inode_file_plugin(new_inode);
  42392. + } else if (result == CBK_COORD_NOTFOUND) {
  42393. + /* VFS told us that @new_name is bound to existing
  42394. + inode, but we failed to find directory entry. */
  42395. + warning("nikita-2324", "Target not found");
  42396. + result = RETERR(-ENOENT);
  42397. + }
  42398. + } else {
  42399. + /* target (@new_name) doesn't exists. */
  42400. + if (result == CBK_COORD_NOTFOUND)
  42401. + result = add_name(old_inode,
  42402. + new_dir,
  42403. + new_name, new_coord, new_lh, is_dir);
  42404. + else if (result == CBK_COORD_FOUND) {
  42405. + /* VFS told us that @new_name is "negative" dentry,
  42406. + but we found directory entry. */
  42407. + warning("nikita-2331", "Target found unexpectedly");
  42408. + result = RETERR(-EIO);
  42409. + }
  42410. + }
  42411. +
  42412. + assert("nikita-3462", ergo(result == 0,
  42413. + old_inode->i_nlink >= 2 + !!is_dir));
  42414. +
  42415. + /* We are done with all modifications to the @new_dir, release lock on
  42416. + node. */
  42417. + done_lh(new_lh);
  42418. +
  42419. + if (fplug && fplug->detach) {
  42420. + /* detach @new_inode from name-space */
  42421. + result = fplug->detach(new_inode, new_dir);
  42422. + if (result != 0)
  42423. + warning("nikita-2330", "Cannot detach %lli: %i. %s",
  42424. + (unsigned long long)get_inode_oid(new_inode),
  42425. + result, possible_leak);
  42426. + }
  42427. +
  42428. + if (new_inode != NULL)
  42429. + reiser4_update_sd(new_inode);
  42430. +
  42431. + if (result == 0) {
  42432. + old_entry->obj = old_inode;
  42433. +
  42434. + dplug->build_entry_key(old_dir,
  42435. + &old_name->d_name, &old_entry->key);
  42436. +
  42437. + /* At this stage new name was introduced for
  42438. + @old_inode. @old_inode, @new_dir, and @new_inode i_nlink
  42439. + counters were updated.
  42440. +
  42441. + We want to remove @old_name now. If @old_inode wasn't
  42442. + directory this is simple.
  42443. + */
  42444. + result = dplug->rem_entry(old_dir, old_name, old_entry);
  42445. + if (result != 0 && result != -ENOMEM) {
  42446. + warning("nikita-2335",
  42447. + "Cannot remove old name: %i", result);
  42448. + } else {
  42449. + result = reiser4_del_nlink(old_inode, old_dir, 0);
  42450. + if (result != 0 && result != -ENOMEM) {
  42451. + warning("nikita-2337",
  42452. + "Cannot drop link on old: %i", result);
  42453. + }
  42454. + }
  42455. +
  42456. + if (result == 0 && is_dir) {
  42457. + /* @old_inode is directory. We also have to update
  42458. + dotdot entry. */
  42459. + coord_t *dotdot_coord;
  42460. +
  42461. + memset(dataonstack, 0, sizeof(*dataonstack));
  42462. + memset(dotdot_entry, 0, sizeof(*dotdot_entry));
  42463. + dotdot_entry->obj = old_dir;
  42464. + memset(dotdot_name, 0, sizeof(*dotdot_name));
  42465. + dotdot_name->d_name.name = "..";
  42466. + dotdot_name->d_name.len = 2;
  42467. + /*
  42468. + * allocate ->d_fsdata on the stack to avoid using
  42469. + * reiser4_get_dentry_fsdata(). Locking is not needed,
  42470. + * because dentry is private to the current thread.
  42471. + */
  42472. + dotdot_name->d_fsdata = dataonstack;
  42473. + init_lh(dotdot_lh);
  42474. +
  42475. + dotdot_coord = &dataonstack->dec.entry_coord;
  42476. + coord_clear_iplug(dotdot_coord);
  42477. +
  42478. + result = reiser4_find_entry(old_inode, dotdot_name,
  42479. + dotdot_lh, ZNODE_WRITE_LOCK,
  42480. + dotdot_entry);
  42481. + if (result == 0) {
  42482. + /* replace_name() decreases i_nlink on
  42483. + * @old_dir */
  42484. + result = replace_name(new_dir,
  42485. + old_inode,
  42486. + old_dir,
  42487. + dotdot_coord, dotdot_lh);
  42488. + } else
  42489. + result = RETERR(-EIO);
  42490. + done_lh(dotdot_lh);
  42491. + }
  42492. + }
  42493. + reiser4_update_dir(new_dir);
  42494. + reiser4_update_dir(old_dir);
  42495. + reiser4_update_sd(old_inode);
  42496. + if (result == 0) {
  42497. + file_plugin *fplug;
  42498. +
  42499. + if (new_inode != NULL) {
  42500. + /* add safe-link for target file (in case we removed
  42501. + * last reference to the poor fellow */
  42502. + fplug = inode_file_plugin(new_inode);
  42503. + if (new_inode->i_nlink == 0)
  42504. + result = safe_link_add(new_inode, SAFE_UNLINK);
  42505. + }
  42506. + }
  42507. + kfree(old_entry);
  42508. + context_set_commit_async(ctx);
  42509. + reiser4_exit_context(ctx);
  42510. + return result;
  42511. +}
  42512. +
  42513. +#if 0
  42514. +int reiser4_rename_common(struct user_namespace *mnt_userns,
  42515. + struct inode *old_dir /* directory where @old
  42516. + * is located */ ,
  42517. + struct dentry *old_name /* old name */ ,
  42518. + struct inode *new_dir /* directory where @new
  42519. + * is located */ ,
  42520. + struct dentry *new_name/* new name */)
  42521. +{
  42522. + /* From `The Open Group Base Specifications Issue 6'
  42523. +
  42524. + If either the old or new argument names a symbolic link, rename()
  42525. + shall operate on the symbolic link itself, and shall not resolve
  42526. + the last component of the argument. If the old argument and the new
  42527. + argument resolve to the same existing file, rename() shall return
  42528. + successfully and perform no other action.
  42529. +
  42530. + [this is done by VFS: vfs_rename()]
  42531. +
  42532. + If the old argument points to the pathname of a file that is not a
  42533. + directory, the new argument shall not point to the pathname of a
  42534. + directory.
  42535. +
  42536. + [checked by VFS: vfs_rename->may_delete()]
  42537. +
  42538. + If the link named by the new argument exists, it shall
  42539. + be removed and old renamed to new. In this case, a link named new
  42540. + shall remain visible to other processes throughout the renaming
  42541. + operation and refer either to the file referred to by new or old
  42542. + before the operation began.
  42543. +
  42544. + [we should assure this]
  42545. +
  42546. + Write access permission is required for
  42547. + both the directory containing old and the directory containing new.
  42548. +
  42549. + [checked by VFS: vfs_rename->may_delete(), may_create()]
  42550. +
  42551. + If the old argument points to the pathname of a directory, the new
  42552. + argument shall not point to the pathname of a file that is not a
  42553. + directory.
  42554. +
  42555. + [checked by VFS: vfs_rename->may_delete()]
  42556. +
  42557. + If the directory named by the new argument exists, it
  42558. + shall be removed and old renamed to new. In this case, a link named
  42559. + new shall exist throughout the renaming operation and shall refer
  42560. + either to the directory referred to by new or old before the
  42561. + operation began.
  42562. +
  42563. + [we should assure this]
  42564. +
  42565. + If new names an existing directory, it shall be
  42566. + required to be an empty directory.
  42567. +
  42568. + [we should check this]
  42569. +
  42570. + If the old argument points to a pathname of a symbolic link, the
  42571. + symbolic link shall be renamed. If the new argument points to a
  42572. + pathname of a symbolic link, the symbolic link shall be removed.
  42573. +
  42574. + The new pathname shall not contain a path prefix that names
  42575. + old. Write access permission is required for the directory
  42576. + containing old and the directory containing new. If the old
  42577. + argument points to the pathname of a directory, write access
  42578. + permission may be required for the directory named by old, and, if
  42579. + it exists, the directory named by new.
  42580. +
  42581. + [checked by VFS: vfs_rename(), vfs_rename_dir()]
  42582. +
  42583. + If the link named by the new argument exists and the file's link
  42584. + count becomes 0 when it is removed and no process has the file
  42585. + open, the space occupied by the file shall be freed and the file
  42586. + shall no longer be accessible. If one or more processes have the
  42587. + file open when the last link is removed, the link shall be removed
  42588. + before rename() returns, but the removal of the file contents shall
  42589. + be postponed until all references to the file are closed.
  42590. +
  42591. + [iput() handles this, but we can do this manually, a la
  42592. + reiser4_unlink()]
  42593. +
  42594. + Upon successful completion, rename() shall mark for update the
  42595. + st_ctime and st_mtime fields of the parent directory of each file.
  42596. +
  42597. + [N/A]
  42598. +
  42599. + */
  42600. + reiser4_context *ctx;
  42601. + int result;
  42602. + int is_dir; /* is @old_name directory */
  42603. + struct inode *old_inode;
  42604. + struct inode *new_inode;
  42605. + reiser4_dir_entry_desc old_entry;
  42606. + reiser4_dir_entry_desc new_entry;
  42607. + coord_t *new_coord;
  42608. + struct reiser4_dentry_fsdata *new_fsdata;
  42609. + lock_handle new_lh;
  42610. + dir_plugin *dplug;
  42611. + file_plugin *fplug;
  42612. +
  42613. + ctx = reiser4_init_context(old_dir->i_sb);
  42614. + if (IS_ERR(ctx))
  42615. + return PTR_ERR(ctx);
  42616. +
  42617. + assert("nikita-2318", old_dir != NULL);
  42618. + assert("nikita-2319", new_dir != NULL);
  42619. + assert("nikita-2320", old_name != NULL);
  42620. + assert("nikita-2321", new_name != NULL);
  42621. +
  42622. + old_inode = old_name->d_inode;
  42623. + new_inode = new_name->d_inode;
  42624. +
  42625. + dplug = inode_dir_plugin(old_dir);
  42626. + fplug = NULL;
  42627. +
  42628. + new_fsdata = reiser4_get_dentry_fsdata(new_name);
  42629. + if (IS_ERR(new_fsdata)) {
  42630. + result = PTR_ERR(new_fsdata);
  42631. + goto exit;
  42632. + }
  42633. +
  42634. + new_coord = &new_fsdata->dec.entry_coord;
  42635. + coord_clear_iplug(new_coord);
  42636. +
  42637. + is_dir = S_ISDIR(old_inode->i_mode);
  42638. +
  42639. + assert("nikita-3461", old_inode->i_nlink >= 1 + !!is_dir);
  42640. +
  42641. + /* if target is existing directory and it's not empty---return error.
  42642. +
  42643. + This check is done specifically, because is_dir_empty() requires
  42644. + tree traversal and have to be done before locks are taken.
  42645. + */
  42646. + if (is_dir && new_inode != NULL && is_dir_empty(new_inode) != 0)
  42647. + return RETERR(-ENOTEMPTY);
  42648. +
  42649. + result = can_rename(old_dir, old_inode, new_dir, new_inode);
  42650. + if (result != 0)
  42651. + goto exit;
  42652. +
  42653. + result = hashed_rename_estimate_and_grab(old_dir, old_name,
  42654. + new_dir, new_name);
  42655. + if (result != 0)
  42656. + goto exit;
  42657. +
  42658. + init_lh(&new_lh);
  42659. +
  42660. + /* find entry for @new_name */
  42661. + result = reiser4_find_entry(new_dir, new_name, &new_lh,
  42662. + ZNODE_WRITE_LOCK, &new_entry);
  42663. +
  42664. + if (IS_CBKERR(result)) {
  42665. + done_lh(&new_lh);
  42666. + goto exit;
  42667. + }
  42668. +
  42669. + reiser4_seal_done(&new_fsdata->dec.entry_seal);
  42670. +
  42671. + /* add or replace name for @old_inode as @new_name */
  42672. + if (new_inode != NULL) {
  42673. + /* target (@new_name) exists. */
  42674. + /* Not clear what to do with objects that are
  42675. + both directories and files at the same time. */
  42676. + if (result == CBK_COORD_FOUND) {
  42677. + result = replace_name(old_inode,
  42678. + new_dir,
  42679. + new_inode, new_coord, &new_lh);
  42680. + if (result == 0)
  42681. + fplug = inode_file_plugin(new_inode);
  42682. + } else if (result == CBK_COORD_NOTFOUND) {
  42683. + /* VFS told us that @new_name is bound to existing
  42684. + inode, but we failed to find directory entry. */
  42685. + warning("nikita-2324", "Target not found");
  42686. + result = RETERR(-ENOENT);
  42687. + }
  42688. + } else {
  42689. + /* target (@new_name) doesn't exists. */
  42690. + if (result == CBK_COORD_NOTFOUND)
  42691. + result = add_name(old_inode,
  42692. + new_dir,
  42693. + new_name, new_coord, &new_lh, is_dir);
  42694. + else if (result == CBK_COORD_FOUND) {
  42695. + /* VFS told us that @new_name is "negative" dentry,
  42696. + but we found directory entry. */
  42697. + warning("nikita-2331", "Target found unexpectedly");
  42698. + result = RETERR(-EIO);
  42699. + }
  42700. + }
  42701. +
  42702. + assert("nikita-3462", ergo(result == 0,
  42703. + old_inode->i_nlink >= 2 + !!is_dir));
  42704. +
  42705. + /* We are done with all modifications to the @new_dir, release lock on
  42706. + node. */
  42707. + done_lh(&new_lh);
  42708. +
  42709. + if (fplug && fplug->detach) {
  42710. + /* detach @new_inode from name-space */
  42711. + result = fplug->detach(new_inode, new_dir);
  42712. + if (result != 0)
  42713. + warning("nikita-2330", "Cannot detach %lli: %i. %s",
  42714. + (unsigned long long)get_inode_oid(new_inode),
  42715. + result, possible_leak);
  42716. + }
  42717. +
  42718. + if (new_inode != NULL)
  42719. + reiser4_update_sd(new_inode);
  42720. +
  42721. + if (result == 0) {
  42722. + memset(&old_entry, 0, sizeof old_entry);
  42723. + old_entry.obj = old_inode;
  42724. +
  42725. + dplug->build_entry_key(old_dir,
  42726. + &old_name->d_name, &old_entry.key);
  42727. +
  42728. + /* At this stage new name was introduced for
  42729. + @old_inode. @old_inode, @new_dir, and @new_inode i_nlink
  42730. + counters were updated.
  42731. +
  42732. + We want to remove @old_name now. If @old_inode wasn't
  42733. + directory this is simple.
  42734. + */
  42735. + result = dplug->rem_entry(old_dir, old_name, &old_entry);
  42736. + /*result = rem_entry_hashed(old_dir, old_name, &old_entry); */
  42737. + if (result != 0 && result != -ENOMEM) {
  42738. + warning("nikita-2335",
  42739. + "Cannot remove old name: %i", result);
  42740. + } else {
  42741. + result = reiser4_del_nlink(old_inode, old_dir, 0);
  42742. + if (result != 0 && result != -ENOMEM) {
  42743. + warning("nikita-2337",
  42744. + "Cannot drop link on old: %i", result);
  42745. + }
  42746. + }
  42747. +
  42748. + if (result == 0 && is_dir) {
  42749. + /* @old_inode is directory. We also have to update
  42750. + dotdot entry. */
  42751. + coord_t *dotdot_coord;
  42752. + lock_handle dotdot_lh;
  42753. + struct dentry dotdot_name;
  42754. + reiser4_dir_entry_desc dotdot_entry;
  42755. + struct reiser4_dentry_fsdata dataonstack;
  42756. + struct reiser4_dentry_fsdata *fsdata;
  42757. +
  42758. + memset(&dataonstack, 0, sizeof dataonstack);
  42759. + memset(&dotdot_entry, 0, sizeof dotdot_entry);
  42760. + dotdot_entry.obj = old_dir;
  42761. + memset(&dotdot_name, 0, sizeof dotdot_name);
  42762. + dotdot_name.d_name.name = "..";
  42763. + dotdot_name.d_name.len = 2;
  42764. + /*
  42765. + * allocate ->d_fsdata on the stack to avoid using
  42766. + * reiser4_get_dentry_fsdata(). Locking is not needed,
  42767. + * because dentry is private to the current thread.
  42768. + */
  42769. + dotdot_name.d_fsdata = &dataonstack;
  42770. + init_lh(&dotdot_lh);
  42771. +
  42772. + fsdata = &dataonstack;
  42773. + dotdot_coord = &fsdata->dec.entry_coord;
  42774. + coord_clear_iplug(dotdot_coord);
  42775. +
  42776. + result = reiser4_find_entry(old_inode,
  42777. + &dotdot_name,
  42778. + &dotdot_lh,
  42779. + ZNODE_WRITE_LOCK,
  42780. + &dotdot_entry);
  42781. + if (result == 0) {
  42782. + /* replace_name() decreases i_nlink on
  42783. + * @old_dir */
  42784. + result = replace_name(new_dir,
  42785. + old_inode,
  42786. + old_dir,
  42787. + dotdot_coord, &dotdot_lh);
  42788. + } else
  42789. + result = RETERR(-EIO);
  42790. + done_lh(&dotdot_lh);
  42791. + }
  42792. + }
  42793. + reiser4_update_dir(new_dir);
  42794. + reiser4_update_dir(old_dir);
  42795. + reiser4_update_sd(old_inode);
  42796. + if (result == 0) {
  42797. + file_plugin *fplug;
  42798. +
  42799. + if (new_inode != NULL) {
  42800. + /* add safe-link for target file (in case we removed
  42801. + * last reference to the poor fellow */
  42802. + fplug = inode_file_plugin(new_inode);
  42803. + if (new_inode->i_nlink == 0)
  42804. + result = safe_link_add(new_inode, SAFE_UNLINK);
  42805. + }
  42806. + }
  42807. +exit:
  42808. + context_set_commit_async(ctx);
  42809. + reiser4_exit_context(ctx);
  42810. + return result;
  42811. +}
  42812. +#endif
  42813. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/acl.h linux-5.16.14/fs/reiser4/plugin/item/acl.h
  42814. --- linux-5.16.14.orig/fs/reiser4/plugin/item/acl.h 1970-01-01 01:00:00.000000000 +0100
  42815. +++ linux-5.16.14/fs/reiser4/plugin/item/acl.h 2022-03-12 13:26:19.675892784 +0100
  42816. @@ -0,0 +1,66 @@
  42817. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  42818. +
  42819. +/* Directory entry. */
  42820. +
  42821. +#if !defined( __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ )
  42822. +#define __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__
  42823. +
  42824. +#include "../../forward.h"
  42825. +#include "../../dformat.h"
  42826. +#include "../../kassign.h"
  42827. +#include "../../key.h"
  42828. +
  42829. +#include <linux/fs.h>
  42830. +#include <linux/dcache.h> /* for struct dentry */
  42831. +
/* Layout of a single directory entry: the stat-data key of the object
   the name refers to (compressed into an obj_key_id), immediately
   followed by the entry's name. */
typedef struct directory_entry_format {
	/* key of object stat-data. It's not necessary to store whole
	   key here, because it's always key of stat-data, so minor
	   packing locality and offset can be omitted here. But this
	   relies on particular key allocation scheme for stat-data, so,
	   for extensibility sake, whole key can be stored here.

	   We store key as array of bytes, because we don't want 8-byte
	   alignment of dir entries.
	 */
	obj_key_id id;
	/* file name. Null terminated string. */
	d8 name[0];
} directory_entry_format;
  42846. +
  42847. +void print_de(const char *prefix, coord_t * coord);
  42848. +int extract_key_de(const coord_t * coord, reiser4_key * key);
  42849. +int update_key_de(const coord_t * coord, const reiser4_key * key,
  42850. + lock_handle * lh);
  42851. +char *extract_name_de(const coord_t * coord, char *buf);
  42852. +unsigned extract_file_type_de(const coord_t * coord);
  42853. +int add_entry_de(struct inode *dir, coord_t * coord,
  42854. + lock_handle * lh, const struct dentry *name,
  42855. + reiser4_dir_entry_desc * entry);
  42856. +int rem_entry_de(struct inode *dir, const struct qstr *name, coord_t * coord,
  42857. + lock_handle * lh, reiser4_dir_entry_desc * entry);
  42858. +int max_name_len_de(const struct inode *dir);
  42859. +
  42860. +int de_rem_and_shrink(struct inode *dir, coord_t * coord, int length);
  42861. +
  42862. +char *extract_dent_name(const coord_t * coord,
  42863. + directory_entry_format * dent, char *buf);
  42864. +
  42865. +#if REISER4_LARGE_KEY
  42866. +#define DE_NAME_BUF_LEN (24)
  42867. +#else
  42868. +#define DE_NAME_BUF_LEN (16)
  42869. +#endif
  42870. +
  42871. +/* __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ */
  42872. +#endif
  42873. +
  42874. +/* Make Linus happy.
  42875. + Local variables:
  42876. + c-indentation-style: "K&R"
  42877. + mode-name: "LC"
  42878. + c-basic-offset: 8
  42879. + tab-width: 8
  42880. + fill-column: 120
  42881. + End:
  42882. +*/
  42883. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/blackbox.c linux-5.16.14/fs/reiser4/plugin/item/blackbox.c
  42884. --- linux-5.16.14.orig/fs/reiser4/plugin/item/blackbox.c 1970-01-01 01:00:00.000000000 +0100
  42885. +++ linux-5.16.14/fs/reiser4/plugin/item/blackbox.c 2022-03-12 13:26:19.675892784 +0100
  42886. @@ -0,0 +1,142 @@
  42887. +/* Copyright 2003 by Hans Reiser, licensing governed by
  42888. + * reiser4/README */
  42889. +
  42890. +/* Black box item implementation */
  42891. +
  42892. +#include "../../forward.h"
  42893. +#include "../../debug.h"
  42894. +#include "../../dformat.h"
  42895. +#include "../../kassign.h"
  42896. +#include "../../coord.h"
  42897. +#include "../../tree.h"
  42898. +#include "../../lock.h"
  42899. +
  42900. +#include "blackbox.h"
  42901. +#include "item.h"
  42902. +#include "../plugin.h"
  42903. +
  42904. +int
  42905. +store_black_box(reiser4_tree * tree,
  42906. + const reiser4_key * key, void *data, int length)
  42907. +{
  42908. + int result;
  42909. + reiser4_item_data idata;
  42910. + coord_t coord;
  42911. + lock_handle lh;
  42912. +
  42913. + memset(&idata, 0, sizeof idata);
  42914. +
  42915. + idata.data = data;
  42916. + idata.user = 0;
  42917. + idata.length = length;
  42918. + idata.iplug = item_plugin_by_id(BLACK_BOX_ID);
  42919. +
  42920. + init_lh(&lh);
  42921. + result = insert_by_key(tree, key,
  42922. + &idata, &coord, &lh, LEAF_LEVEL, CBK_UNIQUE);
  42923. +
  42924. + assert("nikita-3413",
  42925. + ergo(result == 0,
  42926. + WITH_COORD(&coord,
  42927. + item_length_by_coord(&coord) == length)));
  42928. +
  42929. + done_lh(&lh);
  42930. + return result;
  42931. +}
  42932. +
  42933. +int
  42934. +load_black_box(reiser4_tree * tree,
  42935. + reiser4_key * key, void *data, int length, int exact)
  42936. +{
  42937. + int result;
  42938. + coord_t coord;
  42939. + lock_handle lh;
  42940. +
  42941. + init_lh(&lh);
  42942. + result = coord_by_key(tree, key,
  42943. + &coord, &lh, ZNODE_READ_LOCK,
  42944. + exact ? FIND_EXACT : FIND_MAX_NOT_MORE_THAN,
  42945. + LEAF_LEVEL, LEAF_LEVEL, CBK_UNIQUE, NULL);
  42946. +
  42947. + if (result == 0) {
  42948. + int ilen;
  42949. +
  42950. + result = zload(coord.node);
  42951. + if (result == 0) {
  42952. + ilen = item_length_by_coord(&coord);
  42953. + if (ilen <= length) {
  42954. + memcpy(data, item_body_by_coord(&coord), ilen);
  42955. + unit_key_by_coord(&coord, key);
  42956. + } else if (exact) {
  42957. + /*
  42958. + * item is larger than buffer provided by the
  42959. + * user. Only issue a warning if @exact is
  42960. + * set. If @exact is false, we are iterating
  42961. + * over all safe-links and here we are reaching
  42962. + * the end of the iteration.
  42963. + */
  42964. + warning("nikita-3415",
  42965. + "Wrong black box length: %i > %i",
  42966. + ilen, length);
  42967. + result = RETERR(-EIO);
  42968. + }
  42969. + zrelse(coord.node);
  42970. + }
  42971. + }
  42972. +
  42973. + done_lh(&lh);
  42974. + return result;
  42975. +
  42976. +}
  42977. +
  42978. +int
  42979. +update_black_box(reiser4_tree * tree,
  42980. + const reiser4_key * key, void *data, int length)
  42981. +{
  42982. + int result;
  42983. + coord_t coord;
  42984. + lock_handle lh;
  42985. +
  42986. + init_lh(&lh);
  42987. + result = coord_by_key(tree, key,
  42988. + &coord, &lh, ZNODE_READ_LOCK,
  42989. + FIND_EXACT,
  42990. + LEAF_LEVEL, LEAF_LEVEL, CBK_UNIQUE, NULL);
  42991. + if (result == 0) {
  42992. + int ilen;
  42993. +
  42994. + result = zload(coord.node);
  42995. + if (result == 0) {
  42996. + ilen = item_length_by_coord(&coord);
  42997. + if (length <= ilen) {
  42998. + memcpy(item_body_by_coord(&coord), data,
  42999. + length);
  43000. + } else {
  43001. + warning("nikita-3437",
  43002. + "Wrong black box length: %i < %i",
  43003. + ilen, length);
  43004. + result = RETERR(-EIO);
  43005. + }
  43006. + zrelse(coord.node);
  43007. + }
  43008. + }
  43009. +
  43010. + done_lh(&lh);
  43011. + return result;
  43012. +
  43013. +}
  43014. +
  43015. +int kill_black_box(reiser4_tree * tree, const reiser4_key * key)
  43016. +{
  43017. + return reiser4_cut_tree(tree, key, key, NULL, 1);
  43018. +}
  43019. +
  43020. +/* Make Linus happy.
  43021. + Local variables:
  43022. + c-indentation-style: "K&R"
  43023. + mode-name: "LC"
  43024. + c-basic-offset: 8
  43025. + tab-width: 8
  43026. + fill-column: 120
  43027. + End:
  43028. +*/
  43029. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/blackbox.h linux-5.16.14/fs/reiser4/plugin/item/blackbox.h
  43030. --- linux-5.16.14.orig/fs/reiser4/plugin/item/blackbox.h 1970-01-01 01:00:00.000000000 +0100
  43031. +++ linux-5.16.14/fs/reiser4/plugin/item/blackbox.h 2022-03-12 13:26:19.675892784 +0100
  43032. @@ -0,0 +1,33 @@
  43033. +/* Copyright 2003 by Hans Reiser, licensing governed by
  43034. + * reiser4/README */
  43035. +
  43036. +/* "Black box" entry to fixed-width contain user supplied data */
  43037. +
  43038. +#if !defined( __FS_REISER4_BLACK_BOX_H__ )
  43039. +#define __FS_REISER4_BLACK_BOX_H__
  43040. +
  43041. +#include "../../forward.h"
  43042. +#include "../../dformat.h"
  43043. +#include "../../kassign.h"
  43044. +#include "../../key.h"
  43045. +
  43046. +extern int store_black_box(reiser4_tree * tree,
  43047. + const reiser4_key * key, void *data, int length);
  43048. +extern int load_black_box(reiser4_tree * tree,
  43049. + reiser4_key * key, void *data, int length, int exact);
  43050. +extern int kill_black_box(reiser4_tree * tree, const reiser4_key * key);
  43051. +extern int update_black_box(reiser4_tree * tree,
  43052. + const reiser4_key * key, void *data, int length);
  43053. +
  43054. +/* __FS_REISER4_BLACK_BOX_H__ */
  43055. +#endif
  43056. +
  43057. +/* Make Linus happy.
  43058. + Local variables:
  43059. + c-indentation-style: "K&R"
  43060. + mode-name: "LC"
  43061. + c-basic-offset: 8
  43062. + tab-width: 8
  43063. + fill-column: 120
  43064. + End:
  43065. +*/
  43066. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/cde.c linux-5.16.14/fs/reiser4/plugin/item/cde.c
  43067. --- linux-5.16.14.orig/fs/reiser4/plugin/item/cde.c 1970-01-01 01:00:00.000000000 +0100
  43068. +++ linux-5.16.14/fs/reiser4/plugin/item/cde.c 2022-03-12 13:26:19.675892784 +0100
  43069. @@ -0,0 +1,1006 @@
  43070. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  43071. +
  43072. +/* Directory entry implementation */
  43073. +
  43074. +/* DESCRIPTION:
  43075. +
  43076. + This is "compound" directory item plugin implementation. This directory
  43077. + item type is compound (as opposed to the "simple directory item" in
  43078. + fs/reiser4/plugin/item/sde.[ch]), because it consists of several directory
  43079. + entries.
  43080. +
  43081. + The reason behind this decision is disk space efficiency: all directory
  43082. + entries inside the same directory have identical fragment in their
  43083. + keys. This, of course, depends on key assignment policy. In our default key
  43084. + assignment policy, all directory entries have the same locality which is
  43085. + equal to the object id of their directory.
  43086. +
  43087. + Composing directory item out of several directory entries for the same
  43088. + directory allows us to store said key fragment only once. That is, this is
  43089. + some ad hoc form of key compression (stem compression) that is implemented
  43090. + here, because general key compression is not supposed to be implemented in
  43091. + v4.0.
  43092. +
  43093. + Another decision that was made regarding all directory item plugins, is
  43094. + that they will store entry keys unaligned. This is for that sake of disk
  43095. + space efficiency again.
  43096. +
  43097. + In should be noted, that storing keys unaligned increases CPU consumption,
  43098. + at least on some architectures.
  43099. +
  43100. + Internal on-disk structure of the compound directory item is the following:
  43101. +
  43102. + HEADER cde_item_format. Here number of entries is stored.
  43103. + ENTRY_HEADER_0 cde_unit_header. Here part of entry key and
  43104. + ENTRY_HEADER_1 offset of entry body are stored.
  43105. + ENTRY_HEADER_2 (basically two last parts of key)
  43106. + ...
  43107. + ENTRY_HEADER_N
  43108. + ENTRY_BODY_0 directory_entry_format. Here part of stat data key and
  43109. + ENTRY_BODY_1 NUL-terminated name are stored.
  43110. + ENTRY_BODY_2 (part of statadta key in the
  43111. + sence that since all SDs have
  43112. + zero offset, this offset is not
  43113. + stored on disk).
  43114. + ...
  43115. + ENTRY_BODY_N
  43116. +
  43117. + When it comes to the balancing, each directory entry in compound directory
  43118. + item is unit, that is, something that can be cut from one item and pasted
  43119. + into another item of the same type. Handling of unit cut and paste is major
  43120. + reason for the complexity of code below.
  43121. +
  43122. +*/
  43123. +
  43124. +#include "../../forward.h"
  43125. +#include "../../debug.h"
  43126. +#include "../../dformat.h"
  43127. +#include "../../kassign.h"
  43128. +#include "../../key.h"
  43129. +#include "../../coord.h"
  43130. +#include "sde.h"
  43131. +#include "cde.h"
  43132. +#include "item.h"
  43133. +#include "../node/node.h"
  43134. +#include "../plugin.h"
  43135. +#include "../../znode.h"
  43136. +#include "../../carry.h"
  43137. +#include "../../tree.h"
  43138. +#include "../../inode.h"
  43139. +
  43140. +#include <linux/fs.h> /* for struct inode */
  43141. +#include <linux/dcache.h> /* for struct dentry */
  43142. +
  43143. +#if 0
  43144. +#define CHECKME(coord) \
  43145. +({ \
  43146. + const char *message; \
  43147. + coord_t dup; \
  43148. + \
  43149. + coord_dup_nocheck(&dup, (coord)); \
  43150. + dup.unit_pos = 0; \
  43151. + assert("nikita-2871", cde_check(&dup, &message) == 0); \
  43152. +})
  43153. +#else
  43154. +#define CHECKME(coord) noop
  43155. +#endif
  43156. +
  43157. +static_assert(REISER4_SEQ_SEARCH_BREAK > 2);
  43158. +
  43159. +/* return body of compound directory item at @coord */
  43160. +static inline cde_item_format *formatted_at(const coord_t * coord)
  43161. +{
  43162. + assert("nikita-1282", coord != NULL);
  43163. + return item_body_by_coord(coord);
  43164. +}
  43165. +
  43166. +/* return entry header at @coord */
  43167. +static inline cde_unit_header *header_at(const coord_t *
  43168. + coord /* coord of item */ ,
  43169. + int idx /* index of unit */ )
  43170. +{
  43171. + assert("nikita-1283", coord != NULL);
  43172. + return &formatted_at(coord)->entry[idx];
  43173. +}
  43174. +
  43175. +/* return number of units in compound directory item at @coord */
  43176. +static int units(const coord_t * coord /* coord of item */ )
  43177. +{
  43178. + return le16_to_cpu(get_unaligned(&formatted_at(coord)->num_of_entries));
  43179. +}
  43180. +
  43181. +/* return offset of the body of @idx-th entry in @coord */
  43182. +static unsigned int offset_of(const coord_t * coord /* coord of item */ ,
  43183. + int idx /* index of unit */ )
  43184. +{
  43185. + if (idx < units(coord))
  43186. + return le16_to_cpu(get_unaligned(&header_at(coord, idx)->offset));
  43187. + else if (idx == units(coord))
  43188. + return item_length_by_coord(coord);
  43189. + else
  43190. + impossible("nikita-1308", "Wrong idx");
  43191. + return 0;
  43192. +}
  43193. +
  43194. +/* set offset of the body of @idx-th entry in @coord */
  43195. +static void set_offset(const coord_t * coord /* coord of item */ ,
  43196. + int idx /* index of unit */ ,
  43197. + unsigned int offset /* new offset */ )
  43198. +{
  43199. + put_unaligned(cpu_to_le16((__u16) offset), &header_at(coord, idx)->offset);
  43200. +}
  43201. +
  43202. +static void adj_offset(const coord_t * coord /* coord of item */ ,
  43203. + int idx /* index of unit */ ,
  43204. + int delta /* offset change */ )
  43205. +{
  43206. + d16 *doffset;
  43207. + __u16 offset;
  43208. +
  43209. + doffset = &header_at(coord, idx)->offset;
  43210. + offset = le16_to_cpu(get_unaligned(doffset));
  43211. + offset += delta;
  43212. + put_unaligned(cpu_to_le16((__u16) offset), doffset);
  43213. +}
  43214. +
  43215. +/* return pointer to @offset-th byte from the beginning of @coord */
  43216. +static char *address(const coord_t * coord /* coord of item */ ,
  43217. + int offset)
  43218. +{
  43219. + return ((char *)item_body_by_coord(coord)) + offset;
  43220. +}
  43221. +
  43222. +/* return pointer to the body of @idx-th entry in @coord */
  43223. +static directory_entry_format *entry_at(const coord_t * coord /* coord of
  43224. + * item */ ,
  43225. + int idx /* index of unit */ )
  43226. +{
  43227. + return (directory_entry_format *) address(coord,
  43228. + (int)offset_of(coord, idx));
  43229. +}
  43230. +
  43231. +/* return number of unit referenced by @coord */
  43232. +static int idx_of(const coord_t * coord /* coord of item */ )
  43233. +{
  43234. + assert("nikita-1285", coord != NULL);
  43235. + return coord->unit_pos;
  43236. +}
  43237. +
  43238. +/* find position where entry with @entry_key would be inserted into @coord */
  43239. +static int find(const coord_t * coord /* coord of item */ ,
  43240. + const reiser4_key * entry_key /* key to look for */ ,
  43241. + cmp_t * last /* result of last comparison */ )
  43242. +{
  43243. + int entries;
  43244. +
  43245. + int left;
  43246. + int right;
  43247. +
  43248. + cde_unit_header *header;
  43249. +
  43250. + assert("nikita-1295", coord != NULL);
  43251. + assert("nikita-1296", entry_key != NULL);
  43252. + assert("nikita-1297", last != NULL);
  43253. +
  43254. + entries = units(coord);
  43255. + left = 0;
  43256. + right = entries - 1;
  43257. + while (right - left >= REISER4_SEQ_SEARCH_BREAK) {
  43258. + int median;
  43259. +
  43260. + median = (left + right) >> 1;
  43261. +
  43262. + header = header_at(coord, median);
  43263. + *last = de_id_key_cmp(&header->hash, entry_key);
  43264. + switch (*last) {
  43265. + case LESS_THAN:
  43266. + left = median;
  43267. + break;
  43268. + case GREATER_THAN:
  43269. + right = median;
  43270. + break;
  43271. + case EQUAL_TO:{
  43272. + do {
  43273. + median--;
  43274. + header--;
  43275. + } while (median >= 0 &&
  43276. + de_id_key_cmp(&header->hash,
  43277. + entry_key) == EQUAL_TO);
  43278. + return median + 1;
  43279. + }
  43280. + }
  43281. + }
  43282. + header = header_at(coord, left);
  43283. + for (; left < entries; ++left, ++header) {
  43284. + prefetch(header + 1);
  43285. + *last = de_id_key_cmp(&header->hash, entry_key);
  43286. + if (*last != LESS_THAN)
  43287. + break;
  43288. + }
  43289. + if (left < entries)
  43290. + return left;
  43291. + else
  43292. + return RETERR(-ENOENT);
  43293. +
  43294. +}
  43295. +
  43296. +/* expand @coord as to accommodate for insertion of @no new entries starting
  43297. + from @pos, with total bodies size @size. */
  43298. +static int expand_item(const coord_t * coord /* coord of item */ ,
  43299. + int pos /* unit position */ , int no /* number of new
  43300. + * units*/ ,
  43301. + int size /* total size of new units' data */ ,
  43302. + unsigned int data_size /* free space already reserved
  43303. + * in the item for insertion */ )
  43304. +{
  43305. + int entries;
  43306. + cde_unit_header *header;
  43307. + char *dent;
  43308. + int i;
  43309. +
  43310. + assert("nikita-1310", coord != NULL);
  43311. + assert("nikita-1311", pos >= 0);
  43312. + assert("nikita-1312", no > 0);
  43313. + assert("nikita-1313", data_size >= no * sizeof(directory_entry_format));
  43314. + assert("nikita-1343",
  43315. + item_length_by_coord(coord) >=
  43316. + (int)(size + data_size + no * sizeof *header));
  43317. +
  43318. + entries = units(coord);
  43319. +
  43320. + if (pos == entries)
  43321. + dent = address(coord, size);
  43322. + else
  43323. + dent = (char *)entry_at(coord, pos);
  43324. + /* place where new header will be in */
  43325. + header = header_at(coord, pos);
  43326. + /* free space for new entry headers */
  43327. + memmove(header + no, header,
  43328. + (unsigned)(address(coord, size) - (char *)header));
  43329. + /* if adding to the end initialise first new header */
  43330. + if (pos == entries) {
  43331. + set_offset(coord, pos, (unsigned)size);
  43332. + }
  43333. +
  43334. + /* adjust entry pointer and size */
  43335. + dent = dent + no * sizeof *header;
  43336. + size += no * sizeof *header;
  43337. + /* free space for new entries */
  43338. + memmove(dent + data_size, dent,
  43339. + (unsigned)(address(coord, size) - dent));
  43340. +
  43341. + /* increase counter */
  43342. + entries += no;
  43343. + put_unaligned(cpu_to_le16((__u16) entries), &formatted_at(coord)->num_of_entries);
  43344. +
  43345. + /* [ 0 ... pos ] entries were shifted by no * ( sizeof *header )
  43346. + bytes. */
  43347. + for (i = 0; i <= pos; ++i)
  43348. + adj_offset(coord, i, no * sizeof *header);
  43349. + /* [ pos + no ... +\infty ) entries were shifted by ( no *
  43350. + sizeof *header + data_size ) bytes */
  43351. + for (i = pos + no; i < entries; ++i)
  43352. + adj_offset(coord, i, no * sizeof *header + data_size);
  43353. + return 0;
  43354. +}
  43355. +
  43356. +/* insert new @entry into item */
  43357. +static int expand(const coord_t * coord /* coord of item */ ,
  43358. + struct cde_entry * entry /* entry to insert */ ,
  43359. + int len /* length of @entry data */ ,
  43360. + int *pos /* position to insert */ ,
  43361. + reiser4_dir_entry_desc * dir_entry /* parameters for new
  43362. + * entry */ )
  43363. +{
  43364. + cmp_t cmp_res;
  43365. + int datasize;
  43366. +
  43367. + *pos = find(coord, &dir_entry->key, &cmp_res);
  43368. + if (*pos < 0)
  43369. + *pos = units(coord);
  43370. +
  43371. + datasize = sizeof(directory_entry_format);
  43372. + if (is_longname(entry->name->name, entry->name->len))
  43373. + datasize += entry->name->len + 1;
  43374. +
  43375. + expand_item(coord, *pos, 1, item_length_by_coord(coord) - len,
  43376. + datasize);
  43377. + return 0;
  43378. +}
  43379. +
  43380. +/* paste body of @entry into item */
  43381. +static int paste_entry(const coord_t * coord /* coord of item */ ,
  43382. + struct cde_entry * entry /* new entry */ ,
  43383. + int pos /* position to insert */ ,
  43384. + reiser4_dir_entry_desc * dir_entry /* parameters for
  43385. + * new entry */ )
  43386. +{
  43387. + cde_unit_header *header;
  43388. + directory_entry_format *dent;
  43389. + const char *name;
  43390. + int len;
  43391. +
  43392. + header = header_at(coord, pos);
  43393. + dent = entry_at(coord, pos);
  43394. +
  43395. + build_de_id_by_key(&dir_entry->key, &header->hash);
  43396. + build_inode_key_id(entry->obj, &dent->id);
  43397. + /* AUDIT unsafe strcpy() operation! It should be replaced with
  43398. + much less CPU hungry
  43399. + memcpy( ( char * ) dent -> name, entry -> name -> name , entry -> name -> len );
  43400. +
  43401. + Also a more major thing is that there should be a way to figure out
  43402. + amount of space in dent -> name and be able to check that we are
  43403. + not going to overwrite more than we supposed to */
  43404. + name = entry->name->name;
  43405. + len = entry->name->len;
  43406. + if (is_longname(name, len)) {
  43407. + strcpy((unsigned char *)dent->name, name);
  43408. + put_unaligned(0, &dent->name[len]);
  43409. + }
  43410. + return 0;
  43411. +}
  43412. +
  43413. +/* estimate how much space is necessary in item to insert/paste set of entries
  43414. + described in @data. */
  43415. +int estimate_cde(const coord_t * coord /* coord of item */ ,
  43416. + const reiser4_item_data * data /* parameters for new item */ )
  43417. +{
  43418. + struct cde_entry_data *e;
  43419. + int result;
  43420. + int i;
  43421. +
  43422. + e = (struct cde_entry_data *) data->data;
  43423. +
  43424. + assert("nikita-1288", e != NULL);
  43425. + assert("nikita-1289", e->num_of_entries >= 0);
  43426. +
  43427. + if (coord == NULL)
  43428. + /* insert */
  43429. + result = sizeof(cde_item_format);
  43430. + else
  43431. + /* paste */
  43432. + result = 0;
  43433. +
  43434. + result += e->num_of_entries *
  43435. + (sizeof(cde_unit_header) + sizeof(directory_entry_format));
  43436. + for (i = 0; i < e->num_of_entries; ++i) {
  43437. + const char *name;
  43438. + int len;
  43439. +
  43440. + name = e->entry[i].name->name;
  43441. + len = e->entry[i].name->len;
  43442. + assert("nikita-2054", strlen(name) == len);
  43443. + if (is_longname(name, len))
  43444. + result += len + 1;
  43445. + }
  43446. + ((reiser4_item_data *) data)->length = result;
  43447. + return result;
  43448. +}
  43449. +
  43450. +/* ->nr_units() method for this item plugin. */
  43451. +pos_in_node_t nr_units_cde(const coord_t * coord /* coord of item */ )
  43452. +{
  43453. + return units(coord);
  43454. +}
  43455. +
  43456. +/* ->unit_key() method for this item plugin. */
  43457. +reiser4_key *unit_key_cde(const coord_t * coord /* coord of item */ ,
  43458. + reiser4_key * key /* resulting key */ )
  43459. +{
  43460. + assert("nikita-1452", coord != NULL);
  43461. + assert("nikita-1345", idx_of(coord) < units(coord));
  43462. + assert("nikita-1346", key != NULL);
  43463. +
  43464. + item_key_by_coord(coord, key);
  43465. + extract_key_from_de_id(extract_dir_id_from_key(key),
  43466. + &header_at(coord, idx_of(coord))->hash, key);
  43467. + return key;
  43468. +}
  43469. +
  43470. +/* mergeable_cde(): implementation of ->mergeable() item method.
  43471. +
  43472. + Two directory items are mergeable iff they are from the same
  43473. + directory. That simple.
  43474. +
  43475. +*/
  43476. +int mergeable_cde(const coord_t * p1 /* coord of first item */ ,
  43477. + const coord_t * p2 /* coord of second item */ )
  43478. +{
  43479. + reiser4_key k1;
  43480. + reiser4_key k2;
  43481. +
  43482. + assert("nikita-1339", p1 != NULL);
  43483. + assert("nikita-1340", p2 != NULL);
  43484. +
  43485. + return
  43486. + (item_plugin_by_coord(p1) == item_plugin_by_coord(p2)) &&
  43487. + (extract_dir_id_from_key(item_key_by_coord(p1, &k1)) ==
  43488. + extract_dir_id_from_key(item_key_by_coord(p2, &k2)));
  43489. +
  43490. +}
  43491. +
  43492. +/* ->max_key_inside() method for this item plugin. */
  43493. +reiser4_key *max_key_inside_cde(const coord_t * coord /* coord of item */ ,
  43494. + reiser4_key * result /* resulting key */ )
  43495. +{
  43496. + assert("nikita-1342", coord != NULL);
  43497. +
  43498. + item_key_by_coord(coord, result);
  43499. + set_key_ordering(result, get_key_ordering(reiser4_max_key()));
  43500. + set_key_fulloid(result, get_key_fulloid(reiser4_max_key()));
  43501. + set_key_offset(result, get_key_offset(reiser4_max_key()));
  43502. + return result;
  43503. +}
  43504. +
  43505. +/* @data contains data which are to be put into tree */
  43506. +int can_contain_key_cde(const coord_t * coord /* coord of item */ ,
  43507. + const reiser4_key * key /* key to check */ ,
  43508. + const reiser4_item_data * data /* parameters of new
  43509. + * item/unit being
  43510. + * created */ )
  43511. +{
  43512. + reiser4_key item_key;
  43513. +
  43514. + /* FIXME-VS: do not rely on anything but iplug field of @data. Only
  43515. + data->iplug is initialized */
  43516. + assert("vs-457", data && data->iplug);
  43517. +/* assert( "vs-553", data -> user == 0 );*/
  43518. + item_key_by_coord(coord, &item_key);
  43519. +
  43520. + return (item_plugin_by_coord(coord) == data->iplug) &&
  43521. + (extract_dir_id_from_key(&item_key) ==
  43522. + extract_dir_id_from_key(key));
  43523. +}
  43524. +
  43525. +#if REISER4_DEBUG
  43526. +/* cde_check ->check() method for compressed directory items
  43527. +
  43528. + used for debugging, every item should have here the most complete
  43529. + possible check of the consistency of the item that the inventor can
  43530. + construct
  43531. +*/
  43532. +int reiser4_check_cde(const coord_t * coord /* coord of item to check */,
  43533. + const char **error /* where to store error message */)
  43534. +{
  43535. + int i;
  43536. + int result;
  43537. + char *item_start;
  43538. + char *item_end;
  43539. + reiser4_key key;
  43540. +
  43541. + coord_t c;
  43542. +
  43543. + assert("nikita-1357", coord != NULL);
  43544. + assert("nikita-1358", error != NULL);
  43545. +
  43546. + if (!ergo(coord->item_pos != 0,
  43547. + is_dot_key(item_key_by_coord(coord, &key)))) {
  43548. + *error = "CDE doesn't start with dot";
  43549. + return -1;
  43550. + }
  43551. + item_start = item_body_by_coord(coord);
  43552. + item_end = item_start + item_length_by_coord(coord);
  43553. +
  43554. + coord_dup(&c, coord);
  43555. + result = 0;
  43556. + for (i = 0; i < units(coord); ++i) {
  43557. + directory_entry_format *entry;
  43558. +
  43559. + if ((char *)(header_at(coord, i) + 1) >
  43560. + item_end - units(coord) * sizeof *entry) {
  43561. + *error = "CDE header is out of bounds";
  43562. + result = -1;
  43563. + break;
  43564. + }
  43565. + entry = entry_at(coord, i);
  43566. + if ((char *)entry < item_start + sizeof(cde_item_format)) {
  43567. + *error = "CDE header is too low";
  43568. + result = -1;
  43569. + break;
  43570. + }
  43571. + if ((char *)(entry + 1) > item_end) {
  43572. + *error = "CDE header is too high";
  43573. + result = -1;
  43574. + break;
  43575. + }
  43576. + }
  43577. +
  43578. + return result;
  43579. +}
  43580. +#endif
  43581. +
  43582. +/* ->init() method for this item plugin. */
  43583. +int init_cde(coord_t * coord /* coord of item */ ,
  43584. + coord_t * from UNUSED_ARG, reiser4_item_data * data /* structure used for insertion */
  43585. + UNUSED_ARG)
  43586. +{
  43587. + put_unaligned(cpu_to_le16(0), &formatted_at(coord)->num_of_entries);
  43588. + return 0;
  43589. +}
  43590. +
  43591. +/* ->lookup() method for this item plugin. */
  43592. +lookup_result lookup_cde(const reiser4_key * key /* key to search for */ ,
  43593. + lookup_bias bias /* search bias */ ,
  43594. + coord_t * coord /* coord of item to lookup in */ )
  43595. +{
  43596. + cmp_t last_comp;
  43597. + int pos;
  43598. +
  43599. + reiser4_key utmost_key;
  43600. +
  43601. + assert("nikita-1293", coord != NULL);
  43602. + assert("nikita-1294", key != NULL);
  43603. +
  43604. + CHECKME(coord);
  43605. +
  43606. + if (keygt(item_key_by_coord(coord, &utmost_key), key)) {
  43607. + coord->unit_pos = 0;
  43608. + coord->between = BEFORE_UNIT;
  43609. + return CBK_COORD_NOTFOUND;
  43610. + }
  43611. + pos = find(coord, key, &last_comp);
  43612. + if (pos >= 0) {
  43613. + coord->unit_pos = (int)pos;
  43614. + switch (last_comp) {
  43615. + case EQUAL_TO:
  43616. + coord->between = AT_UNIT;
  43617. + return CBK_COORD_FOUND;
  43618. + case GREATER_THAN:
  43619. + coord->between = BEFORE_UNIT;
  43620. + return RETERR(-ENOENT);
  43621. + case LESS_THAN:
  43622. + default:
  43623. + impossible("nikita-1298", "Broken find");
  43624. + return RETERR(-EIO);
  43625. + }
  43626. + } else {
  43627. + coord->unit_pos = units(coord) - 1;
  43628. + coord->between = AFTER_UNIT;
  43629. + return (bias ==
  43630. + FIND_MAX_NOT_MORE_THAN) ? CBK_COORD_FOUND :
  43631. + CBK_COORD_NOTFOUND;
  43632. + }
  43633. +}
  43634. +
  43635. +/* ->paste() method for this item plugin. */
  43636. +int paste_cde(coord_t * coord /* coord of item */ ,
  43637. + reiser4_item_data * data /* parameters of new unit being
  43638. + * inserted */ ,
  43639. + carry_plugin_info * info UNUSED_ARG /* todo carry queue */ )
  43640. +{
  43641. + struct cde_entry_data *e;
  43642. + int result;
  43643. + int i;
  43644. +
  43645. + CHECKME(coord);
  43646. + e = (struct cde_entry_data *) data->data;
  43647. +
  43648. + result = 0;
  43649. + for (i = 0; i < e->num_of_entries; ++i) {
  43650. + int pos;
  43651. + int phantom_size;
  43652. +
  43653. + phantom_size = data->length;
  43654. + if (units(coord) == 0)
  43655. + phantom_size -= sizeof(cde_item_format);
  43656. +
  43657. + result =
  43658. + expand(coord, e->entry + i, phantom_size, &pos, data->arg);
  43659. + if (result != 0)
  43660. + break;
  43661. + result = paste_entry(coord, e->entry + i, pos, data->arg);
  43662. + if (result != 0)
  43663. + break;
  43664. + }
  43665. + CHECKME(coord);
  43666. + return result;
  43667. +}
  43668. +
  43669. +/* amount of space occupied by all entries starting from @idx both headers and
  43670. + bodies. */
  43671. +static unsigned int part_size(const coord_t * coord /* coord of item */ ,
  43672. + int idx /* index of unit */ )
  43673. +{
  43674. + assert("nikita-1299", coord != NULL);
  43675. + assert("nikita-1300", idx < (int)units(coord));
  43676. +
  43677. + return sizeof(cde_item_format) +
  43678. + (idx + 1) * sizeof(cde_unit_header) + offset_of(coord,
  43679. + idx + 1) -
  43680. + offset_of(coord, 0);
  43681. +}
  43682. +
  43683. +/* how many but not more than @want units of @source can be merged with
  43684. + item in @target node. If pend == append - we try to append last item
  43685. + of @target by first units of @source. If pend == prepend - we try to
  43686. + "prepend" first item in @target by last units of @source. @target
  43687. + node has @free_space bytes of free space. Total size of those units
  43688. + are returned via @size */
  43689. +int can_shift_cde(unsigned free_space /* free space in item */ ,
  43690. + coord_t * coord /* coord of source item */ ,
  43691. + znode * target /* target node */ ,
  43692. + shift_direction pend /* shift direction */ ,
  43693. + unsigned *size /* resulting number of shifted bytes */ ,
  43694. + unsigned want /* maximal number of bytes to shift */ )
  43695. +{
  43696. + int shift;
  43697. +
  43698. + CHECKME(coord);
  43699. + if (want == 0) {
  43700. + *size = 0;
  43701. + return 0;
  43702. + }
  43703. +
  43704. + /* pend == SHIFT_LEFT <==> shifting to the left */
  43705. + if (pend == SHIFT_LEFT) {
  43706. + for (shift = min((int)want - 1, units(coord)); shift >= 0;
  43707. + --shift) {
  43708. + *size = part_size(coord, shift);
  43709. + if (target != NULL)
  43710. + *size -= sizeof(cde_item_format);
  43711. + if (*size <= free_space)
  43712. + break;
  43713. + }
  43714. + shift = shift + 1;
  43715. + } else {
  43716. + int total_size;
  43717. +
  43718. + assert("nikita-1301", pend == SHIFT_RIGHT);
  43719. +
  43720. + total_size = item_length_by_coord(coord);
  43721. + for (shift = units(coord) - want - 1; shift < units(coord) - 1;
  43722. + ++shift) {
  43723. + *size = total_size - part_size(coord, shift);
  43724. + if (target == NULL)
  43725. + *size += sizeof(cde_item_format);
  43726. + if (*size <= free_space)
  43727. + break;
  43728. + }
  43729. + shift = units(coord) - shift - 1;
  43730. + }
  43731. + if (shift == 0)
  43732. + *size = 0;
  43733. + CHECKME(coord);
  43734. + return shift;
  43735. +}
  43736. +
  43737. +/* ->copy_units() method for this item plugin. */
  43738. +void copy_units_cde(coord_t * target /* coord of target item */ ,
  43739. + coord_t * source /* coord of source item */ ,
  43740. + unsigned from /* starting unit */ ,
  43741. + unsigned count /* how many units to copy */ ,
  43742. + shift_direction where_is_free_space /* shift direction */ ,
  43743. + unsigned free_space /* free space in item */ )
  43744. +{
  43745. + char *header_from;
  43746. + char *header_to;
  43747. +
  43748. + char *entry_from;
  43749. + char *entry_to;
  43750. +
  43751. + int pos_in_target;
  43752. + int data_size;
  43753. + int data_delta;
  43754. + int i;
  43755. +
  43756. + assert("nikita-1303", target != NULL);
  43757. + assert("nikita-1304", source != NULL);
  43758. + assert("nikita-1305", (int)from < units(source));
  43759. + assert("nikita-1307", (int)(from + count) <= units(source));
  43760. +
  43761. + if (where_is_free_space == SHIFT_LEFT) {
  43762. + assert("nikita-1453", from == 0);
  43763. + pos_in_target = units(target);
  43764. + } else {
  43765. + assert("nikita-1309", (int)(from + count) == units(source));
  43766. + pos_in_target = 0;
  43767. + memmove(item_body_by_coord(target),
  43768. + (char *)item_body_by_coord(target) + free_space,
  43769. + item_length_by_coord(target) - free_space);
  43770. + }
  43771. +
  43772. + CHECKME(target);
  43773. + CHECKME(source);
  43774. +
  43775. + /* expand @target */
  43776. + data_size =
  43777. + offset_of(source, (int)(from + count)) - offset_of(source,
  43778. + (int)from);
  43779. +
  43780. + if (units(target) == 0)
  43781. + free_space -= sizeof(cde_item_format);
  43782. +
  43783. + expand_item(target, pos_in_target, (int)count,
  43784. + (int)(item_length_by_coord(target) - free_space),
  43785. + (unsigned)data_size);
  43786. +
  43787. + /* copy first @count units of @source into @target */
  43788. + data_delta =
  43789. + offset_of(target, pos_in_target) - offset_of(source, (int)from);
  43790. +
  43791. + /* copy entries */
  43792. + entry_from = (char *)entry_at(source, (int)from);
  43793. + entry_to = (char *)entry_at(source, (int)(from + count));
  43794. + memmove(entry_at(target, pos_in_target), entry_from,
  43795. + (unsigned)(entry_to - entry_from));
  43796. +
  43797. + /* copy headers */
  43798. + header_from = (char *)header_at(source, (int)from);
  43799. + header_to = (char *)header_at(source, (int)(from + count));
  43800. + memmove(header_at(target, pos_in_target), header_from,
  43801. + (unsigned)(header_to - header_from));
  43802. +
  43803. + /* update offsets */
  43804. + for (i = pos_in_target; i < (int)(pos_in_target + count); ++i)
  43805. + adj_offset(target, i, data_delta);
  43806. + CHECKME(target);
  43807. + CHECKME(source);
  43808. +}
  43809. +
  43810. +/* ->cut_units() method for this item plugin: remove units [from..to], compact the item, return bytes freed. */
  43811. +int cut_units_cde(coord_t * coord /* coord of item */ ,
  43812. + pos_in_node_t from /* start unit pos */ ,
  43813. + pos_in_node_t to /* stop unit pos */ ,
  43814. + struct carry_cut_data *cdata UNUSED_ARG,
  43815. + reiser4_key * smallest_removed, reiser4_key * new_first)
  43816. +{
  43817. + char *header_from;
  43818. + char *header_to;
  43819. +
  43820. + char *entry_from;
  43821. + char *entry_to;
  43822. +
  43823. + int size;
  43824. + int entry_delta;
  43825. + int header_delta;
  43826. + int i;
  43827. +
  43828. + unsigned count;
  43829. +
  43830. + CHECKME(coord);
  43831. +
  43832. + count = to - from + 1; /* number of units removed (range is inclusive) */
  43833. +
  43834. + assert("nikita-1454", coord != NULL);
  43835. + assert("nikita-1455", (int)(from + count) <= units(coord));
  43836. +
  43837. + if (smallest_removed)
  43838. + unit_key_by_coord(coord, smallest_removed);
  43839. +
  43840. + if (new_first) {
  43841. + coord_t next;
  43842. +
  43843. + /* not everything is cut from item head */
  43844. + assert("vs-1527", from == 0);
  43845. + assert("vs-1528", to < units(coord) - 1);
  43846. +
  43847. + coord_dup(&next, coord);
  43848. + next.unit_pos++;
  43849. + unit_key_by_coord(&next, new_first); /* key of first surviving unit */
  43850. + }
  43851. +
  43852. + size = item_length_by_coord(coord);
  43853. + if (count == (unsigned)units(coord)) {
  43854. + return size; /* whole item is cut: its full length is freed */
  43855. + }
  43856. +
  43857. + header_from = (char *)header_at(coord, (int)from);
  43858. + header_to = (char *)header_at(coord, (int)(from + count));
  43859. +
  43860. + entry_from = (char *)entry_at(coord, (int)from);
  43861. + entry_to = (char *)entry_at(coord, (int)(from + count));
  43862. +
  43863. + /* move headers */
  43864. + memmove(header_from, header_to,
  43865. + (unsigned)(address(coord, size) - header_to));
  43866. +
  43867. + header_delta = header_to - header_from; /* bytes of headers removed */
  43868. +
  43869. + entry_from -= header_delta;
  43870. + entry_to -= header_delta;
  43871. + size -= header_delta;
  43872. +
  43873. + /* copy entries */
  43874. + memmove(entry_from, entry_to,
  43875. + (unsigned)(address(coord, size) - entry_to));
  43876. +
  43877. + entry_delta = entry_to - entry_from; /* bytes of entry bodies removed */
  43878. + size -= entry_delta;
  43879. +
  43880. + /* update offsets */
  43881. +
  43882. + for (i = 0; i < (int)from; ++i)
  43883. + adj_offset(coord, i, -header_delta);
  43884. +
  43885. + for (i = from; i < units(coord) - (int)count; ++i)
  43886. + adj_offset(coord, i, -header_delta - entry_delta);
  43887. +
  43888. + put_unaligned(cpu_to_le16((__u16) units(coord) - count),
  43889. + &formatted_at(coord)->num_of_entries); /* shrink unit counter */
  43890. +
  43891. + if (from == 0) {
  43892. + /* entries were removed from item head - move remaining to right */
  43893. + memmove((char *)item_body_by_coord(coord) +
  43894. + header_delta + entry_delta, item_body_by_coord(coord),
  43895. + (unsigned)size);
  43896. + if (REISER4_DEBUG)
  43897. + memset(item_body_by_coord(coord), 0,
  43898. + (unsigned)header_delta + entry_delta);
  43899. + } else {
  43900. + /* freed space is already at the end of item */
  43901. + if (REISER4_DEBUG)
  43902. + memset((char *)item_body_by_coord(coord) + size, 0,
  43903. + (unsigned)header_delta + entry_delta);
  43904. + }
  43905. +
  43906. + return header_delta + entry_delta; /* total bytes freed */
  43907. +}
  43908. +
  43909. +int kill_units_cde(coord_t * coord /* coord of item */ ,
  43910. + pos_in_node_t from /* start unit pos */ ,
  43911. + pos_in_node_t to /* stop unit pos */ ,
  43912. + struct carry_kill_data *kdata UNUSED_ARG,
  43913. + reiser4_key * smallest_removed, reiser4_key * new_first)
  43914. +{
  43915. + return cut_units_cde(coord, from, to, NULL, smallest_removed, new_first); /* killing cde units degenerates to cutting them */
  43916. +}
  43917. +
  43918. +/* ->s.dir.extract_key() method for this item plugin. */
  43919. +int extract_key_cde(const coord_t * coord /* coord of item */ ,
  43920. + reiser4_key * key /* resulting key */ )
  43921. +{
  43922. + directory_entry_format *dent;
  43923. +
  43924. + assert("nikita-1155", coord != NULL);
  43925. + assert("nikita-1156", key != NULL);
  43926. +
  43927. + dent = entry_at(coord, idx_of(coord));
  43928. + return extract_key_from_id(&dent->id, key); /* key of the object this entry refers to */
  43929. +}
  43930. +
  43931. +int
  43932. +update_key_cde(const coord_t * coord, const reiser4_key * key,
  43933. + lock_handle * lh UNUSED_ARG)
  43934. +{
  43935. + directory_entry_format *dent;
  43936. + obj_key_id obj_id;
  43937. + int result;
  43938. +
  43939. + assert("nikita-2344", coord != NULL);
  43940. + assert("nikita-2345", key != NULL);
  43941. +
  43942. + dent = entry_at(coord, idx_of(coord));
  43943. + result = build_obj_key_id(key, &obj_id);
  43944. + if (result == 0) {
  43945. + dent->id = obj_id; /* store new object key id in the on-disk entry */
  43946. + znode_make_dirty(coord->node);
  43947. + }
  43948. + return 0; /* NOTE(review): build_obj_key_id() failure is silently dropped — confirm returning 0 unconditionally is intended */
  43949. +}
  43950. +
  43951. +/* ->s.dir.extract_name() method for this item plugin. */
  43952. +char *extract_name_cde(const coord_t * coord /* coord of item */ , char *buf)
  43953. +{
  43954. + directory_entry_format *dent;
  43955. +
  43956. + assert("nikita-1157", coord != NULL);
  43957. +
  43958. + dent = entry_at(coord, idx_of(coord));
  43959. + return extract_dent_name(coord, dent, buf); /* name of entry at current unit position */
  43960. +}
  43961. +
  43962. +static int cde_bytes(int pasting, const reiser4_item_data * data)
  43963. +{
  43964. + int result;
  43965. +
  43966. + result = data->length;
  43967. + if (!pasting)
  43968. + result -= sizeof(cde_item_format); /* new item: exclude the item header from per-entry byte accounting */
  43969. + return result;
  43970. +}
  43971. +
  43972. +/* ->s.dir.add_entry() method for this item plugin */
  43973. +int add_entry_cde(struct inode *dir /* directory object */ ,
  43974. + coord_t * coord /* coord of item */ ,
  43975. + lock_handle * lh /* lock handle for insertion */ ,
  43976. + const struct dentry *name /* name to insert */ ,
  43977. + reiser4_dir_entry_desc * dir_entry /* parameters of new
  43978. + * directory entry */ )
  43979. +{
  43980. + reiser4_item_data data;
  43981. + struct cde_entry entry;
  43982. + struct cde_entry_data edata;
  43983. + int result;
  43984. +
  43985. + assert("nikita-1656", coord->node == lh->node);
  43986. + assert("nikita-1657", znode_is_write_locked(coord->node));
  43987. +
  43988. + edata.num_of_entries = 1; /* exactly one entry is added */
  43989. + edata.entry = &entry;
  43990. +
  43991. + entry.dir = dir;
  43992. + entry.obj = dir_entry->obj;
  43993. + entry.name = &name->d_name;
  43994. +
  43995. + data.data = (char *)&edata;
  43996. + data.user = 0; /* &edata is not user space */
  43997. + data.iplug = item_plugin_by_id(COMPOUND_DIR_ID);
  43998. + data.arg = dir_entry;
  43999. + assert("nikita-1302", data.iplug != NULL);
  44000. +
  44001. + result = is_dot_key(&dir_entry->key); /* non-zero selects whole-item insertion below */
  44002. + data.length = estimate_cde(result ? coord : NULL, &data);
  44003. +
  44004. + inode_add_bytes(dir, cde_bytes(result, &data)); /* account new bytes to the directory inode */
  44005. +
  44006. + if (result)
  44007. + result = insert_by_coord(coord, &data, &dir_entry->key, lh, 0);
  44008. + else
  44009. + result = reiser4_resize_item(coord, &data, &dir_entry->key,
  44010. + lh, 0);
  44011. + return result;
  44012. +}
  44013. +
  44014. +/* ->s.dir.rem_entry() */
  44015. +int rem_entry_cde(struct inode *dir /* directory of item */ ,
  44016. + const struct qstr *name, coord_t * coord /* coord of item */ ,
  44017. + lock_handle * lh UNUSED_ARG /* lock handle for
  44018. + * removal */ ,
  44019. + reiser4_dir_entry_desc * entry UNUSED_ARG /* parameters of
  44020. + * directory entry
  44021. + * being removed */ )
  44022. +{
  44023. + coord_t shadow;
  44024. + int result;
  44025. + int length;
  44026. + ON_DEBUG(char buf[DE_NAME_BUF_LEN]);
  44027. +
  44028. + assert("nikita-2870", strlen(name->name) == name->len);
  44029. + assert("nikita-2869",
  44030. + !strcmp(name->name, extract_name_cde(coord, buf)));
  44031. +
  44032. + length = sizeof(directory_entry_format) + sizeof(cde_unit_header); /* fixed on-disk footprint of one entry */
  44033. + if (is_longname(name->name, name->len))
  44034. + length += name->len + 1; /* long names take name->len + 1 extra bytes (presumably incl. NUL) */
  44035. +
  44036. + if (inode_get_bytes(dir) < length) {
  44037. + warning("nikita-2628", "Dir is broke: %llu: %llu",
  44038. + (unsigned long long)get_inode_oid(dir),
  44039. + inode_get_bytes(dir));
  44040. +
  44041. + return RETERR(-EIO); /* byte accounting inconsistent: refuse to underflow */
  44042. + }
  44043. +
  44044. + /* cut_node() is supposed to take pointers to _different_
  44045. + coords, because it will modify them without respect to
  44046. + possible aliasing. To work around this, create temporary copy
  44047. + of @coord.
  44048. + */
  44049. + coord_dup(&shadow, coord);
  44050. + result =
  44051. + kill_node_content(coord, &shadow, NULL, NULL, NULL, NULL, NULL, 0);
  44052. + if (result == 0) {
  44053. + inode_sub_bytes(dir, length); /* only charge back on successful removal */
  44054. + }
  44055. + return result;
  44056. +}
  44057. +
  44058. +/* ->s.dir.max_name_len() method for this item plugin */
  44059. +int max_name_len_cde(const struct inode *dir /* directory */ )
  44060. +{
  44061. + return
  44062. + reiser4_tree_by_inode(dir)->nplug->max_item_size() -
  44063. + sizeof(directory_entry_format) - sizeof(cde_item_format) -
  44064. + sizeof(cde_unit_header) - 2; /* largest item minus fixed overhead; "- 2" reserve — TODO confirm derivation */
  44065. +}
  44066. +
  44067. +/* Make Linus happy.
  44068. + Local variables:
  44069. + c-indentation-style: "K&R"
  44070. + mode-name: "LC"
  44071. + c-basic-offset: 8
  44072. + tab-width: 8
  44073. + fill-column: 120
  44074. + End:
  44075. +*/
  44076. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/cde.h linux-5.16.14/fs/reiser4/plugin/item/cde.h
  44077. --- linux-5.16.14.orig/fs/reiser4/plugin/item/cde.h 1970-01-01 01:00:00.000000000 +0100
  44078. +++ linux-5.16.14/fs/reiser4/plugin/item/cde.h 2022-03-12 13:26:19.676892787 +0100
  44079. @@ -0,0 +1,87 @@
  44080. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  44081. +
  44082. +/* Compound directory item. See cde.c for description. */
  44083. +
  44084. +#if !defined( __FS_REISER4_PLUGIN_COMPRESSED_DE_H__ )
  44085. +#define __FS_REISER4_PLUGIN_COMPRESSED_DE_H__
  44086. +
  44087. +#include "../../forward.h"
  44088. +#include "../../kassign.h"
  44089. +#include "../../dformat.h"
  44090. +
  44091. +#include <linux/fs.h> /* for struct inode */
  44092. +#include <linux/dcache.h> /* for struct dentry, etc */
  44093. +
  44094. +typedef struct cde_unit_header {
  44095. + de_id hash; /* de_id of this entry (see kassign.h) */
  44096. + d16 offset; /* little-endian offset of entry body within the item */
  44097. +} cde_unit_header;
  44098. +
  44099. +typedef struct cde_item_format {
  44100. + d16 num_of_entries; /* number of directory entries in this item */
  44101. + cde_unit_header entry[0]; /* per-entry headers follow (GNU zero-length array) */
  44102. +} cde_item_format;
  44103. +
  44104. +struct cde_entry {
  44105. + const struct inode *dir; /* directory the entry is added to */
  44106. + const struct inode *obj; /* object the entry points to */
  44107. + const struct qstr *name; /* entry name */
  44108. +};
  44109. +
  44110. +struct cde_entry_data {
  44111. + int num_of_entries; /* number of elements in the @entry array */
  44112. + struct cde_entry *entry; /* entries to insert */
  44113. +};
  44114. +
  44115. +/* plugin->item.b.* */
  44116. +reiser4_key *max_key_inside_cde(const coord_t * coord, reiser4_key * result);
  44117. +int can_contain_key_cde(const coord_t * coord, const reiser4_key * key,
  44118. + const reiser4_item_data *);
  44119. +int mergeable_cde(const coord_t * p1, const coord_t * p2);
  44120. +pos_in_node_t nr_units_cde(const coord_t * coord);
  44121. +reiser4_key *unit_key_cde(const coord_t * coord, reiser4_key * key);
  44122. +int estimate_cde(const coord_t * coord, const reiser4_item_data * data);
  44123. +void print_cde(const char *prefix, coord_t * coord);
  44124. +int init_cde(coord_t * coord, coord_t * from, reiser4_item_data * data);
  44125. +lookup_result lookup_cde(const reiser4_key * key, lookup_bias bias,
  44126. + coord_t * coord);
  44127. +int paste_cde(coord_t * coord, reiser4_item_data * data,
  44128. + carry_plugin_info * info UNUSED_ARG);
  44129. +int can_shift_cde(unsigned free_space, coord_t * coord, znode * target,
  44130. + shift_direction pend, unsigned *size, unsigned want);
  44131. +void copy_units_cde(coord_t * target, coord_t * source, unsigned from,
  44132. + unsigned count, shift_direction where_is_free_space,
  44133. + unsigned free_space);
  44134. +int cut_units_cde(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  44135. + struct carry_cut_data *, reiser4_key * smallest_removed,
  44136. + reiser4_key * new_first);
  44137. +int kill_units_cde(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  44138. + struct carry_kill_data *, reiser4_key * smallest_removed,
  44139. + reiser4_key * new_first);
  44140. +void print_cde(const char *prefix, coord_t * coord);
  44141. +int reiser4_check_cde(const coord_t * coord, const char **error);
  44142. +
  44143. +/* plugin->u.item.s.dir.* */
  44144. +int extract_key_cde(const coord_t * coord, reiser4_key * key);
  44145. +int update_key_cde(const coord_t * coord, const reiser4_key * key,
  44146. + lock_handle * lh);
  44147. +char *extract_name_cde(const coord_t * coord, char *buf);
  44148. +int add_entry_cde(struct inode *dir, coord_t * coord,
  44149. + lock_handle * lh, const struct dentry *name,
  44150. + reiser4_dir_entry_desc * entry);
  44151. +int rem_entry_cde(struct inode *dir, const struct qstr *name, coord_t * coord,
  44152. + lock_handle * lh, reiser4_dir_entry_desc * entry);
  44153. +int max_name_len_cde(const struct inode *dir);
  44154. +
  44155. +/* __FS_REISER4_PLUGIN_COMPRESSED_DE_H__ */
  44156. +#endif
  44157. +
  44158. +/* Make Linus happy.
  44159. + Local variables:
  44160. + c-indentation-style: "K&R"
  44161. + mode-name: "LC"
  44162. + c-basic-offset: 8
  44163. + tab-width: 8
  44164. + fill-column: 120
  44165. + End:
  44166. +*/
  44167. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/ctail.c linux-5.16.14/fs/reiser4/plugin/item/ctail.c
  44168. --- linux-5.16.14.orig/fs/reiser4/plugin/item/ctail.c 1970-01-01 01:00:00.000000000 +0100
  44169. +++ linux-5.16.14/fs/reiser4/plugin/item/ctail.c 2022-03-12 13:26:19.676892787 +0100
  44170. @@ -0,0 +1,1755 @@
  44171. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  44172. +
  44173. +/* ctails (aka "clustered tails") are items for cryptcompress objects */
  44174. +
  44175. +/* DESCRIPTION:
  44176. +
  44177. +Each cryptcompress object is stored on disk as a set of clusters sliced
  44178. +into ctails.
  44179. +
  44180. +Internal on-disk structure:
  44181. +
  44182. + HEADER (1) Here stored disk cluster shift
  44183. + BODY
  44184. +*/
  44185. +
  44186. +#include "../../forward.h"
  44187. +#include "../../debug.h"
  44188. +#include "../../dformat.h"
  44189. +#include "../../kassign.h"
  44190. +#include "../../key.h"
  44191. +#include "../../coord.h"
  44192. +#include "item.h"
  44193. +#include "../node/node.h"
  44194. +#include "../plugin.h"
  44195. +#include "../object.h"
  44196. +#include "../../znode.h"
  44197. +#include "../../carry.h"
  44198. +#include "../../tree.h"
  44199. +#include "../../inode.h"
  44200. +#include "../../super.h"
  44201. +#include "../../context.h"
  44202. +#include "../../page_cache.h"
  44203. +#include "../cluster.h"
  44204. +#include "../../flush.h"
  44205. +#include "../../tree_walk.h"
  44206. +
  44207. +#include <linux/pagevec.h>
  44208. +#include <linux/swap.h>
  44209. +#include <linux/fs.h>
  44210. +
  44211. +/* return body of ctail item at @coord */
  44212. +static ctail_item_format *ctail_formatted_at(const coord_t * coord)
  44213. +{
  44214. + assert("edward-60", coord != NULL);
  44215. + return item_body_by_coord(coord); /* item body begins with ctail_item_format */
  44216. +}
  44217. +
  44218. +static int cluster_shift_by_coord(const coord_t * coord)
  44219. +{
  44220. + return get_unaligned(&ctail_formatted_at(coord)->cluster_shift); /* log2 of disk cluster size (see disk_cluster_size()) */
  44221. +}
  44222. +
  44223. +static inline void dclust_set_extension_shift(hint_t * hint)
  44224. +{
  44225. + assert("edward-1270",
  44226. + item_id_by_coord(&hint->ext_coord.coord) == CTAIL_ID);
  44227. + hint->ext_coord.extension.ctail.shift =
  44228. + cluster_shift_by_coord(&hint->ext_coord.coord); /* cache the item's shift in the hint */
  44229. +}
  44230. +
  44231. +static loff_t off_by_coord(const coord_t * coord)
  44232. +{
  44233. + reiser4_key key;
  44234. + return get_key_offset(item_key_by_coord(coord, &key)); /* file offset encoded in the item key */
  44235. +}
  44236. +
  44237. +int coord_is_unprepped_ctail(const coord_t * coord)
  44238. +{
  44239. + assert("edward-1233", coord != NULL);
  44240. + assert("edward-1234", item_id_by_coord(coord) == CTAIL_ID);
  44241. + assert("edward-1235",
  44242. + ergo((int)cluster_shift_by_coord(coord) == (int)UCTAIL_SHIFT,
  44243. + nr_units_ctail(coord) == (pos_in_node_t) UCTAIL_NR_UNITS));
  44244. +
  44245. + return (int)cluster_shift_by_coord(coord) == (int)UCTAIL_SHIFT; /* UCTAIL_SHIFT marks an unprepped ctail */
  44246. +}
  44247. +
  44248. +static cloff_t clust_by_coord(const coord_t * coord, struct inode *inode)
  44249. +{
  44250. + int shift;
  44251. +
  44252. + if (inode != NULL) {
  44253. + shift = inode_cluster_shift(inode); /* prefer inode's shift; must match the item's when prepped */
  44254. + assert("edward-1236",
  44255. + ergo(!coord_is_unprepped_ctail(coord),
  44256. + shift == cluster_shift_by_coord(coord)));
  44257. + } else {
  44258. + assert("edward-1237", !coord_is_unprepped_ctail(coord));
  44259. + shift = cluster_shift_by_coord(coord);
  44260. + }
  44261. + return off_by_coord(coord) >> shift; /* cluster index = file offset / cluster size */
  44262. +}
  44263. +
  44264. +static int disk_cluster_size(const coord_t * coord)
  44265. +{
  44266. + assert("edward-1156",
  44267. + item_plugin_by_coord(coord) == item_plugin_by_id(CTAIL_ID));
  44268. + /* calculation of disk cluster size
  44269. + is meaningless if ctail is unprepped */
  44270. + assert("edward-1238", !coord_is_unprepped_ctail(coord));
  44271. +
  44272. + return 1 << cluster_shift_by_coord(coord); /* size in bytes */
  44273. +}
  44274. +
  44275. +/* true if the key is of first disk cluster item */
  44276. +static int is_disk_cluster_key(const reiser4_key * key, const coord_t * coord)
  44277. +{
  44278. + assert("edward-1239", item_id_by_coord(coord) == CTAIL_ID);
  44279. +
  44280. + return coord_is_unprepped_ctail(coord) ||
  44281. + ((get_key_offset(key) &
  44282. + ((loff_t) disk_cluster_size(coord) - 1)) == 0); /* offset aligned on disk cluster boundary */
  44283. +}
  44284. +
  44285. +static char *first_unit(coord_t * coord)
  44286. +{
  44287. + /* FIXME: warning: pointer of type `void *' used in arithmetic */
  44288. + return (char *)item_body_by_coord(coord) + sizeof(ctail_item_format); /* data bytes follow the item header */
  44289. +}
  44290. +
  44291. +/* plugin->u.item.b.max_key_inside :
  44292. + tail_max_key_inside */
  44293. +
  44294. +/* plugin->u.item.b.can_contain_key */
  44295. +int can_contain_key_ctail(const coord_t * coord, const reiser4_key * key,
  44296. + const reiser4_item_data * data)
  44297. +{
  44298. + reiser4_key item_key;
  44299. +
  44300. + if (item_plugin_by_coord(coord) != data->iplug)
  44301. + return 0; /* different item plugins never merge */
  44302. +
  44303. + item_key_by_coord(coord, &item_key);
  44304. + if (get_key_locality(key) != get_key_locality(&item_key) ||
  44305. + get_key_objectid(key) != get_key_objectid(&item_key))
  44306. + return 0; /* different files */
  44307. + if (get_key_offset(&item_key) + nr_units_ctail(coord) !=
  44308. + get_key_offset(key))
  44309. + return 0; /* @key does not continue exactly where this item ends */
  44310. + if (is_disk_cluster_key(key, coord))
  44311. + /*
  44312. + * can not merge at the beginning
  44313. + * of a logical cluster in a file
  44314. + */
  44315. + return 0;
  44316. + return 1;
  44317. +}
  44318. +
  44319. +/* plugin->u.item.b.mergeable */
  44320. +int mergeable_ctail(const coord_t * p1, const coord_t * p2) /* true if item @p2 can be glued to the end of @p1 */
  44321. +{
  44322. + reiser4_key key1, key2;
  44323. +
  44324. + assert("edward-62", item_id_by_coord(p1) == CTAIL_ID);
  44325. + assert("edward-61", plugin_of_group(item_plugin_by_coord(p1),
  44326. + UNIX_FILE_METADATA_ITEM_TYPE));
  44327. +
  44328. + if (item_id_by_coord(p2) != CTAIL_ID) {
  44329. + /* second item is of another type */
  44330. + return 0;
  44331. + }
  44332. + item_key_by_coord(p1, &key1);
  44333. + item_key_by_coord(p2, &key2);
  44334. + if (get_key_locality(&key1) != get_key_locality(&key2) ||
  44335. + get_key_objectid(&key1) != get_key_objectid(&key2) ||
  44336. + get_key_type(&key1) != get_key_type(&key2)) {
  44337. + /* items of different objects */
  44338. + return 0;
  44339. + }
  44340. + if (get_key_offset(&key1) + nr_units_ctail(p1) != get_key_offset(&key2))
  44341. + /* not adjacent items */
  44342. + return 0;
  44343. + if (is_disk_cluster_key(&key2, p2))
  44344. + /*
  44345. + * can not merge at the beginning
  44346. + * of a logical cluster in a file
  44347. + */
  44348. + return 0;
  44349. + return 1;
  44350. +}
  44351. +
  44352. +/* plugin->u.item.b.nr_units */
  44353. +pos_in_node_t nr_units_ctail(const coord_t * coord)
  44354. +{
  44355. + return (item_length_by_coord(coord) -
  44356. + sizeof(ctail_formatted_at(coord)->cluster_shift)); /* one unit per data byte, header excluded */
  44357. +}
  44358. +
  44359. +/* plugin->u.item.b.estimate:
  44360. + estimate how much space is needed to insert/paste @data->length bytes
  44361. + into ctail at @coord */
  44362. +int estimate_ctail(const coord_t * coord /* coord of item */ ,
  44363. + const reiser4_item_data *
  44364. + data /* parameters for new item */ )
  44365. +{
  44366. + if (coord == NULL)
  44367. + /* insert */
  44368. + return (sizeof(ctail_item_format) + data->length); /* a new item also needs its header */
  44369. + else
  44370. + /* paste */
  44371. + return data->length;
  44372. +}
  44373. +
  44374. +/* ->init() method for this item plugin. */
  44375. +int init_ctail(coord_t * to /* coord of item */ ,
  44376. + coord_t * from /* old_item */ ,
  44377. + reiser4_item_data * data /* structure used for insertion */ )
  44378. +{
  44379. + int cluster_shift; /* cpu value to convert */
  44380. +
  44381. + if (data) {
  44382. + assert("edward-463", data->length > sizeof(ctail_item_format));
  44383. + cluster_shift = *((int *)(data->arg)); /* shift is passed via insertion data */
  44384. + data->length -= sizeof(ctail_item_format);
  44385. + } else {
  44386. + assert("edward-464", from != NULL);
  44387. + assert("edward-855", ctail_ok(from));
  44388. + cluster_shift = (int)(cluster_shift_by_coord(from)); /* inherit shift from the old item */
  44389. + }
  44390. + put_unaligned((d8)cluster_shift, &ctail_formatted_at(to)->cluster_shift);
  44391. + assert("edward-856", ctail_ok(to));
  44392. + return 0;
  44393. +}
  44394. +
  44395. +/* plugin->u.item.b.lookup:
  44396. + NULL: We are looking for item keys only */
  44397. +
  44398. +#if REISER4_DEBUG
  44399. +int ctail_ok(const coord_t * coord)
  44400. +{
  44401. + return coord_is_unprepped_ctail(coord) ||
  44402. + cluster_shift_ok(cluster_shift_by_coord(coord)); /* stored shift must be in the valid range */
  44403. +}
  44404. +
  44405. +/* plugin->u.item.b.check */
  44406. +int check_ctail(const coord_t * coord, const char **error)
  44407. +{
  44408. + if (!ctail_ok(coord)) {
  44409. + if (error)
  44410. + *error = "bad cluster shift in ctail";
  44411. + return 1; /* non-zero signals a corrupted item */
  44412. + }
  44413. + return 0;
  44414. +}
  44415. +#endif
  44416. +
  44417. +/* plugin->u.item.b.paste */
  44418. +int
  44419. +paste_ctail(coord_t * coord, reiser4_item_data * data,
  44420. + carry_plugin_info * info UNUSED_ARG)
  44421. +{
  44422. + unsigned old_nr_units;
  44423. +
  44424. + assert("edward-268", data->data != NULL);
  44425. + /* copy only from kernel space */
  44426. + assert("edward-66", data->user == 0);
  44427. +
  44428. + old_nr_units =
  44429. + item_length_by_coord(coord) - sizeof(ctail_item_format) -
  44430. + data->length; /* item is already expanded: subtract the pasted bytes back out */
  44431. +
  44432. + /* ctail items never get pasted in the middle */
  44433. +
  44434. + if (coord->unit_pos == 0 && coord->between == AT_UNIT) {
  44435. +
  44436. + /* paste at the beginning when create new item */
  44437. + assert("edward-450",
  44438. + item_length_by_coord(coord) ==
  44439. + data->length + sizeof(ctail_item_format));
  44440. + assert("edward-451", old_nr_units == 0);
  44441. + } else if (coord->unit_pos == old_nr_units - 1
  44442. + && coord->between == AFTER_UNIT) {
  44443. +
  44444. + /* paste at the end */
  44445. + coord->unit_pos++;
  44446. + } else
  44447. + impossible("edward-453", "bad paste position");
  44448. +
  44449. + memcpy(first_unit(coord) + coord->unit_pos, data->data, data->length); /* one unit == one byte */
  44450. +
  44451. + assert("edward-857", ctail_ok(coord));
  44452. +
  44453. + return 0;
  44454. +}
  44455. +
  44456. +/* plugin->u.item.b.fast_paste */
  44457. +
  44458. +/*
  44459. + * plugin->u.item.b.can_shift
  44460. + *
  44461. + * Return number of units that can be shifted;
  44462. + * Store space (in bytes) occupied by those units in @size.
  44463. + */
  44464. +int can_shift_ctail(unsigned free_space, coord_t *source,
  44465. + znode * target, shift_direction direction UNUSED_ARG,
  44466. + unsigned *size, unsigned want)
  44467. +{
  44468. + /* make sure that we do not want to shift more than we have */
  44469. + assert("edward-68", want > 0 && want <= nr_units_ctail(source));
  44470. +
  44471. + *size = min(want, free_space);
  44472. +
  44473. + if (!target) {
  44474. + /*
  44475. + * new item will be created
  44476. + */
  44477. + if (*size <= sizeof(ctail_item_format)) {
  44478. + /*
  44479. + * can not shift only ctail header
  44480. + */
  44481. + *size = 0;
  44482. + return 0;
  44483. + }
  44484. + return *size - sizeof(ctail_item_format); /* header consumes part of the space budget */
  44485. + }
  44486. + else
  44487. + /*
  44488. + * shifting to the mergeable item
  44489. + */
  44490. + return *size;
  44491. +}
  44492. +
  44493. +/*
  44494. + * plugin->u.item.b.copy_units
  44495. + * cooperates with ->can_shift()
  44496. + */
  44497. +void copy_units_ctail(coord_t * target, coord_t * source,
  44498. + unsigned from, unsigned count /* units */ ,
  44499. + shift_direction where_is_free_space,
  44500. + unsigned free_space /* bytes */ )
  44501. +{
  44502. + /* make sure that item @target is expanded already */
  44503. + assert("edward-69", (unsigned)item_length_by_coord(target) >= count);
  44504. + assert("edward-70", free_space == count || free_space == count + 1);
  44505. +
  44506. + assert("edward-858", ctail_ok(source));
  44507. +
  44508. + if (where_is_free_space == SHIFT_LEFT) {
  44509. + /*
  44510. + * append item @target with @count first bytes
  44511. + * of @source: this restriction came from ordinary tails
  44512. + */
  44513. + assert("edward-71", from == 0);
  44514. + assert("edward-860", ctail_ok(target));
  44515. +
  44516. + memcpy(first_unit(target) + nr_units_ctail(target) - count,
  44517. + first_unit(source), count); /* free space sits at the end of @target */
  44518. + } else {
  44519. + /*
  44520. + * target item is moved to right already
  44521. + */
  44522. + reiser4_key key;
  44523. +
  44524. + assert("edward-72", nr_units_ctail(source) == from + count);
  44525. +
  44526. + if (free_space == count) {
  44527. + init_ctail(target, source, NULL); /* fresh item: write header copied from @source */
  44528. + } else {
  44529. + /*
  44530. + * shifting to a mergeable item
  44531. + */
  44532. + assert("edward-862", ctail_ok(target));
  44533. + }
  44534. + memcpy(first_unit(target), first_unit(source) + from, count);
  44535. +
  44536. + assert("edward-863", ctail_ok(target));
  44537. + /*
  44538. + * new units are inserted before first unit
  44539. + * in an item, therefore, we have to update
  44540. + * item key
  44541. + */
  44542. + item_key_by_coord(source, &key);
  44543. + set_key_offset(&key, get_key_offset(&key) + from);
  44544. +
  44545. + node_plugin_by_node(target->node)->update_item_key(target,
  44546. + &key,
  44547. + NULL /*info */);
  44548. + }
  44549. +}
  44550. +
  44551. +/* plugin->u.item.b.create_hook */
  44552. +int create_hook_ctail(const coord_t * coord, void *arg)
  44553. +{
  44554. + assert("edward-864", znode_is_loaded(coord->node));
  44555. +
  44556. + znode_set_convertible(coord->node); /* flag node convertible (see shift_hook_ctail()) */
  44557. + return 0;
  44558. +}
  44559. +
  44560. +/* plugin->u.item.b.kill_hook */
  44561. +int kill_hook_ctail(const coord_t * coord, pos_in_node_t from,
  44562. + pos_in_node_t count, carry_kill_data * kdata)
  44563. +{
  44564. + struct inode *inode;
  44565. +
  44566. + assert("edward-1157", item_id_by_coord(coord) == CTAIL_ID);
  44567. + assert("edward-291", znode_is_write_locked(coord->node));
  44568. +
  44569. + inode = kdata->inode;
  44570. + if (inode) {
  44571. + reiser4_key key;
  44572. + struct cryptcompress_info * info;
  44573. + cloff_t index;
  44574. +
  44575. + item_key_by_coord(coord, &key);
  44576. + info = cryptcompress_inode_data(inode);
  44577. + index = off_to_clust(get_key_offset(&key), inode); /* logical cluster being truncated */
  44578. +
  44579. + if (from == 0) {
  44580. + info->trunc_index = index;
  44581. + if (is_disk_cluster_key(&key, coord)) {
  44582. + /*
  44583. + * first item of disk cluster is to be killed
  44584. + */
  44585. + truncate_complete_page_cluster(
  44586. + inode, index, kdata->params.truncate);
  44587. + inode_sub_bytes(inode,
  44588. + inode_cluster_size(inode)); /* whole cluster's bytes go away */
  44589. + }
  44590. + }
  44591. + }
  44592. + return 0;
  44593. +}
  44594. +
  44595. +/* for shift_hook_ctail(),
  44596. + return true if the first disk cluster item has dirty child
  44597. +*/
  44598. +static int ctail_convertible(const coord_t * coord)
  44599. +{
  44600. + int result;
  44601. + reiser4_key key;
  44602. + jnode *child = NULL;
  44603. +
  44604. + assert("edward-477", coord != NULL);
  44605. + assert("edward-478", item_id_by_coord(coord) == CTAIL_ID);
  44606. +
  44607. + if (coord_is_unprepped_ctail(coord))
  44608. + /* unprepped ctail should be converted */
  44609. + return 1;
  44610. +
  44611. + item_key_by_coord(coord, &key);
  44612. + child = jlookup(current_tree,
  44613. + get_key_objectid(&key),
  44614. + off_to_pg(off_by_coord(coord))); /* jnode of the page backing this item */
  44615. + if (!child)
  44616. + return 0;
  44617. + result = JF_ISSET(child, JNODE_DIRTY);
  44618. + jput(child); /* drop reference taken by jlookup() */
  44619. + return result;
  44620. +}
  44621. +
  44622. +/* FIXME-EDWARD */
  44623. +/* plugin->u.item.b.shift_hook */
  44624. +int shift_hook_ctail(const coord_t * item /* coord of item */ ,
  44625. + unsigned from UNUSED_ARG /* start unit */ ,
  44626. + unsigned count UNUSED_ARG /* stop unit */ ,
  44627. + znode * old_node /* old parent */ )
  44628. +{
  44629. + assert("edward-479", item != NULL);
  44630. + assert("edward-480", item->node != old_node);
  44631. +
  44632. + if (!znode_convertible(old_node) || znode_convertible(item->node))
  44633. + return 0; /* nothing to propagate */
  44634. + if (ctail_convertible(item))
  44635. + znode_set_convertible(item->node); /* carry convertible status to the new node */
  44636. + return 0;
  44637. +}
  44638. +
  44639. +static int
  44640. +cut_or_kill_ctail_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  44641. + int cut, void *p, reiser4_key * smallest_removed,
  44642. + reiser4_key * new_first)
  44643. +{
  44644. + pos_in_node_t count; /* number of units to cut */
  44645. + char *item;
  44646. +
  44647. + count = to - from + 1;
  44648. + item = item_body_by_coord(coord);
  44649. +
  44650. + assert("edward-74", ergo(from != 0, to == coord_last_unit_pos(coord)));
  44651. +
  44652. + if (smallest_removed) {
  44653. + /* store smallest key removed */
  44654. + item_key_by_coord(coord, smallest_removed);
  44655. + set_key_offset(smallest_removed,
  44656. + get_key_offset(smallest_removed) + from);
  44657. + }
  44658. +
  44659. + if (new_first) {
  44660. + assert("vs-1531", from == 0);
  44661. +
  44662. + item_key_by_coord(coord, new_first);
  44663. + set_key_offset(new_first,
  44664. + get_key_offset(new_first) + from + count);
  44665. + }
  44666. +
  44667. + if (!cut)
  44668. + kill_hook_ctail(coord, from, 0, (struct carry_kill_data *)p);
  44669. +
  44670. + if (from == 0) {
  44671. + if (count != nr_units_ctail(coord)) {
  44672. + /* part of item is removed, so move free space at the beginning
  44673. + of the item and update item key */
  44674. + reiser4_key key;
  44675. + memcpy(item + to + 1, item, sizeof(ctail_item_format));
  44676. + item_key_by_coord(coord, &key);
  44677. + set_key_offset(&key, get_key_offset(&key) + count);
  44678. + node_plugin_by_node(coord->node)->update_item_key(coord,
  44679. + &key,
  44680. + NULL);
  44681. + } else {
  44682. + /* cut_units should not be called to cut everything */
  44683. + assert("vs-1532", ergo(cut, 0));
  44684. + /* whole item is cut, so more than the amount of space occupied
  44685. + by units got freed */
  44686. + count += sizeof(ctail_item_format);
  44687. + }
  44688. + }
  44689. + return count;
  44690. +}
  44691. +
  44692. +/* plugin->u.item.b.cut_units */
  44693. +int
  44694. +cut_units_ctail(coord_t * item, pos_in_node_t from, pos_in_node_t to,
  44695. + carry_cut_data * cdata, reiser4_key * smallest_removed,
  44696. + reiser4_key * new_first)
  44697. +{
  44698. + return cut_or_kill_ctail_units(item, from, to, 1, NULL,
  44699. + smallest_removed, new_first);
  44700. +}
  44701. +
  44702. +/* plugin->u.item.b.kill_units */
  44703. +int
  44704. +kill_units_ctail(coord_t * item, pos_in_node_t from, pos_in_node_t to,
  44705. + struct carry_kill_data *kdata, reiser4_key * smallest_removed,
  44706. + reiser4_key * new_first)
  44707. +{
  44708. + return cut_or_kill_ctail_units(item, from, to, 0, kdata,
  44709. + smallest_removed, new_first);
  44710. +}
  44711. +
  44712. +/* plugin->u.item.s.file.read */
  44713. +int read_ctail(flow_t * f, hint_t * hint,
  44714. + struct kiocb *iocb, struct iov_iter *iter)
  44715. +{
  44716. + uf_coord_t *uf_coord;
  44717. + coord_t *coord;
  44718. +
  44719. + uf_coord = &hint->ext_coord;
  44720. + coord = &uf_coord->coord;
  44721. + assert("edward-127", f->user == 0);
  44722. + assert("edward-129", coord && coord->node);
  44723. + assert("edward-130", coord_is_existing_unit(coord));
  44724. + assert("edward-132", znode_is_loaded(coord->node));
  44725. +
  44726. + /* start read only from the beginning of ctail */
  44727. + assert("edward-133", coord->unit_pos == 0);
  44728. + /* read only whole ctails */
  44729. + assert("edward-135", nr_units_ctail(coord) <= f->length);
  44730. +
  44731. + assert("edward-136", reiser4_schedulable());
  44732. + assert("edward-886", ctail_ok(coord));
  44733. +
  44734. + if (f->data)
  44735. + memcpy(f->data, (char *)first_unit(coord),
  44736. + (size_t) nr_units_ctail(coord));
  44737. +
  44738. + dclust_set_extension_shift(hint);
  44739. + mark_page_accessed(znode_page(coord->node));
  44740. + move_flow_forward(f, nr_units_ctail(coord));
  44741. +
  44742. + return 0;
  44743. +}
  44744. +
  44745. +/**
  44746. + * Prepare transform stream with plain text for page
  44747. + * @page taking into account synchronization issues.
  44748. + */
  44749. +static int ctail_read_disk_cluster(struct cluster_handle * clust,
  44750. + struct inode * inode, struct page * page,
  44751. + znode_lock_mode mode)
  44752. +{
  44753. + int result;
  44754. +
  44755. + assert("edward-1450", mode == ZNODE_READ_LOCK || mode == ZNODE_WRITE_LOCK);
  44756. + assert("edward-671", clust->hint != NULL);
  44757. + assert("edward-140", clust->dstat == INVAL_DISK_CLUSTER);
  44758. + assert("edward-672", cryptcompress_inode_ok(inode));
  44759. + assert("edward-1527", PageLocked(page));
  44760. +
  44761. + unlock_page(page);
  44762. +
  44763. + /* set input stream */
  44764. + result = grab_tfm_stream(inode, &clust->tc, INPUT_STREAM);
  44765. + if (result) {
  44766. + lock_page(page);
  44767. + return result;
  44768. + }
  44769. + result = find_disk_cluster(clust, inode, 1 /* read items */, mode);
  44770. + lock_page(page);
  44771. + if (result)
  44772. + return result;
  44773. + /*
  44774. + * at this point we have locked position in the tree
  44775. + */
  44776. + assert("edward-1528", znode_is_any_locked(clust->hint->lh.node));
  44777. +
  44778. + if (page->mapping != inode->i_mapping) {
  44779. + /* page was truncated */
  44780. + reiser4_unset_hint(clust->hint);
  44781. + reset_cluster_params(clust);
  44782. + return AOP_TRUNCATED_PAGE;
  44783. + }
  44784. + if (PageUptodate(page)) {
  44785. + /* disk cluster can be obsolete, don't use it! */
  44786. + reiser4_unset_hint(clust->hint);
  44787. + reset_cluster_params(clust);
  44788. + return 0;
  44789. + }
  44790. + if (clust->dstat == FAKE_DISK_CLUSTER ||
  44791. + clust->dstat == UNPR_DISK_CLUSTER ||
  44792. + clust->dstat == TRNC_DISK_CLUSTER) {
  44793. + /*
  44794. + * this information about disk cluster will be valid
  44795. + * as long as we keep the position in the tree locked
  44796. + */
  44797. + tfm_cluster_set_uptodate(&clust->tc);
  44798. + return 0;
  44799. + }
  44800. + /* now prepare output stream.. */
  44801. + result = grab_coa(&clust->tc, inode_compression_plugin(inode));
  44802. + if (result)
  44803. + return result;
  44804. + /* ..and fill this with plain text */
  44805. + result = reiser4_inflate_cluster(clust, inode);
  44806. + if (result)
  44807. + return result;
  44808. + /*
  44809. + * The stream is ready! It won't be obsolete as
  44810. + * long as we keep last disk cluster item locked.
  44811. + */
  44812. + tfm_cluster_set_uptodate(&clust->tc);
  44813. + return 0;
  44814. +}
  44815. +
  44816. +/*
  44817. + * fill one page with plain text.
  44818. + */
  44819. +int do_readpage_ctail(struct inode * inode, struct cluster_handle * clust,
  44820. + struct page *page, znode_lock_mode mode)
  44821. +{
  44822. + int ret;
  44823. + unsigned cloff;
  44824. + char *data;
  44825. + size_t to_page;
  44826. + struct tfm_cluster * tc = &clust->tc;
  44827. +
  44828. + assert("edward-212", PageLocked(page));
  44829. +
  44830. + if (unlikely(page->mapping != inode->i_mapping))
  44831. + return AOP_TRUNCATED_PAGE;
  44832. + if (PageUptodate(page))
  44833. + goto exit;
  44834. + to_page = pbytes(page_index(page), inode);
  44835. + if (to_page == 0) {
  44836. + zero_user(page, 0, PAGE_SIZE);
  44837. + SetPageUptodate(page);
  44838. + goto exit;
  44839. + }
  44840. + if (!tfm_cluster_is_uptodate(&clust->tc)) {
  44841. + clust->index = pg_to_clust(page->index, inode);
  44842. +
  44843. + /* this will unlock/lock the page */
  44844. + ret = ctail_read_disk_cluster(clust, inode, page, mode);
  44845. +
  44846. + assert("edward-212", PageLocked(page));
  44847. + if (ret)
  44848. + return ret;
  44849. +
  44850. + /* refresh bytes */
  44851. + to_page = pbytes(page_index(page), inode);
  44852. + if (to_page == 0) {
  44853. + zero_user(page, 0, PAGE_SIZE);
  44854. + SetPageUptodate(page);
  44855. + goto exit;
  44856. + }
  44857. + }
  44858. + if (PageUptodate(page))
  44859. + /* somebody else filled it already */
  44860. + goto exit;
  44861. +
  44862. + assert("edward-119", tfm_cluster_is_uptodate(tc));
  44863. + assert("edward-1529", znode_is_any_locked(clust->hint->lh.node));
  44864. +
  44865. + switch (clust->dstat) {
  44866. + case UNPR_DISK_CLUSTER:
  44867. + /*
  44868. + * Page is not uptodate and item cluster is unprepped:
  44869. + * this must not ever happen.
  44870. + */
  44871. + warning("edward-1632",
  44872. + "Bad item cluster %lu (Inode %llu). Fsck?",
  44873. + clust->index,
  44874. + (unsigned long long)get_inode_oid(inode));
  44875. + return RETERR(-EIO);
  44876. + case TRNC_DISK_CLUSTER:
  44877. + /*
  44878. + * Race with truncate!
  44879. + * We resolve it in favour of the last one (the only way,
  44880. + * as in this case plain text is unrecoverable)
  44881. + */
  44882. + case FAKE_DISK_CLUSTER:
  44883. + /* fill the page by zeroes */
  44884. + zero_user(page, 0, PAGE_SIZE);
  44885. + SetPageUptodate(page);
  44886. + break;
  44887. + case PREP_DISK_CLUSTER:
  44888. + /* fill page by transformed stream with plain text */
  44889. + assert("edward-1058", !PageUptodate(page));
  44890. + assert("edward-120", tc->len <= inode_cluster_size(inode));
  44891. +
  44892. + /* page index in this logical cluster */
  44893. + cloff = pg_to_off_to_cloff(page->index, inode);
  44894. +
  44895. + data = kmap(page);
  44896. + memcpy(data, tfm_stream_data(tc, OUTPUT_STREAM) + cloff, to_page);
  44897. + memset(data + to_page, 0, (size_t) PAGE_SIZE - to_page);
  44898. + flush_dcache_page(page);
  44899. + kunmap(page);
  44900. + SetPageUptodate(page);
  44901. + break;
  44902. + default:
  44903. + impossible("edward-1169", "bad disk cluster state");
  44904. + }
  44905. + exit:
  44906. + return 0;
  44907. +}
  44908. +
  44909. +/* plugin->u.item.s.file.readpage */
  44910. +int readpage_ctail(void *vp, struct page *page)
  44911. +{
  44912. + int result;
  44913. + hint_t * hint;
  44914. + struct cluster_handle * clust = vp;
  44915. +
  44916. + assert("edward-114", clust != NULL);
  44917. + assert("edward-115", PageLocked(page));
  44918. + assert("edward-116", !PageUptodate(page));
  44919. + assert("edward-118", page->mapping && page->mapping->host);
  44920. + assert("edward-867", !tfm_cluster_is_uptodate(&clust->tc));
  44921. +
  44922. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  44923. + if (hint == NULL) {
  44924. + unlock_page(page);
  44925. + return RETERR(-ENOMEM);
  44926. + }
  44927. + clust->hint = hint;
  44928. + result = load_file_hint(clust->file, hint);
  44929. + if (result) {
  44930. + kfree(hint);
  44931. + unlock_page(page);
  44932. + return result;
  44933. + }
  44934. + assert("vs-25", hint->ext_coord.lh == &hint->lh);
  44935. +
  44936. + result = do_readpage_ctail(page->mapping->host, clust, page,
  44937. + ZNODE_READ_LOCK);
  44938. + assert("edward-213", PageLocked(page));
  44939. + assert("edward-1163", ergo(!result, PageUptodate(page)));
  44940. +
  44941. + unlock_page(page);
  44942. + done_lh(&hint->lh);
  44943. + hint->ext_coord.valid = 0;
  44944. + save_file_hint(clust->file, hint);
  44945. + kfree(hint);
  44946. + tfm_cluster_clr_uptodate(&clust->tc);
  44947. +
  44948. + return result;
  44949. +}
  44950. +
  44951. +/* Helper function for ->readpages() */
  44952. +static int ctail_read_page_cluster(struct cluster_handle * clust,
  44953. + struct inode *inode)
  44954. +{
  44955. + int i;
  44956. + int result;
  44957. + assert("edward-779", clust != NULL);
  44958. + assert("edward-1059", clust->win == NULL);
  44959. + assert("edward-780", inode != NULL);
  44960. +
  44961. + result = prepare_page_cluster(inode, clust, READ_OP);
  44962. + if (result)
  44963. + return result;
  44964. +
  44965. + assert("edward-781", !tfm_cluster_is_uptodate(&clust->tc));
  44966. +
  44967. + for (i = 0; i < clust->nr_pages; i++) {
  44968. + struct page *page = clust->pages[i];
  44969. + lock_page(page);
  44970. + result = do_readpage_ctail(inode, clust, page, ZNODE_READ_LOCK);
  44971. + unlock_page(page);
  44972. + if (result)
  44973. + break;
  44974. + }
  44975. + tfm_cluster_clr_uptodate(&clust->tc);
  44976. + put_page_cluster(clust, inode, READ_OP);
  44977. + return result;
  44978. +}
  44979. +
  44980. +/* filler for read_cache_pages() */
  44981. +static int ctail_readpages_filler(void * data, struct page * page)
  44982. +{
  44983. + int ret = 0;
  44984. + struct cluster_handle * clust = data;
  44985. + struct inode * inode = file_inode(clust->file);
  44986. +
  44987. + assert("edward-1525", page->mapping == inode->i_mapping);
  44988. +
  44989. + if (PageUptodate(page)) {
  44990. + unlock_page(page);
  44991. + return 0;
  44992. + }
  44993. + if (pbytes(page_index(page), inode) == 0) {
  44994. + zero_user(page, 0, PAGE_SIZE);
  44995. + SetPageUptodate(page);
  44996. + unlock_page(page);
  44997. + return 0;
  44998. + }
  44999. + move_cluster_forward(clust, inode, page->index);
  45000. + unlock_page(page);
  45001. + /*
  45002. + * read the whole page cluster
  45003. + */
  45004. + ret = ctail_read_page_cluster(clust, inode);
  45005. +
  45006. + assert("edward-869", !tfm_cluster_is_uptodate(&clust->tc));
  45007. + return ret;
  45008. +}
  45009. +
  45010. +/*
  45011. + * We populate a bit more than upper readahead suggests:
  45012. + * with each nominated page we read the whole page cluster
  45013. + * this page belongs to.
  45014. + */
  45015. +int readpages_ctail(struct file *file, struct address_space *mapping,
  45016. + struct list_head *pages)
  45017. +{
  45018. + int ret = 0;
  45019. + hint_t *hint;
  45020. + struct cluster_handle clust;
  45021. + struct inode *inode = mapping->host;
  45022. +
  45023. + assert("edward-1521", inode == file_inode(file));
  45024. +
  45025. + cluster_init_read(&clust, NULL);
  45026. + clust.file = file;
  45027. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  45028. + if (hint == NULL) {
  45029. + warning("vs-28", "failed to allocate hint");
  45030. + ret = RETERR(-ENOMEM);
  45031. + goto exit1;
  45032. + }
  45033. + clust.hint = hint;
  45034. + ret = load_file_hint(clust.file, hint);
  45035. + if (ret) {
  45036. + warning("edward-1522", "failed to load hint");
  45037. + goto exit2;
  45038. + }
  45039. + assert("vs-26", hint->ext_coord.lh == &hint->lh);
  45040. + ret = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
  45041. + if (ret) {
  45042. + warning("edward-1523", "failed to alloc pgset");
  45043. + goto exit3;
  45044. + }
  45045. + ret = read_cache_pages(mapping, pages, ctail_readpages_filler, &clust);
  45046. +
  45047. + assert("edward-870", !tfm_cluster_is_uptodate(&clust.tc));
  45048. + exit3:
  45049. + done_lh(&hint->lh);
  45050. + save_file_hint(file, hint);
  45051. + hint->ext_coord.valid = 0;
  45052. + exit2:
  45053. + kfree(hint);
  45054. + exit1:
  45055. + put_cluster_handle(&clust);
  45056. + return ret;
  45057. +}
  45058. +
  45059. +/*
  45060. + plugin->u.item.s.file.append_key
  45061. + key of the first item of the next disk cluster
  45062. +*/
  45063. +reiser4_key *append_key_ctail(const coord_t * coord, reiser4_key * key)
  45064. +{
  45065. + assert("edward-1241", item_id_by_coord(coord) == CTAIL_ID);
  45066. + assert("edward-1242", cluster_shift_ok(cluster_shift_by_coord(coord)));
  45067. +
  45068. + item_key_by_coord(coord, key);
  45069. + set_key_offset(key, ((__u64) (clust_by_coord(coord, NULL)) + 1)
  45070. + << cluster_shift_by_coord(coord));
  45071. + return key;
  45072. +}
  45073. +
  45074. +static int insert_unprepped_ctail(struct cluster_handle * clust,
  45075. + struct inode *inode)
  45076. +{
  45077. + int result;
  45078. + char buf[UCTAIL_NR_UNITS];
  45079. + reiser4_item_data data;
  45080. + reiser4_key key;
  45081. + int shift = (int)UCTAIL_SHIFT;
  45082. +
  45083. + memset(buf, 0, (size_t) UCTAIL_NR_UNITS);
  45084. + result = key_by_inode_cryptcompress(inode,
  45085. + clust_to_off(clust->index, inode),
  45086. + &key);
  45087. + if (result)
  45088. + return result;
  45089. + data.user = 0;
  45090. + data.iplug = item_plugin_by_id(CTAIL_ID);
  45091. + data.arg = &shift;
  45092. + data.length = sizeof(ctail_item_format) + (size_t) UCTAIL_NR_UNITS;
  45093. + data.data = buf;
  45094. +
  45095. + result = insert_by_coord(&clust->hint->ext_coord.coord,
  45096. + &data, &key, clust->hint->ext_coord.lh, 0);
  45097. + return result;
  45098. +}
  45099. +
  45100. +static int
  45101. +insert_cryptcompress_flow(coord_t * coord, lock_handle * lh, flow_t * f,
  45102. + int cluster_shift)
  45103. +{
  45104. + int result;
  45105. + carry_pool *pool;
  45106. + carry_level *lowest_level;
  45107. + reiser4_item_data *data;
  45108. + carry_op *op;
  45109. +
  45110. + pool =
  45111. + init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
  45112. + sizeof(*data));
  45113. + if (IS_ERR(pool))
  45114. + return PTR_ERR(pool);
  45115. + lowest_level = (carry_level *) (pool + 1);
  45116. + init_carry_level(lowest_level, pool);
  45117. + data = (reiser4_item_data *) (lowest_level + 3);
  45118. +
  45119. + assert("edward-466", coord->between == AFTER_ITEM
  45120. + || coord->between == AFTER_UNIT || coord->between == BEFORE_ITEM
  45121. + || coord->between == EMPTY_NODE
  45122. + || coord->between == BEFORE_UNIT);
  45123. +
  45124. + if (coord->between == AFTER_UNIT) {
  45125. + coord->unit_pos = 0;
  45126. + coord->between = AFTER_ITEM;
  45127. + }
  45128. + op = reiser4_post_carry(lowest_level, COP_INSERT_FLOW, coord->node,
  45129. + 0 /* operate directly on coord -> node */);
  45130. + if (IS_ERR(op) || (op == NULL)) {
  45131. + done_carry_pool(pool);
  45132. + return RETERR(op ? PTR_ERR(op) : -EIO);
  45133. + }
  45134. + data->user = 0;
  45135. + data->iplug = item_plugin_by_id(CTAIL_ID);
  45136. + data->arg = &cluster_shift;
  45137. +
  45138. + data->length = 0;
  45139. + data->data = NULL;
  45140. +
  45141. + op->u.insert_flow.flags =
  45142. + COPI_SWEEP |
  45143. + COPI_DONT_SHIFT_LEFT |
  45144. + COPI_DONT_SHIFT_RIGHT;
  45145. + op->u.insert_flow.insert_point = coord;
  45146. + op->u.insert_flow.flow = f;
  45147. + op->u.insert_flow.data = data;
  45148. + op->u.insert_flow.new_nodes = 0;
  45149. +
  45150. + lowest_level->track_type = CARRY_TRACK_CHANGE;
  45151. + lowest_level->tracked = lh;
  45152. +
  45153. + result = reiser4_carry(lowest_level, NULL);
  45154. + done_carry_pool(pool);
  45155. +
  45156. + return result;
  45157. +}
  45158. +
  45159. +/* Implementation of CRC_APPEND_ITEM mode of ctail conversion */
  45160. +static int insert_cryptcompress_flow_in_place(coord_t * coord,
  45161. + lock_handle * lh, flow_t * f,
  45162. + int cluster_shift)
  45163. +{
  45164. + int ret;
  45165. + coord_t pos;
  45166. + lock_handle lock;
  45167. +
  45168. + assert("edward-484",
  45169. + coord->between == AT_UNIT || coord->between == AFTER_ITEM);
  45170. + assert("edward-485", item_id_by_coord(coord) == CTAIL_ID);
  45171. +
  45172. + coord_dup(&pos, coord);
  45173. + pos.unit_pos = 0;
  45174. + pos.between = AFTER_ITEM;
  45175. +
  45176. + init_lh(&lock);
  45177. + copy_lh(&lock, lh);
  45178. +
  45179. + ret = insert_cryptcompress_flow(&pos, &lock, f, cluster_shift);
  45180. + done_lh(&lock);
  45181. + assert("edward-1347", znode_is_write_locked(lh->node));
  45182. + assert("edward-1228", !ret);
  45183. + return ret;
  45184. +}
  45185. +
  45186. +/* Implementation of CRC_OVERWRITE_ITEM mode of ctail conversion */
  45187. +static int overwrite_ctail(coord_t * coord, flow_t * f)
  45188. +{
  45189. + unsigned count;
  45190. +
  45191. + assert("edward-269", f->user == 0);
  45192. + assert("edward-270", f->data != NULL);
  45193. + assert("edward-271", f->length > 0);
  45194. + assert("edward-272", coord_is_existing_unit(coord));
  45195. + assert("edward-273", coord->unit_pos == 0);
  45196. + assert("edward-274", znode_is_write_locked(coord->node));
  45197. + assert("edward-275", reiser4_schedulable());
  45198. + assert("edward-467", item_id_by_coord(coord) == CTAIL_ID);
  45199. + assert("edward-1243", ctail_ok(coord));
  45200. +
  45201. + count = nr_units_ctail(coord);
  45202. +
  45203. + if (count > f->length)
  45204. + count = f->length;
  45205. + memcpy(first_unit(coord), f->data, count);
  45206. + move_flow_forward(f, count);
  45207. + coord->unit_pos += count;
  45208. + return 0;
  45209. +}
  45210. +
  45211. +/* Implementation of CRC_CUT_ITEM mode of ctail conversion:
  45212. + cut ctail (part or whole) starting from next unit position */
  45213. +static int cut_ctail(coord_t * coord)
  45214. +{
  45215. + coord_t stop;
  45216. +
  45217. + assert("edward-435", coord->between == AT_UNIT &&
  45218. + coord->item_pos < coord_num_items(coord) &&
  45219. + coord->unit_pos <= coord_num_units(coord));
  45220. +
  45221. + if (coord->unit_pos == coord_num_units(coord))
  45222. + /* nothing to cut */
  45223. + return 0;
  45224. + coord_dup(&stop, coord);
  45225. + stop.unit_pos = coord_last_unit_pos(coord);
  45226. +
  45227. + return cut_node_content(coord, &stop, NULL, NULL, NULL);
  45228. +}
  45229. +
  45230. +int ctail_insert_unprepped_cluster(struct cluster_handle * clust,
  45231. + struct inode * inode)
  45232. +{
  45233. + int result;
  45234. + assert("edward-1244", inode != NULL);
  45235. + assert("edward-1245", clust->hint != NULL);
  45236. + assert("edward-1246", clust->dstat == FAKE_DISK_CLUSTER);
  45237. + assert("edward-1247", clust->reserved == 1);
  45238. +
  45239. + result = get_disk_cluster_locked(clust, inode, ZNODE_WRITE_LOCK);
  45240. + if (cbk_errored(result))
  45241. + return result;
  45242. + assert("edward-1249", result == CBK_COORD_NOTFOUND);
  45243. + assert("edward-1250", znode_is_write_locked(clust->hint->lh.node));
  45244. +
  45245. + assert("edward-1295",
  45246. + clust->hint->ext_coord.lh->node ==
  45247. + clust->hint->ext_coord.coord.node);
  45248. +
  45249. + coord_set_between_clusters(&clust->hint->ext_coord.coord);
  45250. +
  45251. + result = insert_unprepped_ctail(clust, inode);
  45252. + all_grabbed2free();
  45253. +
  45254. + assert("edward-1251", !result);
  45255. + assert("edward-1252", cryptcompress_inode_ok(inode));
  45256. + assert("edward-1253", znode_is_write_locked(clust->hint->lh.node));
  45257. + assert("edward-1254",
  45258. + reiser4_clustered_blocks(reiser4_get_current_sb()));
  45259. + assert("edward-1255",
  45260. + znode_convertible(clust->hint->ext_coord.coord.node));
  45261. +
  45262. + return result;
  45263. +}
  45264. +
  45265. +/* plugin->u.item.f.scan */
  45266. +int scan_ctail(flush_scan * scan)
  45267. +{
  45268. + int result = 0;
  45269. + struct page *page;
  45270. + struct inode *inode;
  45271. + jnode *node = scan->node;
  45272. +
  45273. + assert("edward-227", scan->node != NULL);
  45274. + assert("edward-228", jnode_is_cluster_page(scan->node));
  45275. + assert("edward-639", znode_is_write_locked(scan->parent_lock.node));
  45276. +
  45277. + page = jnode_page(node);
  45278. + inode = page->mapping->host;
  45279. +
  45280. + if (!reiser4_scanning_left(scan))
  45281. + return result;
  45282. +
  45283. + if (!znode_convertible(scan->parent_lock.node)) {
  45284. + if (JF_ISSET(scan->node, JNODE_DIRTY))
  45285. + znode_set_convertible(scan->parent_lock.node);
  45286. + else {
  45287. + warning("edward-681",
  45288. + "cluster page is already processed");
  45289. + return -EAGAIN;
  45290. + }
  45291. + }
  45292. + return result;
  45293. +}
  45294. +
  45295. +/* If true, this function attaches children */
  45296. +static int should_attach_convert_idata(flush_pos_t * pos)
  45297. +{
  45298. + int result;
  45299. + assert("edward-431", pos != NULL);
  45300. + assert("edward-432", pos->child == NULL);
  45301. + assert("edward-619", znode_is_write_locked(pos->coord.node));
  45302. + assert("edward-470",
  45303. + item_plugin_by_coord(&pos->coord) ==
  45304. + item_plugin_by_id(CTAIL_ID));
  45305. +
  45306. + /* check for leftmost child */
  45307. + utmost_child_ctail(&pos->coord, LEFT_SIDE, &pos->child);
  45308. +
  45309. + if (!pos->child)
  45310. + return 0;
  45311. + spin_lock_jnode(pos->child);
  45312. + result = (JF_ISSET(pos->child, JNODE_DIRTY) &&
  45313. + pos->child->atom == ZJNODE(pos->coord.node)->atom);
  45314. + spin_unlock_jnode(pos->child);
  45315. + if (!result && pos->child) {
  45316. + /* existing child isn't to attach, clear up this one */
  45317. + jput(pos->child);
  45318. + pos->child = NULL;
  45319. + }
  45320. + return result;
  45321. +}
  45322. +
  45323. +/**
  45324. + * Collect all needed information about the object here,
  45325. + * as in-memory inode can be evicted from memory before
  45326. + * disk update completion.
  45327. + */
  45328. +static int init_convert_data_ctail(struct convert_item_info * idata,
  45329. + struct inode *inode)
  45330. +{
  45331. + assert("edward-813", idata != NULL);
  45332. + assert("edward-814", inode != NULL);
  45333. +
  45334. + idata->cluster_shift = inode_cluster_shift(inode);
  45335. + idata->d_cur = DC_FIRST_ITEM;
  45336. + idata->d_next = DC_INVALID_STATE;
  45337. +
  45338. + return 0;
  45339. +}
  45340. +
  45341. +static int alloc_item_convert_data(struct convert_info * sq)
  45342. +{
  45343. + assert("edward-816", sq != NULL);
  45344. + assert("edward-817", sq->itm == NULL);
  45345. +
  45346. + sq->itm = kmalloc(sizeof(*sq->itm), reiser4_ctx_gfp_mask_get());
  45347. + if (sq->itm == NULL)
  45348. + return RETERR(-ENOMEM);
  45349. + init_lh(&sq->right_lock);
  45350. + sq->right_locked = 0;
  45351. + return 0;
  45352. +}
  45353. +
  45354. +static void free_item_convert_data(struct convert_info * sq)
  45355. +{
  45356. + assert("edward-818", sq != NULL);
  45357. + assert("edward-819", sq->itm != NULL);
  45358. + assert("edward-820", sq->iplug != NULL);
  45359. +
  45360. + done_lh(&sq->right_lock);
  45361. + sq->right_locked = 0;
  45362. + kfree(sq->itm);
  45363. + sq->itm = NULL;
  45364. + return;
  45365. +}
  45366. +
  45367. +static struct convert_info *alloc_convert_data(void)
  45368. +{
  45369. + struct convert_info *info;
  45370. +
  45371. + info = kmalloc(sizeof(*info), reiser4_ctx_gfp_mask_get());
  45372. + if (info != NULL) {
  45373. + memset(info, 0, sizeof(*info));
  45374. + cluster_init_write(&info->clust, NULL);
  45375. + }
  45376. + return info;
  45377. +}
  45378. +
  45379. +static void reset_convert_data(struct convert_info *info)
  45380. +{
  45381. + info->clust.tc.hole = 0;
  45382. +}
  45383. +
  45384. +void free_convert_data(flush_pos_t * pos)
  45385. +{
  45386. + struct convert_info *sq;
  45387. +
  45388. + assert("edward-823", pos != NULL);
  45389. + assert("edward-824", pos->sq != NULL);
  45390. +
  45391. + sq = pos->sq;
  45392. + if (sq->itm)
  45393. + free_item_convert_data(sq);
  45394. + put_cluster_handle(&sq->clust);
  45395. + kfree(pos->sq);
  45396. + pos->sq = NULL;
  45397. + return;
  45398. +}
  45399. +
  45400. +static int init_item_convert_data(flush_pos_t * pos, struct inode *inode)
  45401. +{
  45402. + struct convert_info *sq;
  45403. +
  45404. + assert("edward-825", pos != NULL);
  45405. + assert("edward-826", pos->sq != NULL);
  45406. + assert("edward-827", item_convert_data(pos) != NULL);
  45407. + assert("edward-828", inode != NULL);
  45408. +
  45409. + sq = pos->sq;
  45410. + memset(sq->itm, 0, sizeof(*sq->itm));
  45411. +
  45412. + /* iplug->init_convert_data() */
  45413. + return init_convert_data_ctail(sq->itm, inode);
  45414. +}
  45415. +
  45416. +/* create and attach disk cluster info used by 'convert' phase of the flush
  45417. + squalloc() */
  45418. +static int attach_convert_idata(flush_pos_t * pos, struct inode *inode)
  45419. +{
  45420. + int ret = 0;
  45421. + struct convert_item_info *info;
  45422. + struct cluster_handle *clust;
  45423. + file_plugin *fplug = inode_file_plugin(inode);
  45424. +
  45425. + assert("edward-248", pos != NULL);
  45426. + assert("edward-249", pos->child != NULL);
  45427. + assert("edward-251", inode != NULL);
  45428. + assert("edward-682", cryptcompress_inode_ok(inode));
  45429. + assert("edward-252",
  45430. + fplug == file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
  45431. + assert("edward-473",
  45432. + item_plugin_by_coord(&pos->coord) ==
  45433. + item_plugin_by_id(CTAIL_ID));
  45434. +
  45435. + if (!pos->sq) {
  45436. + pos->sq = alloc_convert_data();
  45437. + if (!pos->sq)
  45438. + return RETERR(-ENOMEM);
  45439. + }
  45440. + else
  45441. + reset_convert_data(pos->sq);
  45442. +
  45443. + clust = &pos->sq->clust;
  45444. +
  45445. + ret = set_cluster_by_page(clust,
  45446. + jnode_page(pos->child),
  45447. + MAX_CLUSTER_NRPAGES);
  45448. + if (ret)
  45449. + goto err;
  45450. +
  45451. + assert("edward-829", pos->sq != NULL);
  45452. + assert("edward-250", item_convert_data(pos) == NULL);
  45453. +
  45454. + pos->sq->iplug = item_plugin_by_id(CTAIL_ID);
  45455. +
  45456. + ret = alloc_item_convert_data(pos->sq);
  45457. + if (ret)
  45458. + goto err;
  45459. + ret = init_item_convert_data(pos, inode);
  45460. + if (ret)
  45461. + goto err;
  45462. + info = item_convert_data(pos);
  45463. +
  45464. + ret = checkout_logical_cluster(clust, pos->child, inode);
  45465. + if (ret)
  45466. + goto err;
  45467. +
  45468. + reiser4_deflate_cluster(clust, inode);
  45469. + inc_item_convert_count(pos);
  45470. +
  45471. + /* prepare flow for insertion */
  45472. + fplug->flow_by_inode(inode,
  45473. + (const char __user *)tfm_stream_data(&clust->tc,
  45474. + OUTPUT_STREAM),
  45475. + 0 /* kernel space */ ,
  45476. + clust->tc.len,
  45477. + clust_to_off(clust->index, inode),
  45478. + WRITE_OP, &info->flow);
  45479. + if (clust->tc.hole)
  45480. + info->flow.length = 0;
  45481. +
  45482. + jput(pos->child);
  45483. + return 0;
  45484. + err:
  45485. + jput(pos->child);
  45486. + free_convert_data(pos);
  45487. + return ret;
  45488. +}
  45489. +
  45490. +/* clear up disk cluster info */
  45491. +static void detach_convert_idata(struct convert_info * sq)
  45492. +{
  45493. + struct convert_item_info *info;
  45494. +
  45495. + assert("edward-253", sq != NULL);
  45496. + assert("edward-840", sq->itm != NULL);
  45497. +
  45498. + info = sq->itm;
  45499. + assert("edward-1212", info->flow.length == 0);
  45500. +
  45501. + free_item_convert_data(sq);
  45502. + return;
  45503. +}
  45504. +
  45505. +/* plugin->u.item.f.utmost_child */
  45506. +
  45507. +/* This function sets leftmost child for a first cluster item,
  45508. + if the child exists, and NULL in other cases.
  45509. + NOTE-EDWARD: Do not call this for RIGHT_SIDE */
  45510. +
  45511. +int utmost_child_ctail(const coord_t * coord, sideof side, jnode ** child)
  45512. +{
  45513. + reiser4_key key;
  45514. +
  45515. + item_key_by_coord(coord, &key);
  45516. +
  45517. + assert("edward-257", coord != NULL);
  45518. + assert("edward-258", child != NULL);
  45519. + assert("edward-259", side == LEFT_SIDE);
  45520. + assert("edward-260",
  45521. + item_plugin_by_coord(coord) == item_plugin_by_id(CTAIL_ID));
  45522. +
  45523. + if (!is_disk_cluster_key(&key, coord))
  45524. + *child = NULL;
  45525. + else
  45526. + *child = jlookup(current_tree,
  45527. + get_key_objectid(item_key_by_coord
  45528. + (coord, &key)),
  45529. + off_to_pg(get_key_offset(&key)));
  45530. + return 0;
  45531. +}
  45532. +
  45533. +/*
  45534. + * Set status (d_next) of the first item at the right neighbor
  45535. + *
  45536. + * If the current position is the last item in the node, then
  45537. + * look at its first item at the right neighbor (skip empty nodes).
  45538. + * Note, that right neighbors may be not dirty because of races.
  45539. + * If so, make it dirty and set convertible flag.
  45540. + */
  45541. +static int pre_convert_ctail(flush_pos_t * pos)
  45542. +{
  45543. + int ret = 0;
  45544. + int stop = 0;
  45545. + znode *slider;
  45546. + lock_handle slider_lh;
  45547. + lock_handle right_lh;
  45548. +
  45549. + assert("edward-1232", !node_is_empty(pos->coord.node));
  45550. + assert("edward-1014",
  45551. + pos->coord.item_pos < coord_num_items(&pos->coord));
  45552. + assert("edward-1015", convert_data_attached(pos));
  45553. + assert("edward-1611",
  45554. + item_convert_data(pos)->d_cur != DC_INVALID_STATE);
  45555. + assert("edward-1017",
  45556. + item_convert_data(pos)->d_next == DC_INVALID_STATE);
  45557. +
  45558. + /*
  45559. + * In the following two cases we don't need
  45560. + * to look at right neighbor
  45561. + */
  45562. + if (item_convert_data(pos)->d_cur == DC_AFTER_CLUSTER) {
  45563. + /*
  45564. + * cluster is over, so the first item of the right
  45565. + * neighbor doesn't belong to this cluster
  45566. + */
  45567. + return 0;
  45568. + }
  45569. + if (pos->coord.item_pos < coord_num_items(&pos->coord) - 1) {
  45570. + /*
  45571. + * current position is not the last item in the node,
  45572. + * so the first item of the right neighbor doesn't
  45573. + * belong to this cluster
  45574. + */
  45575. + return 0;
  45576. + }
  45577. + /*
  45578. + * Look at right neighbor.
  45579. + * Note that concurrent truncate is not a problem
  45580. + * since we have locked the beginning of the cluster.
  45581. + */
  45582. + slider = pos->coord.node;
  45583. + init_lh(&slider_lh);
  45584. + init_lh(&right_lh);
  45585. +
  45586. + while (!stop) {
  45587. + coord_t coord;
  45588. +
  45589. + ret = reiser4_get_right_neighbor(&right_lh,
  45590. + slider,
  45591. + ZNODE_WRITE_LOCK,
  45592. + GN_CAN_USE_UPPER_LEVELS);
  45593. + if (ret)
  45594. + break;
  45595. + slider = right_lh.node;
  45596. + ret = zload(slider);
  45597. + if (ret)
  45598. + break;
  45599. + coord_init_before_first_item(&coord, slider);
  45600. +
  45601. + if (node_is_empty(slider)) {
  45602. + warning("edward-1641", "Found empty right neighbor");
  45603. + znode_set_convertible(slider);
  45604. + /*
  45605. + * skip this node,
  45606. + * go rightward
  45607. + */
  45608. + stop = 0;
  45609. + } else if (same_disk_cluster(&pos->coord, &coord)) {
  45610. +
  45611. + item_convert_data(pos)->d_next = DC_CHAINED_ITEM;
  45612. +
  45613. + if (!znode_convertible(slider)) {
  45614. + /*
  45615. + warning("edward-1272",
  45616. + "next slum item mergeable, "
  45617. + "but znode %p isn't convertible\n",
  45618. + lh.node);
  45619. + */
  45620. + znode_set_convertible(slider);
  45621. + }
  45622. + stop = 1;
  45623. + convert_data(pos)->right_locked = 1;
  45624. + } else {
  45625. + item_convert_data(pos)->d_next = DC_AFTER_CLUSTER;
  45626. + stop = 1;
  45627. + convert_data(pos)->right_locked = 1;
  45628. + }
  45629. + zrelse(slider);
  45630. + done_lh(&slider_lh);
  45631. + move_lh(&slider_lh, &right_lh);
  45632. + }
  45633. + if (convert_data(pos)->right_locked)
  45634. + /*
  45635. + * Store locked right neighbor in
  45636. + * the conversion info. Otherwise,
  45637. + * we won't be able to access it,
  45638. + * if the current node gets deleted
  45639. + * during conversion
  45640. + */
  45641. + move_lh(&convert_data(pos)->right_lock, &slider_lh);
  45642. + done_lh(&slider_lh);
  45643. + done_lh(&right_lh);
  45644. +
  45645. + if (ret == -E_NO_NEIGHBOR) {
  45646. + item_convert_data(pos)->d_next = DC_AFTER_CLUSTER;
  45647. + ret = 0;
  45648. + }
  45649. + assert("edward-1610",
  45650. + ergo(ret != 0,
  45651. + item_convert_data(pos)->d_next == DC_INVALID_STATE));
  45652. + return ret;
  45653. +}
  45654. +
  45655. +/*
  45656. + * do some post-conversion actions;
  45657. + * detach conversion data if there is nothing to convert anymore
  45658. + */
  45659. +static void post_convert_ctail(flush_pos_t * pos,
  45660. + ctail_convert_mode_t mode, int old_nr_items)
  45661. +{
  45662. + switch (mode) {
  45663. + case CTAIL_CUT_ITEM:
  45664. + assert("edward-1214", item_convert_data(pos)->flow.length == 0);
  45665. + assert("edward-1215",
  45666. + coord_num_items(&pos->coord) == old_nr_items ||
  45667. + coord_num_items(&pos->coord) == old_nr_items - 1);
  45668. +
  45669. + if (item_convert_data(pos)->d_next == DC_CHAINED_ITEM)
  45670. + /*
  45671. + * the next item belongs to this cluster,
  45672. + * and should be also killed
  45673. + */
  45674. + break;
  45675. + if (coord_num_items(&pos->coord) != old_nr_items) {
  45676. + /*
  45677. + * the last item in the
  45678. + * cluster has been killed.
  45679. + */
  45680. + detach_convert_idata(pos->sq);
  45681. + if (!node_is_empty(pos->coord.node))
  45682. + /*
  45683. + * make sure the next item will be scanned
  45684. + */
  45685. + coord_init_before_item(&pos->coord);
  45686. + break;
  45687. + }
  45688. + fallthrough;
  45689. + case CTAIL_APPEND_ITEM:
  45690. + /*
  45691. + * in the append mode the whole flow has been inserted
  45692. + * (see COP_INSERT_FLOW primitive)
  45693. + */
  45694. + assert("edward-434", item_convert_data(pos)->flow.length == 0);
  45695. + detach_convert_idata(pos->sq);
  45696. + break;
  45697. + case CTAIL_OVERWRITE_ITEM:
  45698. + if (coord_is_unprepped_ctail(&pos->coord)) {
  45699. + /*
  45700. + * the first (unprepped) ctail has been overwritten;
  45701. + * convert it to the prepped one
  45702. + */
  45703. + assert("edward-1259",
  45704. + cluster_shift_ok(item_convert_data(pos)->
  45705. + cluster_shift));
  45706. + put_unaligned((d8)item_convert_data(pos)->cluster_shift,
  45707. + &ctail_formatted_at(&pos->coord)->
  45708. + cluster_shift);
  45709. + }
  45710. + break;
  45711. + default:
  45712. + impossible("edward-1609", "Bad ctail conversion mode");
  45713. + }
  45714. +}
  45715. +
  45716. +static int assign_conversion_mode(flush_pos_t * pos, ctail_convert_mode_t *mode)
  45717. +{
  45718. + int ret = 0;
  45719. +
  45720. + *mode = CTAIL_INVAL_CONVERT_MODE;
  45721. +
  45722. + if (!convert_data_attached(pos)) {
  45723. + if (should_attach_convert_idata(pos)) {
  45724. + struct inode *inode;
  45725. + gfp_t old_mask = get_current_context()->gfp_mask;
  45726. +
  45727. + assert("edward-264", pos->child != NULL);
  45728. + assert("edward-265", jnode_page(pos->child) != NULL);
  45729. + assert("edward-266",
  45730. + jnode_page(pos->child)->mapping != NULL);
  45731. +
  45732. + inode = jnode_page(pos->child)->mapping->host;
  45733. +
  45734. + assert("edward-267", inode != NULL);
  45735. + /*
  45736. + * attach new convert item info
  45737. + */
  45738. + get_current_context()->gfp_mask |= __GFP_NOFAIL;
  45739. + ret = attach_convert_idata(pos, inode);
  45740. + get_current_context()->gfp_mask = old_mask;
  45741. + pos->child = NULL;
  45742. + if (ret == -E_REPEAT) {
  45743. + /*
  45744. + * jnode became clean, or there is no dirty
  45745. + * pages (nothing to update in disk cluster)
  45746. + */
  45747. + warning("edward-1021",
  45748. + "convert_ctail: nothing to attach");
  45749. + ret = 0;
  45750. + goto dont_convert;
  45751. + }
  45752. + if (ret)
  45753. + goto dont_convert;
  45754. +
  45755. + if (pos->sq->clust.tc.hole) {
  45756. + assert("edward-1634",
  45757. + item_convert_data(pos)->flow.length == 0);
  45758. + /*
  45759. + * new content is filled with zeros -
  45760. + * we punch a hole using cut (not kill)
  45761. + * primitive, so attached pages won't
  45762. + * be truncated
  45763. + */
  45764. + *mode = CTAIL_CUT_ITEM;
  45765. + }
  45766. + else
  45767. + /*
  45768. + * this is the first ctail in the cluster,
  45769. + * so it (may be only its head) should be
  45770. + * overwritten
  45771. + */
  45772. + *mode = CTAIL_OVERWRITE_ITEM;
  45773. + } else
  45774. + /*
  45775. + * non-convertible item
  45776. + */
  45777. + goto dont_convert;
  45778. + } else {
  45779. + /*
  45780. + * use old convert info
  45781. + */
  45782. + struct convert_item_info *idata;
  45783. + idata = item_convert_data(pos);
  45784. +
  45785. + switch (idata->d_cur) {
  45786. + case DC_FIRST_ITEM:
  45787. + case DC_CHAINED_ITEM:
  45788. + if (idata->flow.length)
  45789. + *mode = CTAIL_OVERWRITE_ITEM;
  45790. + else
  45791. + *mode = CTAIL_CUT_ITEM;
  45792. + break;
  45793. + case DC_AFTER_CLUSTER:
  45794. + if (idata->flow.length)
  45795. + *mode = CTAIL_APPEND_ITEM;
  45796. + else {
  45797. + /*
  45798. + * nothing to update anymore
  45799. + */
  45800. + detach_convert_idata(pos->sq);
  45801. + goto dont_convert;
  45802. + }
  45803. + break;
  45804. + default:
  45805. + impossible("edward-1018",
  45806. + "wrong current item state");
  45807. + ret = RETERR(-EIO);
  45808. + goto dont_convert;
  45809. + }
  45810. + }
  45811. + /*
  45812. + * ok, ctail will be converted
  45813. + */
  45814. + assert("edward-433", convert_data_attached(pos));
  45815. + assert("edward-1022",
  45816. + pos->coord.item_pos < coord_num_items(&pos->coord));
  45817. + return 0;
  45818. + dont_convert:
  45819. + return ret;
  45820. +}
  45821. +
  45822. +/*
  45823. + * perform an operation on the ctail item in
  45824. + * accordance with assigned conversion @mode
  45825. + */
  45826. +static int do_convert_ctail(flush_pos_t * pos, ctail_convert_mode_t mode)
  45827. +{
  45828. + int result = 0;
  45829. + struct convert_item_info * info;
  45830. +
  45831. + assert("edward-468", pos != NULL);
  45832. + assert("edward-469", pos->sq != NULL);
  45833. + assert("edward-845", item_convert_data(pos) != NULL);
  45834. +
  45835. + info = item_convert_data(pos);
  45836. + assert("edward-679", info->flow.data != NULL);
  45837. +
  45838. + switch (mode) {
  45839. + case CTAIL_APPEND_ITEM:
  45840. + assert("edward-1229", info->flow.length != 0);
  45841. + assert("edward-1256",
  45842. + cluster_shift_ok(cluster_shift_by_coord(&pos->coord)));
  45843. + /*
  45844. + * insert flow without balancing
  45845. + * (see comments to convert_node())
  45846. + */
  45847. + result = insert_cryptcompress_flow_in_place(&pos->coord,
  45848. + &pos->lock,
  45849. + &info->flow,
  45850. + info->cluster_shift);
  45851. + break;
  45852. + case CTAIL_OVERWRITE_ITEM:
  45853. + assert("edward-1230", info->flow.length != 0);
  45854. + overwrite_ctail(&pos->coord, &info->flow);
  45855. + if (info->flow.length != 0)
  45856. + break;
  45857. + fallthrough;
  45858. + /* cut the rest of item (if any) */
  45859. + case CTAIL_CUT_ITEM:
  45860. + assert("edward-1231", info->flow.length == 0);
  45861. + result = cut_ctail(&pos->coord);
  45862. + break;
  45863. + default:
  45864. + result = RETERR(-EIO);
  45865. + impossible("edward-244", "bad ctail conversion mode");
  45866. + }
  45867. + return result;
  45868. +}
  45869. +
  45870. +/*
  45871. + * plugin->u.item.f.convert
  45872. + *
  45873. + * Convert ctail items at flush time
  45874. + */
  45875. +int convert_ctail(flush_pos_t * pos)
  45876. +{
  45877. + int ret;
  45878. + int old_nr_items;
  45879. + ctail_convert_mode_t mode;
  45880. +
  45881. + assert("edward-1020", pos != NULL);
  45882. + assert("edward-1213", coord_num_items(&pos->coord) != 0);
  45883. + assert("edward-1257", item_id_by_coord(&pos->coord) == CTAIL_ID);
  45884. + assert("edward-1258", ctail_ok(&pos->coord));
  45885. + assert("edward-261", pos->coord.node != NULL);
  45886. +
  45887. + old_nr_items = coord_num_items(&pos->coord);
  45888. + /*
  45889. + * detach old conversion data and
  45890. + * attach a new one, if needed
  45891. + */
  45892. + ret = assign_conversion_mode(pos, &mode);
  45893. + if (ret || mode == CTAIL_INVAL_CONVERT_MODE) {
  45894. + assert("edward-1633", !convert_data_attached(pos));
  45895. + return ret;
  45896. + }
  45897. + /*
  45898. + * find out the status of the right neighbor
  45899. + */
  45900. + ret = pre_convert_ctail(pos);
  45901. + if (ret) {
  45902. + detach_convert_idata(pos->sq);
  45903. + return ret;
  45904. + }
  45905. + ret = do_convert_ctail(pos, mode);
  45906. + if (ret) {
  45907. + detach_convert_idata(pos->sq);
  45908. + return ret;
  45909. + }
  45910. + /*
  45911. + * detach old conversion data if needed
  45912. + */
  45913. + post_convert_ctail(pos, mode, old_nr_items);
  45914. + return 0;
  45915. +}
  45916. +
  45917. +/*
  45918. + Local variables:
  45919. + c-indentation-style: "K&R"
  45920. + mode-name: "LC"
  45921. + c-basic-offset: 8
  45922. + tab-width: 8
  45923. + fill-column: 120
  45924. + End:
  45925. +*/
  45926. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/ctail.h linux-5.16.14/fs/reiser4/plugin/item/ctail.h
  45927. --- linux-5.16.14.orig/fs/reiser4/plugin/item/ctail.h 1970-01-01 01:00:00.000000000 +0100
  45928. +++ linux-5.16.14/fs/reiser4/plugin/item/ctail.h 2022-03-12 13:26:19.676892787 +0100
  45929. @@ -0,0 +1,102 @@
  45930. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  45931. +
  45932. +/* Ctail items are fragments (or bodies) of special type to provide
  45933. + optimal storage of encrypted and(or) compressed files. */
  45934. +
  45935. +
  45936. +#if !defined( __FS_REISER4_CTAIL_H__ )
  45937. +#define __FS_REISER4_CTAIL_H__
  45938. +
  45939. +/* Disk format of ctail item */
  45940. +typedef struct ctail_item_format {
  45941. + /* packed shift;
  45942. + if its value is different from UCTAIL_SHIFT (see below), then
  45943. + size of disk cluster is calculated as (1 << cluster_shift) */
  45944. + d8 cluster_shift;
  45945. + /* ctail body */
  45946. + d8 body[0];
  45947. +} __attribute__ ((packed)) ctail_item_format;
  45948. +
  45949. +/* "Unprepped" disk cluster is represented by a single ctail item
  45950. + with the following "magic" attributes: */
  45951. +/* "magic" cluster_shift */
  45952. +#define UCTAIL_SHIFT 0xff
  45953. +/* How many units unprepped ctail item has */
  45954. +#define UCTAIL_NR_UNITS 1
  45955. +
  45956. +/* The following is a set of various item states in a disk cluster.
  45957. + Disk cluster is a set of items whose keys belong to the interval
  45958. + [dc_key , dc_key + disk_cluster_size - 1] */
  45959. +typedef enum {
  45960. + DC_INVALID_STATE = 0,
  45961. + DC_FIRST_ITEM = 1,
  45962. + DC_CHAINED_ITEM = 2,
  45963. + DC_AFTER_CLUSTER = 3
  45964. +} dc_item_stat;
  45965. +
  45966. +/* ctail-specific extension.
  45967. + In particular this describes parameters of the disk cluster an item belongs to */
  45968. +struct ctail_coord_extension {
  45969. + int shift; /* this contains cluster_shift extracted from
  45970. + ctail_item_format (above), or UCTAIL_SHIFT
  45971. + (the last one is the "magic" of unprepped disk clusters)*/
  45972. + int dsize; /* size of a prepped disk cluster */
  45973. + int ncount; /* count of nodes occupied by a disk cluster */
  45974. +};
  45975. +
  45976. +struct cut_list;
  45977. +
  45978. +/* plugin->item.b.* */
  45979. +int can_contain_key_ctail(const coord_t *, const reiser4_key *,
  45980. + const reiser4_item_data *);
  45981. +int mergeable_ctail(const coord_t * p1, const coord_t * p2);
  45982. +pos_in_node_t nr_units_ctail(const coord_t * coord);
  45983. +int estimate_ctail(const coord_t * coord, const reiser4_item_data * data);
  45984. +void print_ctail(const char *prefix, coord_t * coord);
  45985. +lookup_result lookup_ctail(const reiser4_key *, lookup_bias, coord_t *);
  45986. +
  45987. +int paste_ctail(coord_t * coord, reiser4_item_data * data,
  45988. + carry_plugin_info * info UNUSED_ARG);
  45989. +int init_ctail(coord_t *, coord_t *, reiser4_item_data *);
  45990. +int can_shift_ctail(unsigned free_space, coord_t * coord,
  45991. + znode * target, shift_direction pend, unsigned *size,
  45992. + unsigned want);
  45993. +void copy_units_ctail(coord_t * target, coord_t * source, unsigned from,
  45994. + unsigned count, shift_direction where_is_free_space,
  45995. + unsigned free_space);
  45996. +int cut_units_ctail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  45997. + carry_cut_data *, reiser4_key * smallest_removed,
  45998. + reiser4_key * new_first);
  45999. +int kill_units_ctail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  46000. + carry_kill_data *, reiser4_key * smallest_removed,
  46001. + reiser4_key * new_first);
  46002. +int ctail_ok(const coord_t * coord);
  46003. +int check_ctail(const coord_t * coord, const char **error);
  46004. +
  46005. +/* plugin->u.item.s.* */
  46006. +int read_ctail(flow_t *, hint_t *, struct kiocb *, struct iov_iter *);
  46007. +int readpage_ctail(void *, struct page *);
  46008. +int readpages_ctail(struct file *, struct address_space *, struct list_head *);
  46009. +reiser4_key *append_key_ctail(const coord_t *, reiser4_key *);
  46010. +int create_hook_ctail(const coord_t * coord, void *arg);
  46011. +int kill_hook_ctail(const coord_t *, pos_in_node_t, pos_in_node_t,
  46012. + carry_kill_data *);
  46013. +int shift_hook_ctail(const coord_t *, unsigned, unsigned, znode *);
  46014. +
  46015. +/* plugin->u.item.f */
  46016. +int utmost_child_ctail(const coord_t *, sideof, jnode **);
  46017. +int scan_ctail(flush_scan *);
  46018. +int convert_ctail(flush_pos_t *);
  46019. +size_t inode_scaled_cluster_size(struct inode *);
  46020. +
  46021. +#endif /* __FS_REISER4_CTAIL_H__ */
  46022. +
  46023. +/* Make Linus happy.
  46024. + Local variables:
  46025. + c-indentation-style: "K&R"
  46026. + mode-name: "LC"
  46027. + c-basic-offset: 8
  46028. + tab-width: 8
  46029. + fill-column: 120
  46030. + End:
  46031. +*/
  46032. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/extent.c linux-5.16.14/fs/reiser4/plugin/item/extent.c
  46033. --- linux-5.16.14.orig/fs/reiser4/plugin/item/extent.c 1970-01-01 01:00:00.000000000 +0100
  46034. +++ linux-5.16.14/fs/reiser4/plugin/item/extent.c 2022-03-12 13:26:19.676892787 +0100
  46035. @@ -0,0 +1,197 @@
  46036. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  46037. +
  46038. +#include "item.h"
  46039. +#include "../../key.h"
  46040. +#include "../../super.h"
  46041. +#include "../../carry.h"
  46042. +#include "../../inode.h"
  46043. +#include "../../page_cache.h"
  46044. +#include "../../flush.h"
  46045. +#include "../object.h"
  46046. +
  46047. +/* prepare structure reiser4_item_data. It is used to put one extent unit into tree */
  46048. +/* Audited by: green(2002.06.13) */
  46049. +reiser4_item_data *init_new_extent(reiser4_item_data * data, void *ext_unit,
  46050. + int nr_extents)
  46051. +{
  46052. + data->data = ext_unit;
  46053. + /* data->data is kernel space */
  46054. + data->user = 0;
  46055. + data->length = sizeof(reiser4_extent) * nr_extents;
  46056. + data->arg = NULL;
  46057. + data->iplug = item_plugin_by_id(EXTENT_POINTER_ID);
  46058. + return data;
  46059. +}
  46060. +
  46061. +/* how many bytes are addressed by @nr first extents of the extent item */
  46062. +reiser4_block_nr reiser4_extent_size(const coord_t * coord, pos_in_node_t nr)
  46063. +{
  46064. + pos_in_node_t i;
  46065. + reiser4_block_nr blocks;
  46066. + reiser4_extent *ext;
  46067. +
  46068. + ext = item_body_by_coord(coord);
  46069. + assert("vs-263", nr <= nr_units_extent(coord));
  46070. +
  46071. + blocks = 0;
  46072. + for (i = 0; i < nr; i++, ext++) {
  46073. + blocks += extent_get_width(ext);
  46074. + }
  46075. +
  46076. + return blocks * current_blocksize;
  46077. +}
  46078. +
  46079. +extent_state state_of_extent(reiser4_extent * ext)
  46080. +{
  46081. + switch ((int)extent_get_start(ext)) {
  46082. + case 0:
  46083. + return HOLE_EXTENT;
  46084. + case 1:
  46085. + return UNALLOCATED_EXTENT;
  46086. + default:
  46087. + break;
  46088. + }
  46089. + return ALLOCATED_EXTENT;
  46090. +}
  46091. +
  46092. +int extent_is_unallocated(const coord_t * item)
  46093. +{
  46094. + assert("jmacd-5133", item_is_extent(item));
  46095. +
  46096. + return state_of_extent(extent_by_coord(item)) == UNALLOCATED_EXTENT;
  46097. +}
  46098. +
  46099. +/* set extent's start and width */
  46100. +void reiser4_set_extent(reiser4_extent * ext, reiser4_block_nr start,
  46101. + reiser4_block_nr width)
  46102. +{
  46103. + extent_set_start(ext, start);
  46104. + extent_set_width(ext, width);
  46105. +}
  46106. +
  46107. +/**
  46108. + * reiser4_replace_extent - replace extent and paste 1 or 2 after it
  46109. + * @un_extent: coordinate of extent to be overwritten
  46110. + * @lh: need better comment
  46111. + * @key: need better comment
  46112. + * @exts_to_add: data prepared for insertion into tree
  46113. + * @replace: need better comment
  46114. + * @flags: need better comment
  46115. + * @return_insert_position: need better comment
  46116. + *
  46117. + * Overwrites one extent, pastes 1 or 2 more ones after overwritten one. If
  46118. + * @return_inserted_position is 1 - @un_extent and @lh are returned set to
  46119. + * first of newly inserted units, if it is 0 - @un_extent and @lh are returned
  46120. + * set to extent which was overwritten.
  46121. + */
  46122. +int reiser4_replace_extent(struct replace_handle *h,
  46123. + int return_inserted_position)
  46124. +{
  46125. + int result;
  46126. + znode *orig_znode;
  46127. + /*ON_DEBUG(reiser4_extent orig_ext);*/ /* this is for debugging */
  46128. +
  46129. + assert("vs-990", coord_is_existing_unit(h->coord));
  46130. + assert("vs-1375", znode_is_write_locked(h->coord->node));
  46131. + assert("vs-1426", extent_get_width(&h->overwrite) != 0);
  46132. + assert("vs-1427", extent_get_width(&h->new_extents[0]) != 0);
  46133. + assert("vs-1427", ergo(h->nr_new_extents == 2,
  46134. + extent_get_width(&h->new_extents[1]) != 0));
  46135. +
  46136. + /* compose structure for paste */
  46137. + init_new_extent(&h->item, &h->new_extents[0], h->nr_new_extents);
  46138. +
  46139. + coord_dup(&h->coord_after, h->coord);
  46140. + init_lh(&h->lh_after);
  46141. + copy_lh(&h->lh_after, h->lh);
  46142. + reiser4_tap_init(&h->watch, &h->coord_after, &h->lh_after, ZNODE_WRITE_LOCK);
  46143. + reiser4_tap_monitor(&h->watch);
  46144. +
  46145. + ON_DEBUG(h->orig_ext = *extent_by_coord(h->coord));
  46146. + orig_znode = h->coord->node;
  46147. +
  46148. +#if REISER4_DEBUG
  46149. + /* make sure that key is set properly */
  46150. + unit_key_by_coord(h->coord, &h->tmp);
  46151. + set_key_offset(&h->tmp,
  46152. + get_key_offset(&h->tmp) +
  46153. + extent_get_width(&h->overwrite) * current_blocksize);
  46154. + assert("vs-1080", keyeq(&h->tmp, &h->paste_key));
  46155. +#endif
  46156. +
  46157. + /* set insert point after unit to be replaced */
  46158. + h->coord->between = AFTER_UNIT;
  46159. +
  46160. + result = insert_into_item(h->coord, return_inserted_position ? h->lh : NULL,
  46161. + &h->paste_key, &h->item, h->flags);
  46162. + if (!result) {
  46163. + /* now we have to replace the unit after which new units were
  46164. + inserted. Its position is tracked by @watch */
  46165. + reiser4_extent *ext;
  46166. + znode *node;
  46167. +
  46168. + node = h->coord_after.node;
  46169. + if (node != orig_znode) {
  46170. + coord_clear_iplug(&h->coord_after);
  46171. + result = zload(node);
  46172. + }
  46173. +
  46174. + if (likely(!result)) {
  46175. + ext = extent_by_coord(&h->coord_after);
  46176. +
  46177. + assert("vs-987", znode_is_loaded(node));
  46178. + assert("vs-988", !memcmp(ext, &h->orig_ext, sizeof(*ext)));
  46179. +
  46180. + /* overwrite extent unit */
  46181. + memcpy(ext, &h->overwrite, sizeof(reiser4_extent));
  46182. + znode_make_dirty(node);
  46183. +
  46184. + if (node != orig_znode)
  46185. + zrelse(node);
  46186. +
  46187. + if (return_inserted_position == 0) {
  46188. + /* coord and lh are to be set to overwritten
  46189. + extent */
  46190. + assert("vs-1662",
  46191. + WITH_DATA(node, !memcmp(&h->overwrite,
  46192. + extent_by_coord(
  46193. + &h->coord_after),
  46194. + sizeof(reiser4_extent))));
  46195. +
  46196. + *h->coord = h->coord_after;
  46197. + done_lh(h->lh);
  46198. + copy_lh(h->lh, &h->lh_after);
  46199. + } else {
  46200. + /* h->coord and h->lh are to be set to first of
  46201. + inserted units */
  46202. + assert("vs-1663",
  46203. + WITH_DATA(h->coord->node,
  46204. + !memcmp(&h->new_extents[0],
  46205. + extent_by_coord(h->coord),
  46206. + sizeof(reiser4_extent))));
  46207. + assert("vs-1664", h->lh->node == h->coord->node);
  46208. + }
  46209. + }
  46210. + }
  46211. + reiser4_tap_done(&h->watch);
  46212. +
  46213. + return result;
  46214. +}
  46215. +
  46216. +lock_handle *znode_lh(znode *node)
  46217. +{
  46218. + assert("vs-1371", znode_is_write_locked(node));
  46219. + assert("vs-1372", znode_is_wlocked_once(node));
  46220. + return list_entry(node->lock.owners.next, lock_handle, owners_link);
  46221. +}
  46222. +
  46223. +/*
  46224. + * Local variables:
  46225. + * c-indentation-style: "K&R"
  46226. + * mode-name: "LC"
  46227. + * c-basic-offset: 8
  46228. + * tab-width: 8
  46229. + * fill-column: 79
  46230. + * scroll-step: 1
  46231. + * End:
  46232. + */
  46233. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/extent_file_ops.c linux-5.16.14/fs/reiser4/plugin/item/extent_file_ops.c
  46234. --- linux-5.16.14.orig/fs/reiser4/plugin/item/extent_file_ops.c 1970-01-01 01:00:00.000000000 +0100
  46235. +++ linux-5.16.14/fs/reiser4/plugin/item/extent_file_ops.c 2022-03-12 13:26:19.677892789 +0100
  46236. @@ -0,0 +1,1387 @@
  46237. +/* COPYRIGHT 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  46238. +
  46239. +#include "item.h"
  46240. +#include "../../inode.h"
  46241. +#include "../../page_cache.h"
  46242. +#include "../object.h"
  46243. +
  46244. +#include <linux/swap.h>
  46245. +#include <linux/uio.h>
  46246. +
  46247. +static inline reiser4_extent *ext_by_offset(const znode *node, int offset)
  46248. +{
  46249. + reiser4_extent *ext;
  46250. +
  46251. + ext = (reiser4_extent *) (zdata(node) + offset);
  46252. + return ext;
  46253. +}
  46254. +
  46255. +/**
  46256. + * check_uf_coord - verify coord extension
  46257. + * @uf_coord:
  46258. + * @key:
  46259. + *
  46260. + * Makes sure that all fields of @uf_coord are set properly. If @key is
  46261. + * specified - check whether @uf_coord is set correspondingly.
  46262. + */
  46263. +static void check_uf_coord(const uf_coord_t *uf_coord, const reiser4_key *key)
  46264. +{
  46265. +#if REISER4_DEBUG
  46266. + const coord_t *coord;
  46267. + const struct extent_coord_extension *ext_coord;
  46268. + reiser4_extent *ext;
  46269. +
  46270. + coord = &uf_coord->coord;
  46271. + ext_coord = &uf_coord->extension.extent;
  46272. + ext = ext_by_offset(coord->node, uf_coord->extension.extent.ext_offset);
  46273. +
  46274. + assert("",
  46275. + WITH_DATA(coord->node,
  46276. + (uf_coord->valid == 1 &&
  46277. + coord_is_iplug_set(coord) &&
  46278. + item_is_extent(coord) &&
  46279. + ext_coord->nr_units == nr_units_extent(coord) &&
  46280. + ext == extent_by_coord(coord) &&
  46281. + ext_coord->width == extent_get_width(ext) &&
  46282. + coord->unit_pos < ext_coord->nr_units &&
  46283. + ext_coord->pos_in_unit < ext_coord->width &&
  46284. + memcmp(ext, &ext_coord->extent,
  46285. + sizeof(reiser4_extent)) == 0)));
  46286. + if (key) {
  46287. + reiser4_key coord_key;
  46288. +
  46289. + unit_key_by_coord(&uf_coord->coord, &coord_key);
  46290. + set_key_offset(&coord_key,
  46291. + get_key_offset(&coord_key) +
  46292. + (uf_coord->extension.extent.
  46293. + pos_in_unit << PAGE_SHIFT));
  46294. + assert("", keyeq(key, &coord_key));
  46295. + }
  46296. +#endif
  46297. +}
  46298. +
  46299. +static inline reiser4_extent *ext_by_ext_coord(const uf_coord_t *uf_coord)
  46300. +{
  46301. + return ext_by_offset(uf_coord->coord.node,
  46302. + uf_coord->extension.extent.ext_offset);
  46303. +}
  46304. +
  46305. +#if REISER4_DEBUG
  46306. +
  46307. +/**
  46308. + * offset_is_in_unit
  46309. + *
  46310. + *
  46311. + *
  46312. + */
  46313. +/* return 1 if offset @off is inside of extent unit pointed to by @coord. Set
  46314. + pos_in_unit inside of unit correspondingly */
  46315. +static int offset_is_in_unit(const coord_t *coord, loff_t off)
  46316. +{
  46317. + reiser4_key unit_key;
  46318. + __u64 unit_off;
  46319. + reiser4_extent *ext;
  46320. +
  46321. + ext = extent_by_coord(coord);
  46322. +
  46323. + unit_key_extent(coord, &unit_key);
  46324. + unit_off = get_key_offset(&unit_key);
  46325. + if (off < unit_off)
  46326. + return 0;
  46327. + if (off >= (unit_off + (current_blocksize * extent_get_width(ext))))
  46328. + return 0;
  46329. + return 1;
  46330. +}
  46331. +
  46332. +static int
  46333. +coord_matches_key_extent(const coord_t * coord, const reiser4_key * key)
  46334. +{
  46335. + reiser4_key item_key;
  46336. +
  46337. + assert("vs-771", coord_is_existing_unit(coord));
  46338. + assert("vs-1258", keylt(key, append_key_extent(coord, &item_key)));
  46339. + assert("vs-1259", keyge(key, item_key_by_coord(coord, &item_key)));
  46340. +
  46341. + return offset_is_in_unit(coord, get_key_offset(key));
  46342. +}
  46343. +
  46344. +#endif
  46345. +
  46346. +/**
  46347. + * can_append -
  46348. + * @key:
  46349. + * @coord:
  46350. + *
  46351. + * Returns 1 if @key is equal to an append key of item @coord is set to
  46352. + */
  46353. +static int can_append(const reiser4_key *key, const coord_t *coord)
  46354. +{
  46355. + reiser4_key append_key;
  46356. +
  46357. + return keyeq(key, append_key_extent(coord, &append_key));
  46358. +}
  46359. +
  46360. +/**
  46361. + * append_hole
  46362. + * @coord:
  46363. + * @lh:
  46364. + * @key:
  46365. + *
  46366. + */
  46367. +static int append_hole(coord_t *coord, lock_handle *lh,
  46368. + const reiser4_key *key)
  46369. +{
  46370. + reiser4_key append_key;
  46371. + reiser4_block_nr hole_width;
  46372. + reiser4_extent *ext, new_ext;
  46373. + reiser4_item_data idata;
  46374. +
  46375. + /* last item of file may have to be appended with hole */
  46376. + assert("vs-708", znode_get_level(coord->node) == TWIG_LEVEL);
  46377. + assert("vs-714", item_id_by_coord(coord) == EXTENT_POINTER_ID);
  46378. +
  46379. + /* key of first byte which is not addressed by this extent */
  46380. + append_key_extent(coord, &append_key);
  46381. +
  46382. + assert("", keyle(&append_key, key));
  46383. +
  46384. + /*
  46385. + * extent item has to be appended with hole. Calculate length of that
  46386. + * hole
  46387. + */
  46388. + hole_width = ((get_key_offset(key) - get_key_offset(&append_key) +
  46389. + current_blocksize - 1) >> current_blocksize_bits);
  46390. + assert("vs-954", hole_width > 0);
  46391. +
  46392. + /* set coord after last unit */
  46393. + coord_init_after_item_end(coord);
  46394. +
  46395. + /* get last extent in the item */
  46396. + ext = extent_by_coord(coord);
  46397. + if (state_of_extent(ext) == HOLE_EXTENT) {
  46398. + /*
  46399. + * last extent of a file is hole extent. Widen that extent by
  46400. + * @hole_width blocks. Note that we do not worry about
  46401. + * overflowing - extent width is 64 bits
  46402. + */
  46403. + reiser4_set_extent(ext, HOLE_EXTENT_START,
  46404. + extent_get_width(ext) + hole_width);
  46405. + znode_make_dirty(coord->node);
  46406. + return 0;
  46407. + }
  46408. +
  46409. + /* append last item of the file with hole extent unit */
  46410. + assert("vs-713", (state_of_extent(ext) == ALLOCATED_EXTENT ||
  46411. + state_of_extent(ext) == UNALLOCATED_EXTENT));
  46412. +
  46413. + reiser4_set_extent(&new_ext, HOLE_EXTENT_START, hole_width);
  46414. + init_new_extent(&idata, &new_ext, 1);
  46415. + return insert_into_item(coord, lh, &append_key, &idata, 0);
  46416. +}
  46417. +
  46418. +/**
  46419. + * check_jnodes
  46420. + * @twig: longterm locked twig node
  46421. + * @key:
  46422. + *
  46423. + */
  46424. +static void check_jnodes(znode *twig, const reiser4_key *key, int count)
  46425. +{
  46426. +#if REISER4_DEBUG
  46427. + coord_t c;
  46428. + reiser4_key node_key, jnode_key;
  46429. +
  46430. + jnode_key = *key;
  46431. +
  46432. + assert("", twig != NULL);
  46433. + assert("", znode_get_level(twig) == TWIG_LEVEL);
  46434. + assert("", znode_is_write_locked(twig));
  46435. +
  46436. + zload(twig);
  46437. + /* get the smallest key in twig node */
  46438. + coord_init_first_unit(&c, twig);
  46439. + unit_key_by_coord(&c, &node_key);
  46440. + assert("", keyle(&node_key, &jnode_key));
  46441. +
  46442. + coord_init_last_unit(&c, twig);
  46443. + unit_key_by_coord(&c, &node_key);
  46444. + if (item_plugin_by_coord(&c)->s.file.append_key)
  46445. + item_plugin_by_coord(&c)->s.file.append_key(&c, &node_key);
  46446. + set_key_offset(&jnode_key,
  46447. + get_key_offset(&jnode_key) + (loff_t)count * PAGE_SIZE - 1);
  46448. + assert("", keylt(&jnode_key, &node_key));
  46449. + zrelse(twig);
  46450. +#endif
  46451. +}
  46452. +
  46453. +/**
  46454. + * append_last_extent - append last file item
  46455. + * @uf_coord: coord to start insertion from
  46456. + * @jnodes: array of jnodes
  46457. + * @count: number of jnodes in the array
  46458. + *
  46459. + * There is already at least one extent item of file @inode in the tree. Append
  46460. + * the last of them with unallocated extent unit of width @count. Assign
  46461. + * fake block numbers to jnodes corresponding to the inserted extent.
  46462. + */
  46463. +static int append_last_extent(uf_coord_t *uf_coord, const reiser4_key *key,
  46464. + jnode **jnodes, int count)
  46465. +{
  46466. + int result;
  46467. + reiser4_extent new_ext;
  46468. + reiser4_item_data idata;
  46469. + coord_t *coord;
  46470. + struct extent_coord_extension *ext_coord;
  46471. + reiser4_extent *ext;
  46472. + reiser4_block_nr block;
  46473. + jnode *node;
  46474. + int i;
  46475. +
  46476. + coord = &uf_coord->coord;
  46477. + ext_coord = &uf_coord->extension.extent;
  46478. + ext = ext_by_ext_coord(uf_coord);
  46479. +
  46480. + /* check correctness of position in the item */
  46481. + assert("vs-228", coord->unit_pos == coord_last_unit_pos(coord));
  46482. + assert("vs-1311", coord->between == AFTER_UNIT);
  46483. + assert("vs-1302", ext_coord->pos_in_unit == ext_coord->width - 1);
  46484. +
  46485. + if (!can_append(key, coord)) {
  46486. + /* hole extent has to be inserted */
  46487. + result = append_hole(coord, uf_coord->lh, key);
  46488. + uf_coord->valid = 0;
  46489. + return result;
  46490. + }
  46491. +
  46492. + if (count == 0)
  46493. + return 0;
  46494. +
  46495. + assert("", get_key_offset(key) == (loff_t)index_jnode(jnodes[0]) * PAGE_SIZE);
  46496. +
  46497. + inode_add_blocks(mapping_jnode(jnodes[0])->host, count);
  46498. +
  46499. + switch (state_of_extent(ext)) {
  46500. + case UNALLOCATED_EXTENT:
  46501. + /*
  46502. + * last extent unit of the file is unallocated one. Increase
  46503. + * its width by @count
  46504. + */
  46505. + reiser4_set_extent(ext, UNALLOCATED_EXTENT_START,
  46506. + extent_get_width(ext) + count);
  46507. + znode_make_dirty(coord->node);
  46508. +
  46509. + /* update coord extension */
  46510. + ext_coord->width += count;
  46511. + ON_DEBUG(extent_set_width
  46512. + (&uf_coord->extension.extent.extent,
  46513. + ext_coord->width));
  46514. + break;
  46515. +
  46516. + case HOLE_EXTENT:
  46517. + case ALLOCATED_EXTENT:
  46518. + /*
  46519. + * last extent unit of the file is either hole or allocated
  46520. + * one. Append one unallocated extent of width @count
  46521. + */
  46522. + reiser4_set_extent(&new_ext, UNALLOCATED_EXTENT_START, count);
  46523. + init_new_extent(&idata, &new_ext, 1);
  46524. + result = insert_into_item(coord, uf_coord->lh, key, &idata, 0);
  46525. + uf_coord->valid = 0;
  46526. + if (result)
  46527. + return result;
  46528. + break;
  46529. +
  46530. + default:
  46531. + return RETERR(-EIO);
  46532. + }
  46533. +
  46534. + /*
  46535. + * make sure that we hold long term locked twig node containing all
  46536. + * jnodes we are about to capture
  46537. + */
  46538. + check_jnodes(uf_coord->lh->node, key, count);
  46539. +
  46540. + /*
  46541. + * assign fake block numbers to all jnodes. FIXME: make sure whether
  46542. + * twig node containing inserted extent item is locked
  46543. + */
  46544. + block = fake_blocknr_unformatted(count);
  46545. + for (i = 0; i < count; i ++, block ++) {
  46546. + node = jnodes[i];
  46547. + spin_lock_jnode(node);
  46548. + JF_SET(node, JNODE_CREATED);
  46549. + jnode_set_block(node, &block);
  46550. + result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
  46551. + BUG_ON(result != 0);
  46552. + jnode_make_dirty_locked(node);
  46553. + spin_unlock_jnode(node);
  46554. + }
  46555. + return count;
  46556. +}
  46557. +
  46558. +/**
  46559. + * insert_first_hole - insert hole extent into tree
  46560. + * @coord:
  46561. + * @lh:
  46562. + * @key:
  46563. + *
  46564. + *
  46565. + */
  46566. +static int insert_first_hole(coord_t *coord, lock_handle *lh,
  46567. + const reiser4_key *key)
  46568. +{
  46569. + reiser4_extent new_ext;
  46570. + reiser4_item_data idata;
  46571. + reiser4_key item_key;
  46572. + reiser4_block_nr hole_width;
  46573. +
  46574. + /* @coord must be set for inserting of new item */
  46575. + assert("vs-711", coord_is_between_items(coord));
  46576. +
  46577. + item_key = *key;
  46578. + set_key_offset(&item_key, 0ull);
  46579. +
  46580. + hole_width = ((get_key_offset(key) + current_blocksize - 1) >>
  46581. + current_blocksize_bits);
  46582. + assert("vs-710", hole_width > 0);
  46583. +
  46584. + /* compose body of hole extent and insert item into tree */
  46585. + reiser4_set_extent(&new_ext, HOLE_EXTENT_START, hole_width);
  46586. + init_new_extent(&idata, &new_ext, 1);
  46587. + return insert_extent_by_coord(coord, &idata, &item_key, lh);
  46588. +}
  46589. +
  46590. +
  46591. +/**
  46592. + * insert_first_extent - insert first file item
  46593. + * @inode: inode of file
  46594. + * @uf_coord: coord to start insertion from
  46595. + * @jnodes: array of jnodes
  46596. + * @count: number of jnodes in the array
  46597. + * @key:
  46598. + *
  46599. + * There are no items of file @inode in the tree yet. Insert unallocated extent
  46600. + * of width @count into tree or hole extent if writing not to the
  46601. + * beginning. Assign fake block numbers to jnodes corresponding to the inserted
  46602. + * unallocated extent. Returns number of jnodes or error code.
  46603. + */
  46604. +static int insert_first_extent(uf_coord_t *uf_coord, const reiser4_key *key,
  46605. + jnode **jnodes, int count,
  46606. + struct inode *inode)
  46607. +{
  46608. + int result;
  46609. + int i;
  46610. + reiser4_extent new_ext;
  46611. + reiser4_item_data idata;
  46612. + reiser4_block_nr block;
  46613. + struct unix_file_info *uf_info;
  46614. + jnode *node;
  46615. +
  46616. + /* first extent insertion starts at leaf level */
  46617. + assert("vs-719", znode_get_level(uf_coord->coord.node) == LEAF_LEVEL);
  46618. + assert("vs-711", coord_is_between_items(&uf_coord->coord));
  46619. +
  46620. + if (get_key_offset(key) != 0) {
  46621. + result = insert_first_hole(&uf_coord->coord, uf_coord->lh, key);
  46622. + uf_coord->valid = 0;
  46623. + uf_info = unix_file_inode_data(inode);
  46624. +
  46625. + /*
  46626. + * first item insertion is only possible when writing to empty
  46627. + * file or performing tail conversion
  46628. + */
  46629. + assert("", (uf_info->container == UF_CONTAINER_EMPTY ||
  46630. + (reiser4_inode_get_flag(inode,
  46631. + REISER4_PART_MIXED) &&
  46632. + reiser4_inode_get_flag(inode,
  46633. + REISER4_PART_IN_CONV))));
  46634. + /* if file was empty - update its state */
  46635. + if (result == 0 && uf_info->container == UF_CONTAINER_EMPTY)
  46636. + uf_info->container = UF_CONTAINER_EXTENTS;
  46637. + return result;
  46638. + }
  46639. +
  46640. + if (count == 0)
  46641. + return 0;
  46642. +
  46643. + inode_add_blocks(mapping_jnode(jnodes[0])->host, count);
  46644. +
  46645. + /*
  46646. + * prepare for tree modification: compose body of item and item data
  46647. + * structure needed for insertion
  46648. + */
  46649. + reiser4_set_extent(&new_ext, UNALLOCATED_EXTENT_START, count);
  46650. + init_new_extent(&idata, &new_ext, 1);
  46651. +
  46652. + /* insert extent item into the tree */
  46653. + result = insert_extent_by_coord(&uf_coord->coord, &idata, key,
  46654. + uf_coord->lh);
  46655. + if (result)
  46656. + return result;
  46657. +
  46658. + /*
  46659. + * make sure that we hold long term locked twig node containing all
  46660. + * jnodes we are about to capture
  46661. + */
  46662. + check_jnodes(uf_coord->lh->node, key, count);
  46663. + /*
  46664. + * assign fake block numbers to all jnodes, capture and mark them dirty
  46665. + */
  46666. + block = fake_blocknr_unformatted(count);
  46667. + for (i = 0; i < count; i ++, block ++) {
  46668. + node = jnodes[i];
  46669. + spin_lock_jnode(node);
  46670. + JF_SET(node, JNODE_CREATED);
  46671. + jnode_set_block(node, &block);
  46672. + result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
  46673. + BUG_ON(result != 0);
  46674. + jnode_make_dirty_locked(node);
  46675. + spin_unlock_jnode(node);
  46676. + }
  46677. +
  46678. + /*
  46679. + * invalidate coordinate, research must be performed to continue
  46680. + * because write will continue on twig level
  46681. + */
  46682. + uf_coord->valid = 0;
  46683. + return count;
  46684. +}
  46685. +
  46686. +/**
  46687. + * plug_hole - replace hole extent with unallocated and holes
  46688. + * @uf_coord:
  46689. + * @key:
  46690. + * @how: out: code describing how the hole was plugged
  46691. + *
  46692. + *
  46693. + * Creates an unallocated extent of width 1 within a hole. In worst case two
  46694. + * additional extents can be created.
  46695. + */
  46696. +static int plug_hole(uf_coord_t *uf_coord, const reiser4_key *key, int *how)
  46697. +{
  46698. + struct replace_handle rh;
  46699. + reiser4_extent *ext;
  46700. + reiser4_block_nr width, pos_in_unit;
  46701. + coord_t *coord;
  46702. + struct extent_coord_extension *ext_coord;
  46703. + int return_inserted_position;
  46704. +
  46705. + check_uf_coord(uf_coord, key);
  46706. +
  46707. + rh.coord = coord_by_uf_coord(uf_coord);
  46708. + rh.lh = uf_coord->lh;
  46709. + rh.flags = 0;
  46710. +
  46711. + coord = coord_by_uf_coord(uf_coord);
  46712. + ext_coord = ext_coord_by_uf_coord(uf_coord);
  46713. + ext = ext_by_ext_coord(uf_coord);
  46714. +
  46715. + width = ext_coord->width;
  46716. + pos_in_unit = ext_coord->pos_in_unit;
  46717. +
  46718. + *how = 0;
  46719. + if (width == 1) {
  46720. + reiser4_set_extent(ext, UNALLOCATED_EXTENT_START, 1);
  46721. + znode_make_dirty(coord->node);
  46722. + /* update uf_coord */
  46723. + ON_DEBUG(ext_coord->extent = *ext);
  46724. + *how = 1;
  46725. + return 0;
  46726. + } else if (pos_in_unit == 0) {
  46727. + /* we deal with first element of extent */
  46728. + if (coord->unit_pos) {
  46729. + /* there is an extent to the left */
  46730. + if (state_of_extent(ext - 1) == UNALLOCATED_EXTENT) {
  46731. + /*
  46732. + * left neighboring unit is an unallocated
  46733. + * extent. Increase its width and decrease
  46734. + * width of hole
  46735. + */
  46736. + extent_set_width(ext - 1,
  46737. + extent_get_width(ext - 1) + 1);
  46738. + extent_set_width(ext, width - 1);
  46739. + znode_make_dirty(coord->node);
  46740. +
  46741. + /* update coord extension */
  46742. + coord->unit_pos--;
  46743. + ext_coord->width = extent_get_width(ext - 1);
  46744. + ext_coord->pos_in_unit = ext_coord->width - 1;
  46745. + ext_coord->ext_offset -= sizeof(reiser4_extent);
  46746. + ON_DEBUG(ext_coord->extent =
  46747. + *extent_by_coord(coord));
  46748. + *how = 2;
  46749. + return 0;
  46750. + }
  46751. + }
  46752. + /* extent for replace */
  46753. + reiser4_set_extent(&rh.overwrite, UNALLOCATED_EXTENT_START, 1);
  46754. + /* extent to be inserted */
  46755. + reiser4_set_extent(&rh.new_extents[0], HOLE_EXTENT_START,
  46756. + width - 1);
  46757. + rh.nr_new_extents = 1;
  46758. +
  46759. + /* have reiser4_replace_extent to return with @coord and
  46760. + @uf_coord->lh set to unit which was replaced */
  46761. + return_inserted_position = 0;
  46762. + *how = 3;
  46763. + } else if (pos_in_unit == width - 1) {
  46764. + /* we deal with last element of extent */
  46765. + if (coord->unit_pos < nr_units_extent(coord) - 1) {
  46766. + /* there is an extent unit to the right */
  46767. + if (state_of_extent(ext + 1) == UNALLOCATED_EXTENT) {
  46768. + /*
  46769. + * right neighboring unit is an unallocated
  46770. + * extent. Increase its width and decrease
  46771. + * width of hole
  46772. + */
  46773. + extent_set_width(ext + 1,
  46774. + extent_get_width(ext + 1) + 1);
  46775. + extent_set_width(ext, width - 1);
  46776. + znode_make_dirty(coord->node);
  46777. +
  46778. + /* update coord extension */
  46779. + coord->unit_pos++;
  46780. + ext_coord->width = extent_get_width(ext + 1);
  46781. + ext_coord->pos_in_unit = 0;
  46782. + ext_coord->ext_offset += sizeof(reiser4_extent);
  46783. + ON_DEBUG(ext_coord->extent =
  46784. + *extent_by_coord(coord));
  46785. + *how = 4;
  46786. + return 0;
  46787. + }
  46788. + }
  46789. + /* extent for replace */
  46790. + reiser4_set_extent(&rh.overwrite, HOLE_EXTENT_START, width - 1);
  46791. + /* extent to be inserted */
  46792. + reiser4_set_extent(&rh.new_extents[0], UNALLOCATED_EXTENT_START,
  46793. + 1);
  46794. + rh.nr_new_extents = 1;
  46795. +
  46796. + /* have reiser4_replace_extent to return with @coord and
  46797. + @uf_coord->lh set to unit which was inserted */
  46798. + return_inserted_position = 1;
  46799. + *how = 5;
  46800. + } else {
  46801. + /* extent for replace */
  46802. + reiser4_set_extent(&rh.overwrite, HOLE_EXTENT_START,
  46803. + pos_in_unit);
  46804. + /* extents to be inserted */
  46805. + reiser4_set_extent(&rh.new_extents[0], UNALLOCATED_EXTENT_START,
  46806. + 1);
  46807. + reiser4_set_extent(&rh.new_extents[1], HOLE_EXTENT_START,
  46808. + width - pos_in_unit - 1);
  46809. + rh.nr_new_extents = 2;
  46810. +
  46811. + /* have reiser4_replace_extent to return with @coord and
  46812. + @uf_coord->lh set to first of units which were inserted */
  46813. + return_inserted_position = 1;
  46814. + *how = 6;
  46815. + }
  46816. + unit_key_by_coord(coord, &rh.paste_key);
  46817. + set_key_offset(&rh.paste_key, get_key_offset(&rh.paste_key) +
  46818. + extent_get_width(&rh.overwrite) * current_blocksize);
  46819. +
  46820. + uf_coord->valid = 0;
  46821. + return reiser4_replace_extent(&rh, return_inserted_position);
  46822. +}
  46823. +
  46824. +/**
  46825. + * overwrite_one_block -
  46826. + * @uf_coord:
  46827. + * @key:
  46828. + * @node:
  46829. + *
  46830. + * If @node corresponds to hole extent - create unallocated extent for it and
  46831. + * assign fake block number. If @node corresponds to allocated extent - assign
  46832. + * block number of jnode
  46833. + */
  46834. +static int overwrite_one_block(uf_coord_t *uf_coord, const reiser4_key *key,
  46835. + jnode *node, int *hole_plugged)
  46836. +{
  46837. + int result;
  46838. + struct extent_coord_extension *ext_coord;
  46839. + reiser4_extent *ext;
  46840. + reiser4_block_nr block;
  46841. + int how;
  46842. +
  46843. + assert("vs-1312", uf_coord->coord.between == AT_UNIT);
  46844. +
  46845. + result = 0;
  46846. + ext_coord = ext_coord_by_uf_coord(uf_coord);
  46847. + check_uf_coord(uf_coord, NULL);
  46848. + ext = ext_by_ext_coord(uf_coord);
  46849. + assert("", state_of_extent(ext) != UNALLOCATED_EXTENT);
  46850. +
  46851. + switch (state_of_extent(ext)) {
  46852. + case ALLOCATED_EXTENT:
  46853. + block = extent_get_start(ext) + ext_coord->pos_in_unit;
  46854. + break;
  46855. +
  46856. + case HOLE_EXTENT:
  46857. + inode_add_blocks(mapping_jnode(node)->host, 1);
  46858. + result = plug_hole(uf_coord, key, &how);
  46859. + if (result)
  46860. + return result;
  46861. + block = fake_blocknr_unformatted(1);
  46862. + if (hole_plugged)
  46863. + *hole_plugged = 1;
  46864. + JF_SET(node, JNODE_CREATED);
  46865. + break;
  46866. +
  46867. + default:
  46868. + return RETERR(-EIO);
  46869. + }
  46870. +
  46871. + jnode_set_block(node, &block);
  46872. + return 0;
  46873. +}
  46874. +
  46875. +/**
  46876. + * move_coord - move coordinate forward
  46877. + * @uf_coord:
  46878. + *
  46879. + * Move coordinate one data block pointer forward. Return 1 if coord is set to
  46880. + * the last one already or is invalid.
  46881. + */
  46882. +static int move_coord(uf_coord_t *uf_coord)
  46883. +{
  46884. + struct extent_coord_extension *ext_coord;
  46885. +
  46886. + if (uf_coord->valid == 0)
  46887. + return 1;
  46888. + ext_coord = &uf_coord->extension.extent;
  46889. + ext_coord->pos_in_unit ++;
  46890. + if (ext_coord->pos_in_unit < ext_coord->width)
  46891. + /* coordinate moved within the unit */
  46892. + return 0;
  46893. +
  46894. + /* end of unit is reached. Try to move to next unit */
  46895. + ext_coord->pos_in_unit = 0;
  46896. + uf_coord->coord.unit_pos ++;
  46897. + if (uf_coord->coord.unit_pos < ext_coord->nr_units) {
  46898. + /* coordinate moved to next unit */
  46899. + ext_coord->ext_offset += sizeof(reiser4_extent);
  46900. + ext_coord->width =
  46901. + extent_get_width(ext_by_offset
  46902. + (uf_coord->coord.node,
  46903. + ext_coord->ext_offset));
  46904. + ON_DEBUG(ext_coord->extent =
  46905. + *ext_by_offset(uf_coord->coord.node,
  46906. + ext_coord->ext_offset));
  46907. + return 0;
  46908. + }
  46909. + /* end of item is reached */
  46910. + uf_coord->valid = 0;
  46911. + return 1;
  46912. +}
  46913. +
  46914. +/**
  46915. + * overwrite_extent -
  46916. + * @uf_coord:
  46917. + *
  46918. + * Returns number of handled jnodes.
  46919. + */
  46920. +static int overwrite_extent(uf_coord_t *uf_coord, const reiser4_key *key,
  46921. + jnode **jnodes, int count, int *plugged_hole)
  46922. +{
  46923. + int result;
  46924. + reiser4_key k;
  46925. + int i;
  46926. + jnode *node;
  46927. +
  46928. + k = *key;
  46929. + for (i = 0; i < count; i ++) {
  46930. + node = jnodes[i];
  46931. + if (*jnode_get_block(node) == 0) {
  46932. + result = overwrite_one_block(uf_coord, &k, node, plugged_hole);
  46933. + if (result)
  46934. + return result;
  46935. + }
  46936. + /*
  46937. + * make sure that we hold long term locked twig node containing
  46938. + * all jnodes we are about to capture
  46939. + */
  46940. + check_jnodes(uf_coord->lh->node, &k, 1);
  46941. + /*
  46942. + * assign fake block numbers to all jnodes, capture and mark
  46943. + * them dirty
  46944. + */
  46945. + spin_lock_jnode(node);
  46946. + result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
  46947. + BUG_ON(result != 0);
  46948. + jnode_make_dirty_locked(node);
  46949. + spin_unlock_jnode(node);
  46950. +
  46951. + if (uf_coord->valid == 0)
  46952. + return i + 1;
  46953. +
  46954. + check_uf_coord(uf_coord, &k);
  46955. +
  46956. + if (move_coord(uf_coord)) {
  46957. + /*
  46958. + * failed to move to the next node pointer. Either end
  46959. + * of file or end of twig node is reached. In the latter
  46960. + * case we might go to the right neighbor.
  46961. + */
  46962. + uf_coord->valid = 0;
  46963. + return i + 1;
  46964. + }
  46965. + set_key_offset(&k, get_key_offset(&k) + PAGE_SIZE);
  46966. + }
  46967. +
  46968. + return count;
  46969. +}
  46970. +
  46971. +/**
  46972. + * reiser4_update_extent
  46973. + * @inode:
  46974. + * @node:
  46975. + * @pos:
  46976. + * @plugged_hole:
  46977. + *
  46978. + */
  46979. +int reiser4_update_extent(struct inode *inode, jnode *node, loff_t pos,
  46980. + int *plugged_hole)
  46981. +{
  46982. + int result;
  46983. + znode *loaded;
  46984. + uf_coord_t uf_coord;
  46985. + coord_t *coord;
  46986. + lock_handle lh;
  46987. + reiser4_key key;
  46988. +
  46989. + assert("", reiser4_lock_counters()->d_refs == 0);
  46990. +
  46991. + key_by_inode_and_offset_common(inode, pos, &key);
  46992. +
  46993. + init_uf_coord(&uf_coord, &lh);
  46994. + coord = &uf_coord.coord;
  46995. + result = find_file_item_nohint(coord, &lh, &key,
  46996. + ZNODE_WRITE_LOCK, inode);
  46997. + if (IS_CBKERR(result)) {
  46998. + assert("", reiser4_lock_counters()->d_refs == 0);
  46999. + return result;
  47000. + }
  47001. +
  47002. + result = zload(coord->node);
  47003. + BUG_ON(result != 0);
  47004. + loaded = coord->node;
  47005. +
  47006. + if (coord->between == AFTER_UNIT) {
  47007. + /*
  47008. + * append existing extent item with unallocated extent of width
  47009. + * nr_jnodes
  47010. + */
  47011. + init_coord_extension_extent(&uf_coord,
  47012. + get_key_offset(&key));
  47013. + result = append_last_extent(&uf_coord, &key,
  47014. + &node, 1);
  47015. + } else if (coord->between == AT_UNIT) {
  47016. + /*
  47017. + * overwrite
  47018. + * not optimal yet. Will be optimized if new write will show
  47019. + * performance win.
  47020. + */
  47021. + init_coord_extension_extent(&uf_coord,
  47022. + get_key_offset(&key));
  47023. + result = overwrite_extent(&uf_coord, &key,
  47024. + &node, 1, plugged_hole);
  47025. + } else {
  47026. + /*
  47027. + * there are no items of this file in the tree yet. Create
  47028. + * first item of the file inserting one unallocated extent of
  47029. + * width nr_jnodes
  47030. + */
  47031. + result = insert_first_extent(&uf_coord, &key, &node, 1, inode);
  47032. + }
  47033. + assert("", result == 1 || result < 0);
  47034. + zrelse(loaded);
  47035. + done_lh(&lh);
  47036. + assert("", reiser4_lock_counters()->d_refs == 0);
  47037. + return (result == 1) ? 0 : result;
  47038. +}
  47039. +
  47040. +/**
  47041. + * update_extents
  47042. + * @file:
  47043. + * @jnodes:
  47044. + * @count:
  47045. + * @pos:
  47046. + *
  47047. + */
  47048. +static int update_extents(struct file *file, struct inode *inode,
  47049. + jnode **jnodes, int count, loff_t pos)
  47050. +{
  47051. + struct hint hint;
  47052. + reiser4_key key;
  47053. + int result;
  47054. + znode *loaded;
  47055. +
  47056. + result = load_file_hint(file, &hint);
  47057. + BUG_ON(result != 0);
  47058. +
  47059. + if (count != 0)
  47060. + /*
  47061. + * count == 0 is special case: expanding truncate
  47062. + */
  47063. + pos = (loff_t)index_jnode(jnodes[0]) << PAGE_SHIFT;
  47064. + key_by_inode_and_offset_common(inode, pos, &key);
  47065. +
  47066. + assert("", reiser4_lock_counters()->d_refs == 0);
  47067. +
  47068. + do {
  47069. + result = find_file_item(&hint, &key, ZNODE_WRITE_LOCK, inode);
  47070. + if (IS_CBKERR(result)) {
  47071. + assert("", reiser4_lock_counters()->d_refs == 0);
  47072. + return result;
  47073. + }
  47074. +
  47075. + result = zload(hint.ext_coord.coord.node);
  47076. + BUG_ON(result != 0);
  47077. + loaded = hint.ext_coord.coord.node;
  47078. +
  47079. + if (hint.ext_coord.coord.between == AFTER_UNIT) {
  47080. + /*
  47081. + * append existing extent item with unallocated extent
  47082. + * of width nr_jnodes
  47083. + */
  47084. + if (hint.ext_coord.valid == 0)
  47085. + /* NOTE: get statistics on this */
  47086. + init_coord_extension_extent(&hint.ext_coord,
  47087. + get_key_offset(&key));
  47088. + result = append_last_extent(&hint.ext_coord, &key,
  47089. + jnodes, count);
  47090. + } else if (hint.ext_coord.coord.between == AT_UNIT) {
  47091. + /*
  47092. + * overwrite
  47093. + * not optimal yet. Will be optimized if new write will
  47094. + * show performance win.
  47095. + */
  47096. + if (hint.ext_coord.valid == 0)
  47097. + /* NOTE: get statistics on this */
  47098. + init_coord_extension_extent(&hint.ext_coord,
  47099. + get_key_offset(&key));
  47100. + result = overwrite_extent(&hint.ext_coord, &key,
  47101. + jnodes, count, NULL);
  47102. + } else {
  47103. + /*
  47104. + * there are no items of this file in the tree
  47105. + * yet. Create first item of the file inserting one
  47106. + * unallocated extent of width nr_jnodes
  47107. + */
  47108. + result = insert_first_extent(&hint.ext_coord, &key,
  47109. + jnodes, count, inode);
  47110. + }
  47111. + zrelse(loaded);
  47112. + if (result < 0) {
  47113. + done_lh(hint.ext_coord.lh);
  47114. + break;
  47115. + }
  47116. +
  47117. + jnodes += result;
  47118. + count -= result;
  47119. + set_key_offset(&key, get_key_offset(&key) + result * PAGE_SIZE);
  47120. +
  47121. + /* seal and unlock znode */
  47122. + if (hint.ext_coord.valid)
  47123. + reiser4_set_hint(&hint, &key, ZNODE_WRITE_LOCK);
  47124. + else
  47125. + reiser4_unset_hint(&hint);
  47126. +
  47127. + } while (count > 0);
  47128. +
  47129. + save_file_hint(file, &hint);
  47130. + assert("", reiser4_lock_counters()->d_refs == 0);
  47131. + return result;
  47132. +}
  47133. +
  47134. +/**
  47135. + * write_extent_reserve_space - reserve space for extent write operation
  47136. + * @inode:
  47137. + *
  47138. + * Estimates and reserves space which may be required for writing
  47139. + * WRITE_GRANULARITY pages of file.
  47140. + */
  47141. +static int write_extent_reserve_space(struct inode *inode)
  47142. +{
  47143. + __u64 count;
  47144. + reiser4_tree *tree;
  47145. +
  47146. + /*
  47147. + * to write WRITE_GRANULARITY pages to a file by extents we have to
  47148. + * reserve disk space for:
  47149. +
  47150. + * 1. find_file_item may have to insert empty node to the tree (empty
  47151. + * leaf node between two extent items). This requires 1 block and
  47152. + * number of blocks which are necessary to perform insertion of an
  47153. + * internal item into twig level.
  47154. +
  47155. + * 2. for each of written pages there might be needed 1 block and
  47156. + * number of blocks which might be necessary to perform insertion of or
  47157. + * paste to an extent item.
  47158. +
  47159. + * 3. stat data update
  47160. + */
  47161. + tree = reiser4_tree_by_inode(inode);
  47162. + count = estimate_one_insert_item(tree) +
  47163. + WRITE_GRANULARITY * (1 + estimate_one_insert_into_item(tree)) +
  47164. + estimate_one_insert_item(tree);
  47165. + grab_space_enable();
  47166. + return reiser4_grab_space(count, 0 /* flags */);
  47167. +}
  47168. +
  47169. +/*
  47170. + * filemap_copy_from_user no longer exists in generic code, because it
  47171. + * is deadlocky (copying from user while holding the page lock is bad).
  47172. + * As a temporary fix for reiser4, just define it here.
  47173. + */
  47174. +static inline size_t
  47175. +filemap_copy_from_user(struct page *page, unsigned long offset,
  47176. + const char __user *buf, unsigned bytes)
  47177. +{
  47178. + char *kaddr;
  47179. + int left;
  47180. +
  47181. + kaddr = kmap_atomic(page);
  47182. + left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
  47183. + kunmap_atomic(kaddr);
  47184. +
  47185. + if (left != 0) {
  47186. + /* Do it the slow way */
  47187. + kaddr = kmap(page);
  47188. + left = __copy_from_user(kaddr + offset, buf, bytes);
  47189. + kunmap(page);
  47190. + }
  47191. + return bytes - left;
  47192. +}
  47193. +
  47194. +/**
  47195. + * reiser4_write_extent - write method of extent item plugin
  47196. + * @file: file to write to
  47197. + * @buf: address of user-space buffer
  47198. + * @count: number of bytes to write
  47199. + * @pos: position in file to write to
  47200. + *
  47201. + */
  47202. +ssize_t reiser4_write_extent(struct file *file, struct inode * inode,
  47203. + const char __user *buf, size_t count, loff_t *pos)
  47204. +{
  47205. + int have_to_update_extent;
  47206. + int nr_pages, nr_dirty;
  47207. + struct page *page;
  47208. + jnode *jnodes[WRITE_GRANULARITY + 1];
  47209. + unsigned long index;
  47210. + unsigned long end;
  47211. + int i;
  47212. + int to_page, page_off;
  47213. + size_t left, written;
  47214. + int result = 0;
  47215. +
  47216. + if (write_extent_reserve_space(inode))
  47217. + return RETERR(-ENOSPC);
  47218. +
  47219. + if (count == 0) {
  47220. + /* truncate case */
  47221. + update_extents(file, inode, jnodes, 0, *pos);
  47222. + return 0;
  47223. + }
  47224. +
  47225. + BUG_ON(get_current_context()->trans->atom != NULL);
  47226. +
  47227. + left = count;
  47228. + index = *pos >> PAGE_SHIFT;
  47229. + /* calculate number of pages which are to be written */
  47230. + end = ((*pos + count - 1) >> PAGE_SHIFT);
  47231. + nr_pages = end - index + 1;
  47232. + nr_dirty = 0;
  47233. + assert("", nr_pages <= WRITE_GRANULARITY + 1);
  47234. +
  47235. + /* get pages and jnodes */
  47236. + for (i = 0; i < nr_pages; i ++) {
  47237. + page = find_or_create_page(inode->i_mapping, index + i,
  47238. + reiser4_ctx_gfp_mask_get());
  47239. + if (page == NULL) {
  47240. + nr_pages = i;
  47241. + result = RETERR(-ENOMEM);
  47242. + goto out;
  47243. + }
  47244. +
  47245. + jnodes[i] = jnode_of_page(page);
  47246. + if (IS_ERR(jnodes[i])) {
  47247. + unlock_page(page);
  47248. + put_page(page);
  47249. + nr_pages = i;
  47250. + result = RETERR(-ENOMEM);
  47251. + goto out;
  47252. + }
  47253. + /* prevent jnode and page from disconnecting */
  47254. + JF_SET(jnodes[i], JNODE_WRITE_PREPARED);
  47255. + unlock_page(page);
  47256. + }
  47257. +
  47258. + BUG_ON(get_current_context()->trans->atom != NULL);
  47259. +
  47260. + have_to_update_extent = 0;
  47261. +
  47262. + page_off = (*pos & (PAGE_SIZE - 1));
  47263. + for (i = 0; i < nr_pages; i ++) {
  47264. + to_page = PAGE_SIZE - page_off;
  47265. + if (to_page > left)
  47266. + to_page = left;
  47267. + page = jnode_page(jnodes[i]);
  47268. + if (page_offset(page) < inode->i_size &&
  47269. + !PageUptodate(page) && to_page != PAGE_SIZE) {
  47270. + /*
  47271. + * the above is not optimal for partial write to last
  47272. + * page of file when file size is not at boundary of
  47273. + * page
  47274. + */
  47275. + lock_page(page);
  47276. + if (!PageUptodate(page)) {
  47277. + result = readpage_unix_file(NULL, page);
  47278. + BUG_ON(result != 0);
  47279. + /* wait for read completion */
  47280. + lock_page(page);
  47281. + BUG_ON(!PageUptodate(page));
  47282. + } else
  47283. + result = 0;
  47284. + unlock_page(page);
  47285. + }
  47286. +
  47287. + BUG_ON(get_current_context()->trans->atom != NULL);
  47288. + fault_in_readable(buf, to_page);
  47289. + BUG_ON(get_current_context()->trans->atom != NULL);
  47290. +
  47291. + lock_page(page);
  47292. + if (!PageUptodate(page) && to_page != PAGE_SIZE)
  47293. + zero_user_segments(page, 0, page_off,
  47294. + page_off + to_page,
  47295. + PAGE_SIZE);
  47296. +
  47297. + written = filemap_copy_from_user(page, page_off, buf, to_page);
  47298. + if (unlikely(written != to_page)) {
  47299. + unlock_page(page);
  47300. + result = RETERR(-EFAULT);
  47301. + break;
  47302. + }
  47303. +
  47304. + flush_dcache_page(page);
  47305. + set_page_dirty_notag(page);
  47306. + unlock_page(page);
  47307. + nr_dirty++;
  47308. +
  47309. + mark_page_accessed(page);
  47310. + SetPageUptodate(page);
  47311. +
  47312. + if (jnodes[i]->blocknr == 0)
  47313. + have_to_update_extent ++;
  47314. +
  47315. + page_off = 0;
  47316. + buf += to_page;
  47317. + left -= to_page;
  47318. + BUG_ON(get_current_context()->trans->atom != NULL);
  47319. + }
  47320. +
  47321. + if (have_to_update_extent) {
  47322. + update_extents(file, inode, jnodes, nr_dirty, *pos);
  47323. + } else {
  47324. + for (i = 0; i < nr_dirty; i ++) {
  47325. + int ret;
  47326. + spin_lock_jnode(jnodes[i]);
  47327. + ret = reiser4_try_capture(jnodes[i],
  47328. + ZNODE_WRITE_LOCK, 0);
  47329. + BUG_ON(ret != 0);
  47330. + jnode_make_dirty_locked(jnodes[i]);
  47331. + spin_unlock_jnode(jnodes[i]);
  47332. + }
  47333. + }
  47334. +out:
  47335. + for (i = 0; i < nr_pages; i ++) {
  47336. + put_page(jnode_page(jnodes[i]));
  47337. + JF_CLR(jnodes[i], JNODE_WRITE_PREPARED);
  47338. + jput(jnodes[i]);
  47339. + }
  47340. +
  47341. + /* the only errors handled so far is ENOMEM and
  47342. + EFAULT on copy_from_user */
  47343. +
  47344. + return (count - left) ? (count - left) : result;
  47345. +}
  47346. +
  47347. +int reiser4_do_readpage_extent(reiser4_extent * ext, reiser4_block_nr pos,
  47348. + struct page *page)
  47349. +{
  47350. + jnode *j;
  47351. + struct address_space *mapping;
  47352. + unsigned long index;
  47353. + oid_t oid;
  47354. + reiser4_block_nr block;
  47355. +
  47356. + mapping = page->mapping;
  47357. + oid = get_inode_oid(mapping->host);
  47358. + index = page->index;
  47359. +
  47360. + switch (state_of_extent(ext)) {
  47361. + case HOLE_EXTENT:
  47362. + /*
  47363. + * it is possible to have hole page with jnode, if page was
  47364. + * eflushed previously.
  47365. + */
  47366. + j = jfind(mapping, index);
  47367. + if (j == NULL) {
  47368. + zero_user(page, 0, PAGE_SIZE);
  47369. + SetPageUptodate(page);
  47370. + unlock_page(page);
  47371. + return 0;
  47372. + }
  47373. + spin_lock_jnode(j);
  47374. + if (!jnode_page(j)) {
  47375. + jnode_attach_page(j, page);
  47376. + } else {
  47377. + BUG_ON(jnode_page(j) != page);
  47378. + assert("vs-1504", jnode_page(j) == page);
  47379. + }
  47380. + block = *jnode_get_io_block(j);
  47381. + spin_unlock_jnode(j);
  47382. + if (block == 0) {
  47383. + zero_user(page, 0, PAGE_SIZE);
  47384. + SetPageUptodate(page);
  47385. + unlock_page(page);
  47386. + jput(j);
  47387. + return 0;
  47388. + }
  47389. + break;
  47390. +
  47391. + case ALLOCATED_EXTENT:
  47392. + j = jnode_of_page(page);
  47393. + if (IS_ERR(j))
  47394. + return PTR_ERR(j);
  47395. + if (*jnode_get_block(j) == 0) {
  47396. + reiser4_block_nr blocknr;
  47397. +
  47398. + blocknr = extent_get_start(ext) + pos;
  47399. + jnode_set_block(j, &blocknr);
  47400. + } else
  47401. + assert("vs-1403",
  47402. + j->blocknr == extent_get_start(ext) + pos);
  47403. + break;
  47404. +
  47405. + case UNALLOCATED_EXTENT:
  47406. + j = jfind(mapping, index);
  47407. + assert("nikita-2688", j);
  47408. + assert("vs-1426", jnode_page(j) == NULL);
  47409. +
  47410. + spin_lock_jnode(j);
  47411. + jnode_attach_page(j, page);
  47412. + spin_unlock_jnode(j);
  47413. + break;
  47414. +
  47415. + default:
  47416. + warning("vs-957", "wrong extent\n");
  47417. + return RETERR(-EIO);
  47418. + }
  47419. +
  47420. + BUG_ON(j == 0);
  47421. + reiser4_page_io(page, j, READ, reiser4_ctx_gfp_mask_get());
  47422. + jput(j);
  47423. + return 0;
  47424. +}
  47425. +
  47426. +/**
  47427. + * plugin->u.item.s.file.read
  47428. + */
  47429. +int reiser4_read_extent(flow_t *flow, hint_t *hint,
  47430. + struct kiocb *iocb, struct iov_iter *iter)
  47431. +{
  47432. + uf_coord_t *uf_coord;
  47433. + coord_t *coord;
  47434. + loff_t from_extent;
  47435. + reiser4_key ikey;
  47436. + ssize_t read;
  47437. + size_t wanted;
  47438. +
  47439. + assert("vs-1353", current_blocksize == PAGE_SIZE);
  47440. + assert("vs-572", flow->user == 1);
  47441. + assert("vs-1351", flow->length > 0);
  47442. +
  47443. + uf_coord = &hint->ext_coord;
  47444. +
  47445. + check_uf_coord(uf_coord, NULL);
  47446. + assert("vs-33", uf_coord->lh == &hint->lh);
  47447. +
  47448. + coord = &uf_coord->coord;
  47449. + assert("vs-1119", znode_is_rlocked(coord->node));
  47450. + assert("vs-1120", znode_is_loaded(coord->node));
  47451. + assert("vs-1256", coord_matches_key_extent(coord, &flow->key));
  47452. + assert("edward-22",
  47453. + get_key_offset(item_key_by_coord(coord, &ikey)) +
  47454. + reiser4_extent_size(coord, nr_units_extent(coord)) >
  47455. + get_key_offset(&flow->key));
  47456. + /*
  47457. + * how many bytes can we copy out from this extent
  47458. + */
  47459. + from_extent = get_key_offset(item_key_by_coord(coord, &ikey)) +
  47460. + reiser4_extent_size(coord, nr_units_extent(coord)) -
  47461. + get_key_offset(&flow->key);
  47462. + /*
  47463. + * set a seal and release twig node
  47464. + */
  47465. + reiser4_set_hint(hint, &flow->key, ZNODE_READ_LOCK);
  47466. + /* &hint->lh is done */
  47467. +
  47468. + wanted = iov_iter_count(iter);
  47469. + iov_iter_truncate(iter, from_extent);
  47470. + /*
  47471. + * read not more than @from_extent bytes from this extent
  47472. + */
  47473. + read = generic_file_read_iter(iocb, iter);
  47474. + if (read > 0)
  47475. + wanted -= read;
  47476. + iov_iter_reexpand(iter, wanted);
  47477. +
  47478. + if (read <= 0)
  47479. + return read;
  47480. + move_flow_forward(flow, read);
  47481. + return 0;
  47482. +}
  47483. +
  47484. +/*
  47485. + * plugin->s.file.readpage
  47486. + *
  47487. + * reiser4_read->unix_file_read->page_cache_readahead->
  47488. + * ->reiser4_readpage_dispatch->readpage_unix_file->readpage_extent
  47489. + * or
  47490. + * filemap_fault->reiser4_readpage_dispatch->readpage_unix_file->
  47491. + * ->readpage_extent
  47492. + *
  47493. + * At the beginning: coord->node is read locked, zloaded, page is
  47494. + * locked, coord is set to existing unit inside of extent item (it
  47495. + * is not necessary that coord matches to page->index)
  47496. + */
  47497. +int reiser4_readpage_extent(void *vp, struct page *page)
  47498. +{
  47499. + uf_coord_t *uf_coord = vp;
  47500. + ON_DEBUG(coord_t * coord = &uf_coord->coord);
  47501. + ON_DEBUG(reiser4_key key);
  47502. +
  47503. + assert("vs-1040", PageLocked(page));
  47504. + assert("vs-1050", !PageUptodate(page));
  47505. + assert("vs-1039", page->mapping && page->mapping->host);
  47506. +
  47507. + assert("vs-1044", znode_is_loaded(coord->node));
  47508. + assert("vs-758", item_is_extent(coord));
  47509. + assert("vs-1046", coord_is_existing_unit(coord));
  47510. + assert("vs-1045", znode_is_rlocked(coord->node));
  47511. + assert("vs-1047",
  47512. + page->mapping->host->i_ino ==
  47513. + get_key_objectid(item_key_by_coord(coord, &key)));
  47514. + check_uf_coord(uf_coord, NULL);
  47515. +
  47516. + return reiser4_do_readpage_extent(ext_by_ext_coord(uf_coord),
  47517. + uf_coord->extension.extent.pos_in_unit,
  47518. + page);
  47519. +}
  47520. +
  47521. +int get_block_address_extent(const coord_t *coord, sector_t block,
  47522. + sector_t *result)
  47523. +{
  47524. + reiser4_extent *ext;
  47525. +
  47526. + if (!coord_is_existing_unit(coord))
  47527. + return RETERR(-EINVAL);
  47528. +
  47529. + ext = extent_by_coord(coord);
  47530. +
  47531. + if (state_of_extent(ext) != ALLOCATED_EXTENT)
  47532. + /* FIXME: bad things may happen if it is unallocated extent */
  47533. + *result = 0;
  47534. + else {
  47535. + reiser4_key key;
  47536. +
  47537. + unit_key_by_coord(coord, &key);
  47538. + assert("vs-1645",
  47539. + block >= get_key_offset(&key) >> current_blocksize_bits);
  47540. + assert("vs-1646",
  47541. + block <
  47542. + (get_key_offset(&key) >> current_blocksize_bits) +
  47543. + extent_get_width(ext));
  47544. + *result =
  47545. + extent_get_start(ext) + (block -
  47546. + (get_key_offset(&key) >>
  47547. + current_blocksize_bits));
  47548. + }
  47549. + return 0;
  47550. +}
  47551. +
  47552. +/*
  47553. + plugin->u.item.s.file.append_key
  47554. + key of first byte which is next after the last byte addressed by this extent
  47555. +*/
  47556. +reiser4_key *append_key_extent(const coord_t * coord, reiser4_key * key)
  47557. +{
  47558. + item_key_by_coord(coord, key);
  47559. + set_key_offset(key,
  47560. + get_key_offset(key) + reiser4_extent_size(coord,
  47561. + nr_units_extent
  47562. + (coord)));
  47563. +
  47564. + assert("vs-610", get_key_offset(key)
  47565. + && (get_key_offset(key) & (current_blocksize - 1)) == 0);
  47566. + return key;
  47567. +}
  47568. +
  47569. +/* plugin->u.item.s.file.init_coord_extension */
  47570. +void init_coord_extension_extent(uf_coord_t * uf_coord, loff_t lookuped)
  47571. +{
  47572. + coord_t *coord;
  47573. + struct extent_coord_extension *ext_coord;
  47574. + reiser4_key key;
  47575. + loff_t offset;
  47576. +
  47577. + assert("vs-1295", uf_coord->valid == 0);
  47578. +
  47579. + coord = &uf_coord->coord;
  47580. + assert("vs-1288", coord_is_iplug_set(coord));
  47581. + assert("vs-1327", znode_is_loaded(coord->node));
  47582. +
  47583. + if (coord->between != AFTER_UNIT && coord->between != AT_UNIT)
  47584. + return;
  47585. +
  47586. + ext_coord = &uf_coord->extension.extent;
  47587. + ext_coord->nr_units = nr_units_extent(coord);
  47588. + ext_coord->ext_offset =
  47589. + (char *)extent_by_coord(coord) - zdata(coord->node);
  47590. + ext_coord->width = extent_get_width(extent_by_coord(coord));
  47591. + ON_DEBUG(ext_coord->extent = *extent_by_coord(coord));
  47592. + uf_coord->valid = 1;
  47593. +
  47594. + /* pos_in_unit is the only uninitialized field in extended coord */
  47595. + if (coord->between == AFTER_UNIT) {
  47596. + assert("vs-1330",
  47597. + coord->unit_pos == nr_units_extent(coord) - 1);
  47598. +
  47599. + ext_coord->pos_in_unit = ext_coord->width - 1;
  47600. + } else {
  47601. + /* AT_UNIT */
  47602. + unit_key_by_coord(coord, &key);
  47603. + offset = get_key_offset(&key);
  47604. +
  47605. + assert("vs-1328", offset <= lookuped);
  47606. + assert("vs-1329",
  47607. + lookuped <
  47608. + offset + ext_coord->width * current_blocksize);
  47609. + ext_coord->pos_in_unit =
  47610. + ((lookuped - offset) >> current_blocksize_bits);
  47611. + }
  47612. +}
  47613. +
  47614. +/*
  47615. + * Local variables:
  47616. + * c-indentation-style: "K&R"
  47617. + * mode-name: "LC"
  47618. + * c-basic-offset: 8
  47619. + * tab-width: 8
  47620. + * fill-column: 79
  47621. + * scroll-step: 1
  47622. + * End:
  47623. + */
  47624. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/extent_flush_ops.c linux-5.16.14/fs/reiser4/plugin/item/extent_flush_ops.c
  47625. --- linux-5.16.14.orig/fs/reiser4/plugin/item/extent_flush_ops.c 1970-01-01 01:00:00.000000000 +0100
  47626. +++ linux-5.16.14/fs/reiser4/plugin/item/extent_flush_ops.c 2022-03-12 13:26:19.678892792 +0100
  47627. @@ -0,0 +1,686 @@
  47628. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  47629. +
  47630. +#include "item.h"
  47631. +#include "../../tree.h"
  47632. +#include "../../jnode.h"
  47633. +#include "../../super.h"
  47634. +#include "../../flush.h"
  47635. +#include "../../carry.h"
  47636. +#include "../object.h"
  47637. +
  47638. +#include <linux/pagemap.h>
  47639. +
  47640. +static reiser4_block_nr extent_unit_start(const coord_t * item);
  47641. +
  47642. +/* Return either first or last extent (depending on @side) of the item
  47643. + @coord is set to. Set @pos_in_unit either to first or to last block
  47644. + of extent. */
  47645. +static reiser4_extent *extent_utmost_ext(const coord_t * coord, sideof side,
  47646. + reiser4_block_nr * pos_in_unit)
  47647. +{
  47648. + reiser4_extent *ext;
  47649. +
  47650. + if (side == LEFT_SIDE) {
  47651. + /* get first extent of item */
  47652. + ext = extent_item(coord);
  47653. + *pos_in_unit = 0;
  47654. + } else {
  47655. + /* get last extent of item and last position within it */
  47656. + assert("vs-363", side == RIGHT_SIDE);
  47657. + ext = extent_item(coord) + coord_last_unit_pos(coord);
  47658. + *pos_in_unit = extent_get_width(ext) - 1;
  47659. + }
  47660. +
  47661. + return ext;
  47662. +}
  47663. +
  47664. +/* item_plugin->f.utmost_child */
  47665. +/* Return the child. Coord is set to extent item. Find jnode corresponding
  47666. + either to first or to last unformatted node pointed by the item */
  47667. +int utmost_child_extent(const coord_t * coord, sideof side, jnode ** childp)
  47668. +{
  47669. + reiser4_extent *ext;
  47670. + reiser4_block_nr pos_in_unit;
  47671. +
  47672. + ext = extent_utmost_ext(coord, side, &pos_in_unit);
  47673. +
  47674. + switch (state_of_extent(ext)) {
  47675. + case HOLE_EXTENT:
  47676. + *childp = NULL;
  47677. + return 0;
  47678. + case ALLOCATED_EXTENT:
  47679. + case UNALLOCATED_EXTENT:
  47680. + break;
  47681. + default:
  47682. + /* this should never happen */
  47683. + assert("vs-1417", 0);
  47684. + }
  47685. +
  47686. + {
  47687. + reiser4_key key;
  47688. + reiser4_tree *tree;
  47689. + unsigned long index;
  47690. +
  47691. + if (side == LEFT_SIDE) {
  47692. + /* get key of first byte addressed by the extent */
  47693. + item_key_by_coord(coord, &key);
  47694. + } else {
  47695. + /* get key of byte which next after last byte addressed by the extent */
  47696. + append_key_extent(coord, &key);
  47697. + }
  47698. +
  47699. + assert("vs-544",
  47700. + (get_key_offset(&key) >> PAGE_SHIFT) < ~0ul);
  47701. + /* index of first or last (depending on @side) page addressed
  47702. + by the extent */
  47703. + index =
  47704. + (unsigned long)(get_key_offset(&key) >> PAGE_SHIFT);
  47705. + if (side == RIGHT_SIDE)
  47706. + index--;
  47707. +
  47708. + tree = coord->node->zjnode.tree;
  47709. + *childp = jlookup(tree, get_key_objectid(&key), index);
  47710. + }
  47711. +
  47712. + return 0;
  47713. +}
  47714. +
  47715. +/* item_plugin->f.utmost_child_real_block */
  47716. +/* Return the child's block, if allocated. */
  47717. +int
  47718. +utmost_child_real_block_extent(const coord_t * coord, sideof side,
  47719. + reiser4_block_nr * block)
  47720. +{
  47721. + reiser4_extent *ext;
  47722. +
  47723. + ext = extent_by_coord(coord);
  47724. +
  47725. + switch (state_of_extent(ext)) {
  47726. + case ALLOCATED_EXTENT:
  47727. + *block = extent_get_start(ext);
  47728. + if (side == RIGHT_SIDE)
  47729. + *block += extent_get_width(ext) - 1;
  47730. + break;
  47731. + case HOLE_EXTENT:
  47732. + case UNALLOCATED_EXTENT:
  47733. + *block = 0;
  47734. + break;
  47735. + default:
  47736. + /* this should never happen */
  47737. + assert("vs-1418", 0);
  47738. + }
  47739. +
  47740. + return 0;
  47741. +}
  47742. +
  47743. +/* item_plugin->f.scan */
  47744. +/* Performs leftward scanning starting from an unformatted node and its parent coordinate.
  47745. + This scan continues, advancing the parent coordinate, until either it encounters a
  47746. + formatted child or it finishes scanning this node.
  47747. +
  47748. + If unallocated, the entire extent must be dirty and in the same atom. (Actually, I'm
  47749. + not sure this last property (same atom) is enforced, but it should be the case since
  47750. + one atom must write the parent and the others must read the parent, thus fusing?). In
  47751. + any case, the code below asserts this case for unallocated extents. Unallocated
  47752. + extents are thus optimized because we can skip to the endpoint when scanning.
  47753. +
  47754. + It returns control to reiser4_scan_extent, which handles these terminating
  47755. + conditions, e.g., by loading the next twig.
  47756. +*/
  47757. +int reiser4_scan_extent(flush_scan * scan)
  47758. +{
  47759. + coord_t coord;
  47760. + jnode *neighbor;
  47761. + unsigned long scan_index, unit_index, unit_width, scan_max, scan_dist;
  47762. + reiser4_block_nr unit_start;
  47763. + __u64 oid;
  47764. + reiser4_key key;
  47765. + int ret = 0, allocated, incr;
  47766. + reiser4_tree *tree;
  47767. +
  47768. + if (!JF_ISSET(scan->node, JNODE_DIRTY)) {
  47769. + scan->stop = 1;
  47770. + return 0; /* Race with truncate, this node is already
  47771. + * truncated. */
  47772. + }
  47773. +
  47774. + coord_dup(&coord, &scan->parent_coord);
  47775. +
  47776. + assert("jmacd-1404", !reiser4_scan_finished(scan));
  47777. + assert("jmacd-1405", jnode_get_level(scan->node) == LEAF_LEVEL);
  47778. + assert("jmacd-1406", jnode_is_unformatted(scan->node));
  47779. +
  47780. + /* The scan_index variable corresponds to the current page index of the
  47781. + unformatted block scan position. */
  47782. + scan_index = index_jnode(scan->node);
  47783. +
  47784. + assert("jmacd-7889", item_is_extent(&coord));
  47785. +
  47786. + repeat:
  47787. + /* objectid of file */
  47788. + oid = get_key_objectid(item_key_by_coord(&coord, &key));
  47789. +
  47790. + allocated = !extent_is_unallocated(&coord);
  47791. + /* Get the values of this extent unit: */
  47792. + unit_index = extent_unit_index(&coord);
  47793. + unit_width = extent_unit_width(&coord);
  47794. + unit_start = extent_unit_start(&coord);
  47795. +
  47796. + assert("jmacd-7187", unit_width > 0);
  47797. + assert("jmacd-7188", scan_index >= unit_index);
  47798. + assert("jmacd-7189", scan_index <= unit_index + unit_width - 1);
  47799. +
  47800. + /* Depending on the scan direction, we set different maximum values for scan_index
  47801. + (scan_max) and the number of nodes that would be passed if the scan goes the
  47802. + entire way (scan_dist). Incr is an integer reflecting the incremental
  47803. + direction of scan_index. */
  47804. + if (reiser4_scanning_left(scan)) {
  47805. + scan_max = unit_index;
  47806. + scan_dist = scan_index - unit_index;
  47807. + incr = -1;
  47808. + } else {
  47809. + scan_max = unit_index + unit_width - 1;
  47810. + scan_dist = scan_max - unit_index;
  47811. + incr = +1;
  47812. + }
  47813. +
  47814. + tree = coord.node->zjnode.tree;
  47815. +
  47816. + /* If the extent is allocated we have to check each of its blocks. If the extent
  47817. + is unallocated we can skip to the scan_max. */
  47818. + if (allocated) {
  47819. + do {
  47820. + neighbor = jlookup(tree, oid, scan_index);
  47821. + if (neighbor == NULL)
  47822. + goto stop_same_parent;
  47823. +
  47824. + if (scan->node != neighbor
  47825. + && !reiser4_scan_goto(scan, neighbor)) {
  47826. + /* @neighbor was jput() by reiser4_scan_goto */
  47827. + goto stop_same_parent;
  47828. + }
  47829. +
  47830. + ret = scan_set_current(scan, neighbor, 1, &coord);
  47831. + if (ret != 0) {
  47832. + goto exit;
  47833. + }
  47834. +
  47835. + /* reference to @neighbor is stored in @scan, no need
  47836. + to jput(). */
  47837. + scan_index += incr;
  47838. +
  47839. + } while (incr + scan_max != scan_index);
  47840. +
  47841. + } else {
  47842. + /* Optimized case for unallocated extents, skip to the end. */
  47843. + neighbor = jlookup(tree, oid, scan_max /*index */ );
  47844. + if (neighbor == NULL) {
  47845. + /* Race with truncate */
  47846. + scan->stop = 1;
  47847. + ret = 0;
  47848. + goto exit;
  47849. + }
  47850. +
  47851. + assert("zam-1043",
  47852. + reiser4_blocknr_is_fake(jnode_get_block(neighbor)));
  47853. +
  47854. + ret = scan_set_current(scan, neighbor, scan_dist, &coord);
  47855. + if (ret != 0) {
  47856. + goto exit;
  47857. + }
  47858. + }
  47859. +
  47860. + if (coord_sideof_unit(&coord, scan->direction) == 0
  47861. + && item_is_extent(&coord)) {
  47862. + /* Continue as long as there are more extent units. */
  47863. +
  47864. + scan_index =
  47865. + extent_unit_index(&coord) +
  47866. + (reiser4_scanning_left(scan) ?
  47867. + extent_unit_width(&coord) - 1 : 0);
  47868. + goto repeat;
  47869. + }
  47870. +
  47871. + if (0) {
  47872. + stop_same_parent:
  47873. +
  47874. + /* If we are scanning left and we stop in the middle of an allocated
  47875. + extent, we know the preceder immediately.. */
  47876. + /* middle of extent is (scan_index - unit_index) != 0. */
  47877. + if (reiser4_scanning_left(scan) &&
  47878. + (scan_index - unit_index) != 0) {
  47879. + /* FIXME(B): Someone should step-through and verify that this preceder
  47880. + calculation is indeed correct. */
  47881. + /* @unit_start is starting block (number) of extent
  47882. + unit. Flush stopped at the @scan_index block from
  47883. + the beginning of the file, which is (scan_index -
  47884. + unit_index) block within extent.
  47885. + */
  47886. + if (unit_start) {
  47887. + /* skip preceder update when we are at hole */
  47888. + scan->preceder_blk =
  47889. + unit_start + scan_index - unit_index;
  47890. + check_preceder(scan->preceder_blk);
  47891. + }
  47892. + }
  47893. +
  47894. + /* In this case, we leave coord set to the parent of scan->node. */
  47895. + scan->stop = 1;
  47896. +
  47897. + } else {
  47898. + /* In this case, we are still scanning, coord is set to the next item which is
  47899. + either off-the-end of the node or not an extent. */
  47900. + assert("jmacd-8912", scan->stop == 0);
  47901. + assert("jmacd-7812",
  47902. + (coord_is_after_sideof_unit(&coord, scan->direction)
  47903. + || !item_is_extent(&coord)));
  47904. + }
  47905. +
  47906. + ret = 0;
  47907. + exit:
  47908. + return ret;
  47909. +}
  47910. +
  47911. +/**
  47912. + * When on flush time unallocated extent is to be replaced with allocated one
  47913. + * it may happen that one unallocated extent will have to be replaced with set
  47914. + * of allocated extents. In this case insert_into_item will be called which may
  47915. + * have to add new nodes into tree. Space for that is taken from inviolable
  47916. + * reserve (5%).
  47917. + */
  47918. +static reiser4_block_nr reserve_replace(void)
  47919. +{
  47920. + reiser4_block_nr grabbed, needed;
  47921. +
  47922. + grabbed = get_current_context()->grabbed_blocks;
  47923. + needed = estimate_one_insert_into_item(current_tree);
  47924. + check_me("vpf-340", !reiser4_grab_space_force(needed, BA_RESERVED));
  47925. + return grabbed;
  47926. +}
  47927. +
  47928. +static void free_replace_reserved(reiser4_block_nr grabbed)
  47929. +{
  47930. + reiser4_context *ctx;
  47931. +
  47932. + ctx = get_current_context();
  47933. + grabbed2free(ctx, get_super_private(ctx->super),
  47934. + ctx->grabbed_blocks - grabbed);
  47935. +}
  47936. +
  47937. +/* Block offset of first block addressed by unit */
  47938. +__u64 extent_unit_index(const coord_t * item)
  47939. +{
  47940. + reiser4_key key;
  47941. +
  47942. + assert("vs-648", coord_is_existing_unit(item));
  47943. + unit_key_by_coord(item, &key);
  47944. + return get_key_offset(&key) >> current_blocksize_bits;
  47945. +}
  47946. +
  47947. +/* AUDIT shouldn't the return value be of reiser4_block_nr type?
  47948. + Josh's answer: who knows? Is a "number of blocks" the same type as "block offset"? */
  47949. +__u64 extent_unit_width(const coord_t * item)
  47950. +{
  47951. + assert("vs-649", coord_is_existing_unit(item));
  47952. + return width_by_coord(item);
  47953. +}
  47954. +
  47955. +/* Starting block location of this unit */
  47956. +static reiser4_block_nr extent_unit_start(const coord_t * item)
  47957. +{
  47958. + return extent_get_start(extent_by_coord(item));
  47959. +}
  47960. +
  47961. +/**
  47962. + * split_allocated_extent -
  47963. + * @coord:
  47964. + * @pos_in_unit:
  47965. + *
  47966. + * replace allocated extent with two allocated extents
  47967. + */
  47968. +int split_allocated_extent(coord_t *coord, reiser4_block_nr pos_in_unit)
  47969. +{
  47970. + int result;
  47971. + struct replace_handle *h;
  47972. + reiser4_extent *ext;
  47973. + reiser4_block_nr grabbed;
  47974. +
  47975. + ext = extent_by_coord(coord);
  47976. + assert("vs-1410", state_of_extent(ext) == ALLOCATED_EXTENT);
  47977. + assert("vs-1411", extent_get_width(ext) > pos_in_unit);
  47978. +
  47979. + h = kmalloc(sizeof(*h), reiser4_ctx_gfp_mask_get());
  47980. + if (h == NULL)
  47981. + return RETERR(-ENOMEM);
  47982. + h->coord = coord;
  47983. + h->lh = znode_lh(coord->node);
  47984. + h->pkey = &h->key;
  47985. + unit_key_by_coord(coord, h->pkey);
  47986. + set_key_offset(h->pkey,
  47987. + (get_key_offset(h->pkey) +
  47988. + pos_in_unit * current_blocksize));
  47989. + reiser4_set_extent(&h->overwrite, extent_get_start(ext),
  47990. + pos_in_unit);
  47991. + reiser4_set_extent(&h->new_extents[0],
  47992. + extent_get_start(ext) + pos_in_unit,
  47993. + extent_get_width(ext) - pos_in_unit);
  47994. + h->nr_new_extents = 1;
  47995. + h->flags = COPI_DONT_SHIFT_LEFT;
  47996. + h->paste_key = h->key;
  47997. +
  47998. + /* reserve space for extent unit paste, @grabbed is reserved before */
  47999. + grabbed = reserve_replace();
  48000. + result = reiser4_replace_extent(h, 0 /* leave @coord set to overwritten
  48001. + extent */);
  48002. + /* restore reserved */
  48003. + free_replace_reserved(grabbed);
  48004. + kfree(h);
  48005. + return result;
  48006. +}
  48007. +
  48008. +/* replace extent @ext by extent @replace. Try to merge @replace with previous extent of the item (if there is
  48009. + one). Return 1 if it succeeded, 0 - otherwise */
  48010. +static int try_to_merge_with_left(coord_t *coord, reiser4_extent *ext,
  48011. + reiser4_extent *replace)
  48012. +{
  48013. + assert("vs-1415", extent_by_coord(coord) == ext);
  48014. +
  48015. + if (coord->unit_pos == 0
  48016. + || state_of_extent(ext - 1) != ALLOCATED_EXTENT)
  48017. + /* @ext either does not exist or is not allocated extent */
  48018. + return 0;
  48019. + if (extent_get_start(ext - 1) + extent_get_width(ext - 1) !=
  48020. + extent_get_start(replace))
  48021. + return 0;
  48022. +
  48023. + /* we can glue, widen previous unit */
  48024. + extent_set_width(ext - 1,
  48025. + extent_get_width(ext - 1) + extent_get_width(replace));
  48026. +
  48027. + if (extent_get_width(ext) != extent_get_width(replace)) {
  48028. + /* make current extent narrower */
  48029. + if (state_of_extent(ext) == ALLOCATED_EXTENT)
  48030. + extent_set_start(ext,
  48031. + extent_get_start(ext) +
  48032. + extent_get_width(replace));
  48033. + extent_set_width(ext,
  48034. + extent_get_width(ext) -
  48035. + extent_get_width(replace));
  48036. + } else {
  48037. + /* current extent completely glued with its left neighbor, remove it */
  48038. + coord_t from, to;
  48039. +
  48040. + coord_dup(&from, coord);
  48041. + from.unit_pos = nr_units_extent(coord) - 1;
  48042. + coord_dup(&to, &from);
  48043. +
  48044. + /* currently cut from extent can cut either from the beginning or from the end. Move place which got
  48045. + freed after unit removal to end of item */
  48046. + memmove(ext, ext + 1,
  48047. + (from.unit_pos -
  48048. + coord->unit_pos) * sizeof(reiser4_extent));
  48049. + /* wipe part of item which is going to be cut, so that node_check will not be confused */
  48050. + cut_node_content(&from, &to, NULL, NULL, NULL);
  48051. + }
  48052. + znode_make_dirty(coord->node);
  48053. + /* move coord back */
  48054. + coord->unit_pos--;
  48055. + return 1;
  48056. +}
  48057. +
  48058. +/**
  48059. + * convert_extent - replace extent with 2 ones
  48060. + * @coord: coordinate of extent to be replaced
  48061. + * @replace: extent to overwrite the one @coord is set to
  48062. + *
  48063. + * Overwrites extent @coord is set to and paste one extent unit after
  48064. + * overwritten one if @replace is shorter than initial extent
  48065. + */
  48066. +int convert_extent(coord_t *coord, reiser4_extent *replace)
  48067. +{
  48068. + int result;
  48069. + struct replace_handle *h;
  48070. + reiser4_extent *ext;
  48071. + reiser4_block_nr start, width, new_width;
  48072. + reiser4_block_nr grabbed;
  48073. + extent_state state;
  48074. +
  48075. + ext = extent_by_coord(coord);
  48076. + state = state_of_extent(ext);
  48077. + start = extent_get_start(ext);
  48078. + width = extent_get_width(ext);
  48079. + new_width = extent_get_width(replace);
  48080. +
  48081. + assert("vs-1458", (state == UNALLOCATED_EXTENT ||
  48082. + state == ALLOCATED_EXTENT));
  48083. + assert("vs-1459", width >= new_width);
  48084. +
  48085. + if (try_to_merge_with_left(coord, ext, replace)) {
  48086. + /* merged @replace with left neighbor. Current unit is either
  48087. + removed or narrowed */
  48088. + return 0;
  48089. + }
  48090. +
  48091. + if (width == new_width) {
  48092. + /* replace current extent with @replace */
  48093. + *ext = *replace;
  48094. + znode_make_dirty(coord->node);
  48095. + return 0;
  48096. + }
  48097. +
  48098. + h = kmalloc(sizeof(*h), reiser4_ctx_gfp_mask_get());
  48099. + if (h == NULL)
  48100. + return RETERR(-ENOMEM);
  48101. + h->coord = coord;
  48102. + h->lh = znode_lh(coord->node);
  48103. + h->pkey = &h->key;
  48104. + unit_key_by_coord(coord, h->pkey);
  48105. + set_key_offset(h->pkey,
  48106. + (get_key_offset(h->pkey) + new_width * current_blocksize));
  48107. + h->overwrite = *replace;
  48108. +
  48109. + /* replace @ext with @replace and padding extent */
  48110. + reiser4_set_extent(&h->new_extents[0],
  48111. + (state == ALLOCATED_EXTENT) ?
  48112. + (start + new_width) :
  48113. + UNALLOCATED_EXTENT_START,
  48114. + width - new_width);
  48115. + h->nr_new_extents = 1;
  48116. + h->flags = COPI_DONT_SHIFT_LEFT;
  48117. + h->paste_key = h->key;
  48118. +
  48119. + /* reserve space for extent unit paste, @grabbed is reserved before */
  48120. + grabbed = reserve_replace();
  48121. + result = reiser4_replace_extent(h, 0 /* leave @coord set to overwritten
  48122. + extent */);
  48123. +
  48124. + /* restore reserved */
  48125. + free_replace_reserved(grabbed);
  48126. + kfree(h);
  48127. + return result;
  48128. +}
  48129. +
  48130. +/**
  48131. + * assign_real_blocknrs
  48132. + * @flush_pos:
  48133. + * @oid: objectid of file jnodes to assign block number to belongs to
  48134. + * @index: first jnode on the range
  48135. + * @count: number of jnodes to assign block numbers to
  48136. + * @first: start of allocated block range
  48137. + *
  48138. + * Assigns block numbers to each of @count jnodes. Index of first jnode is
  48139. + * @index. Jnodes get lookuped with jlookup.
  48140. + */
  48141. +void assign_real_blocknrs(flush_pos_t *flush_pos, oid_t oid,
  48142. + unsigned long index, reiser4_block_nr count,
  48143. + reiser4_block_nr first)
  48144. +{
  48145. + unsigned long i;
  48146. + reiser4_tree *tree;
  48147. + txn_atom *atom;
  48148. + int nr;
  48149. +
  48150. + atom = atom_locked_by_fq(flush_pos->fq);
  48151. + assert("vs-1468", atom);
  48152. + BUG_ON(atom == NULL);
  48153. +
  48154. + nr = 0;
  48155. + tree = current_tree;
  48156. + for (i = 0; i < count; ++i, ++index) {
  48157. + jnode *node;
  48158. +
  48159. + node = jlookup(tree, oid, index);
  48160. + assert("", node != NULL);
  48161. + BUG_ON(node == NULL);
  48162. +
  48163. + spin_lock_jnode(node);
  48164. + assert("", !jnode_is_flushprepped(node));
  48165. + assert("vs-1475", node->atom == atom);
  48166. + assert("vs-1476", atomic_read(&node->x_count) > 0);
  48167. +
  48168. + JF_CLR(node, JNODE_FLUSH_RESERVED);
  48169. + jnode_set_block(node, &first);
  48170. + unformatted_make_reloc(node, flush_pos->fq);
  48171. + ON_DEBUG(count_jnode(node->atom, node, NODE_LIST(node),
  48172. + FQ_LIST, 0));
  48173. + spin_unlock_jnode(node);
  48174. + first++;
  48175. +
  48176. + atomic_dec(&node->x_count);
  48177. + nr ++;
  48178. + }
  48179. +
  48180. + spin_unlock_atom(atom);
  48181. + return;
  48182. +}
  48183. +
  48184. +/**
  48185. + * allocated_extent_slum_size
  48186. + * @flush_pos:
  48187. + * @oid:
  48188. + * @index:
  48189. + * @count:
  48190. + *
  48191. + *
  48192. + */
  48193. +int allocated_extent_slum_size(flush_pos_t *flush_pos, oid_t oid,
  48194. + unsigned long index, unsigned long count)
  48195. +{
  48196. + unsigned long i;
  48197. + reiser4_tree *tree;
  48198. + txn_atom *atom;
  48199. + int nr;
  48200. +
  48201. + atom = atom_locked_by_fq(reiser4_pos_fq(flush_pos));
  48202. + assert("vs-1468", atom);
  48203. +
  48204. + nr = 0;
  48205. + tree = current_tree;
  48206. + for (i = 0; i < count; ++i, ++index) {
  48207. + jnode *node;
  48208. +
  48209. + node = jlookup(tree, oid, index);
  48210. + if (!node)
  48211. + break;
  48212. +
  48213. + if (jnode_check_flushprepped(node)) {
  48214. + atomic_dec(&node->x_count);
  48215. + break;
  48216. + }
  48217. +
  48218. + if (node->atom != atom) {
  48219. + /*
  48220. + * this is possible on overwrite: extent_write may
  48221. + * capture several unformatted nodes without capturing
  48222. + * any formatted nodes.
  48223. + */
  48224. + atomic_dec(&node->x_count);
  48225. + break;
  48226. + }
  48227. +
  48228. + assert("vs-1476", atomic_read(&node->x_count) > 1);
  48229. + atomic_dec(&node->x_count);
  48230. + nr ++;
  48231. + }
  48232. +
  48233. + spin_unlock_atom(atom);
  48234. + return nr;
  48235. +}
  48236. +
  48237. +/* if @key is glueable to the item @coord is set to */
  48238. +static int must_insert(const coord_t *coord, const reiser4_key *key)
  48239. +{
  48240. + reiser4_key last;
  48241. +
  48242. + if (item_id_by_coord(coord) == EXTENT_POINTER_ID
  48243. + && keyeq(append_key_extent(coord, &last), key))
  48244. + return 0;
  48245. + return 1;
  48246. +}
  48247. +
  48248. +/**
  48249. + * copy extent @copy to the end of @node.
  48250. + * It may have to either insert new item after the last one,
  48251. + * or append last item, or modify last unit of last item to have
  48252. + * greater width
  48253. + */
  48254. +int put_unit_to_end(znode *node,
  48255. + const reiser4_key *key, reiser4_extent *copy_ext)
  48256. +{
  48257. + int result;
  48258. + coord_t coord;
  48259. + cop_insert_flag flags;
  48260. + reiser4_extent *last_ext;
  48261. + reiser4_item_data data;
  48262. +
  48263. + /* set coord after last unit in an item */
  48264. + coord_init_last_unit(&coord, node);
  48265. + coord.between = AFTER_UNIT;
  48266. +
  48267. + flags =
  48268. + COPI_DONT_SHIFT_LEFT | COPI_DONT_SHIFT_RIGHT | COPI_DONT_ALLOCATE;
  48269. + if (must_insert(&coord, key)) {
  48270. + result =
  48271. + insert_by_coord(&coord, init_new_extent(&data, copy_ext, 1),
  48272. + key, NULL /*lh */ , flags);
  48273. +
  48274. + } else {
  48275. + /* try to glue with last unit */
  48276. + last_ext = extent_by_coord(&coord);
  48277. + if (state_of_extent(last_ext) &&
  48278. + extent_get_start(last_ext) + extent_get_width(last_ext) ==
  48279. + extent_get_start(copy_ext)) {
  48280. + /* widen last unit of node */
  48281. + extent_set_width(last_ext,
  48282. + extent_get_width(last_ext) +
  48283. + extent_get_width(copy_ext));
  48284. + znode_make_dirty(node);
  48285. + return 0;
  48286. + }
  48287. +
  48288. + /* FIXME: put an assertion here that we cannot merge the last unit of @node with the new unit */
  48289. + result =
  48290. + insert_into_item(&coord, NULL /*lh */ , key,
  48291. + init_new_extent(&data, copy_ext, 1),
  48292. + flags);
  48293. + }
  48294. +
  48295. + assert("vs-438", result == 0 || result == -E_NODE_FULL);
  48296. + return result;
  48297. +}
  48298. +
  48299. +int key_by_offset_extent(struct inode *inode, loff_t off, reiser4_key * key)
  48300. +{
  48301. + return key_by_inode_and_offset_common(inode, off, key);
  48302. +}
  48303. +
  48304. +/*
  48305. + * Local variables:
  48306. + * c-indentation-style: "K&R"
  48307. + * mode-name: "LC"
  48308. + * c-basic-offset: 8
  48309. + * tab-width: 8
  48310. + * fill-column: 79
  48311. + * scroll-step: 1
  48312. + * End:
  48313. + */
  48314. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/extent.h linux-5.16.14/fs/reiser4/plugin/item/extent.h
  48315. --- linux-5.16.14.orig/fs/reiser4/plugin/item/extent.h 1970-01-01 01:00:00.000000000 +0100
  48316. +++ linux-5.16.14/fs/reiser4/plugin/item/extent.h 2022-03-12 13:26:19.677892789 +0100
  48317. @@ -0,0 +1,231 @@
  48318. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  48319. +
  48320. +#ifndef __REISER4_EXTENT_H__
  48321. +#define __REISER4_EXTENT_H__
  48322. +
  48323. +/* on disk extent */
  48324. +typedef struct {
  48325. + reiser4_dblock_nr start;
  48326. + reiser4_dblock_nr width;
  48327. +} reiser4_extent;
  48328. +
  48329. +struct extent_stat {
  48330. + int unallocated_units;
  48331. + int unallocated_blocks;
  48332. + int allocated_units;
  48333. + int allocated_blocks;
  48334. + int hole_units;
  48335. + int hole_blocks;
  48336. +};
  48337. +
  48338. +/* an extent in an extent item is either a hole, an unallocated extent,
  48339. +   or an allocated extent */
  48340. +typedef enum {
  48341. + HOLE_EXTENT,
  48342. + UNALLOCATED_EXTENT,
  48343. + ALLOCATED_EXTENT
  48344. +} extent_state;
  48345. +
  48346. +#define HOLE_EXTENT_START 0
  48347. +#define UNALLOCATED_EXTENT_START 1
  48348. +#define UNALLOCATED_EXTENT_START2 2
  48349. +
  48350. +struct extent_coord_extension {
  48351. + reiser4_block_nr pos_in_unit;
  48352. + reiser4_block_nr width; /* width of current unit */
  48353. + pos_in_node_t nr_units; /* number of units */
  48354. + int ext_offset; /* offset from the beginning of zdata() */
  48355. + unsigned long expected_page;
  48356. +#if REISER4_DEBUG
  48357. + reiser4_extent extent;
  48358. +#endif
  48359. +};
  48360. +
  48361. +/* macros to set/get fields of on-disk extent */
  48362. +static inline reiser4_block_nr extent_get_start(const reiser4_extent * ext)
  48363. +{
  48364. + return le64_to_cpu(ext->start);
  48365. +}
  48366. +
  48367. +static inline reiser4_block_nr extent_get_width(const reiser4_extent * ext)
  48368. +{
  48369. + return le64_to_cpu(ext->width);
  48370. +}
  48371. +
  48372. +extern __u64 reiser4_current_block_count(void);
  48373. +
  48374. +static inline void
  48375. +extent_set_start(reiser4_extent * ext, reiser4_block_nr start)
  48376. +{
  48377. + static_assert(sizeof(ext->start) == 8);
  48378. + assert("nikita-2510",
  48379. + ergo(start > 1, start < reiser4_current_block_count()));
  48380. + put_unaligned(cpu_to_le64(start), &ext->start);
  48381. +}
  48382. +
  48383. +static inline void
  48384. +extent_set_width(reiser4_extent * ext, reiser4_block_nr width)
  48385. +{
  48386. + static_assert(sizeof(ext->width) == 8);
  48387. + assert("", width > 0);
  48388. + put_unaligned(cpu_to_le64(width), &ext->width);
  48389. + assert("nikita-2511",
  48390. + ergo(extent_get_start(ext) > 1,
  48391. + extent_get_start(ext) + width <=
  48392. + reiser4_current_block_count()));
  48393. +}
  48394. +
  48395. +#define extent_item(coord) \
  48396. +({ \
  48397. + assert("nikita-3143", item_is_extent(coord)); \
  48398. + ((reiser4_extent *)item_body_by_coord (coord)); \
  48399. +})
  48400. +
  48401. +#define extent_by_coord(coord) \
  48402. +({ \
  48403. + assert("nikita-3144", item_is_extent(coord)); \
  48404. + (extent_item (coord) + (coord)->unit_pos); \
  48405. +})
  48406. +
  48407. +#define width_by_coord(coord) \
  48408. +({ \
  48409. + assert("nikita-3145", item_is_extent(coord)); \
  48410. + extent_get_width (extent_by_coord(coord)); \
  48411. +})
  48412. +
  48413. +struct carry_cut_data;
  48414. +struct carry_kill_data;
  48415. +
  48416. +/* plugin->u.item.b.* */
  48417. +reiser4_key *max_key_inside_extent(const coord_t *, reiser4_key *);
  48418. +int can_contain_key_extent(const coord_t * coord, const reiser4_key * key,
  48419. + const reiser4_item_data *);
  48420. +int mergeable_extent(const coord_t * p1, const coord_t * p2);
  48421. +pos_in_node_t nr_units_extent(const coord_t *);
  48422. +lookup_result lookup_extent(const reiser4_key *, lookup_bias, coord_t *);
  48423. +void init_coord_extent(coord_t *);
  48424. +int init_extent(coord_t *, reiser4_item_data *);
  48425. +int paste_extent(coord_t *, reiser4_item_data *, carry_plugin_info *);
  48426. +int can_shift_extent(unsigned free_space,
  48427. + coord_t * source, znode * target, shift_direction,
  48428. + unsigned *size, unsigned want);
  48429. +void copy_units_extent(coord_t * target, coord_t * source, unsigned from,
  48430. + unsigned count, shift_direction where_is_free_space,
  48431. + unsigned free_space);
  48432. +int kill_hook_extent(const coord_t *, pos_in_node_t from, pos_in_node_t count,
  48433. + struct carry_kill_data *);
  48434. +int create_hook_extent(const coord_t * coord, void *arg);
  48435. +int cut_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  48436. + struct carry_cut_data *, reiser4_key * smallest_removed,
  48437. + reiser4_key * new_first);
  48438. +int kill_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  48439. + struct carry_kill_data *, reiser4_key * smallest_removed,
  48440. + reiser4_key * new_first);
  48441. +reiser4_key *unit_key_extent(const coord_t *, reiser4_key *);
  48442. +reiser4_key *max_unit_key_extent(const coord_t *, reiser4_key *);
  48443. +void print_extent(const char *, coord_t *);
  48444. +int utmost_child_extent(const coord_t * coord, sideof side, jnode ** child);
  48445. +int utmost_child_real_block_extent(const coord_t * coord, sideof side,
  48446. + reiser4_block_nr * block);
  48447. +void item_stat_extent(const coord_t * coord, void *vp);
  48448. +int reiser4_check_extent(const coord_t * coord, const char **error);
  48449. +
  48450. +/* plugin->u.item.s.file.* */
  48451. +ssize_t reiser4_write_extent(struct file *, struct inode * inode,
  48452. + const char __user *, size_t, loff_t *);
  48453. +int reiser4_read_extent(flow_t *, hint_t *, struct kiocb *, struct iov_iter *);
  48454. +int reiser4_readpage_extent(void *, struct page *);
  48455. +int reiser4_do_readpage_extent(reiser4_extent*, reiser4_block_nr, struct page*);
  48456. +reiser4_key *append_key_extent(const coord_t *, reiser4_key *);
  48457. +void init_coord_extension_extent(uf_coord_t *, loff_t offset);
  48458. +int get_block_address_extent(const coord_t *, sector_t block,
  48459. + sector_t * result);
  48460. +
  48461. +/* these are used in flush.c
  48462. + FIXME-VS: should they be somewhere in item_plugin? */
  48463. +int allocate_extent_item_in_place(coord_t *, lock_handle *, flush_pos_t * pos);
  48464. +int allocate_and_copy_extent(znode * left, coord_t * right, flush_pos_t * pos,
  48465. + reiser4_key * stop_key);
  48466. +
  48467. +int extent_is_unallocated(const coord_t * item); /* True if this extent is unallocated (i.e., not a hole, not allocated). */
  48468. +__u64 extent_unit_index(const coord_t * item); /* Block offset of this unit. */
  48469. +__u64 extent_unit_width(const coord_t * item); /* Number of blocks in this unit. */
  48470. +
  48471. +/* plugin->u.item.f. */
  48472. +int reiser4_scan_extent(flush_scan * scan);
  48473. +extern int key_by_offset_extent(struct inode *, loff_t, reiser4_key *);
  48474. +
  48475. +reiser4_item_data *init_new_extent(reiser4_item_data * data, void *ext_unit,
  48476. + int nr_extents);
  48477. +reiser4_block_nr reiser4_extent_size(const coord_t * coord, pos_in_node_t nr);
  48478. +extent_state state_of_extent(reiser4_extent * ext);
  48479. +void reiser4_set_extent(reiser4_extent *, reiser4_block_nr start,
  48480. + reiser4_block_nr width);
  48481. +int reiser4_update_extent(struct inode *, jnode *, loff_t pos,
  48482. + int *plugged_hole);
  48483. +
  48484. +#include "../../coord.h"
  48485. +#include "../../lock.h"
  48486. +#include "../../tap.h"
  48487. +
  48488. +struct replace_handle {
  48489. + /* these are to be set before calling reiser4_replace_extent */
  48490. + coord_t *coord;
  48491. + lock_handle *lh;
  48492. + reiser4_key key;
  48493. + reiser4_key *pkey;
  48494. + reiser4_extent overwrite;
  48495. + reiser4_extent new_extents[2];
  48496. + int nr_new_extents;
  48497. + unsigned flags;
  48498. +
  48499. + /* these are used by reiser4_replace_extent */
  48500. + reiser4_item_data item;
  48501. + coord_t coord_after;
  48502. + lock_handle lh_after;
  48503. + tap_t watch;
  48504. + reiser4_key paste_key;
  48505. +#if REISER4_DEBUG
  48506. + reiser4_extent orig_ext;
  48507. + reiser4_key tmp;
  48508. +#endif
  48509. +};
  48510. +
  48511. +/* this structure is kmalloced before calling make_extent to avoid excessive
  48512. + stack consumption on plug_hole->reiser4_replace_extent */
  48513. +struct make_extent_handle {
  48514. + uf_coord_t *uf_coord;
  48515. + reiser4_block_nr blocknr;
  48516. + int created;
  48517. + struct inode *inode;
  48518. + union {
  48519. + struct {
  48520. + } append;
  48521. + struct replace_handle replace;
  48522. + } u;
  48523. +};
  48524. +
  48525. +int reiser4_replace_extent(struct replace_handle *,
  48526. + int return_inserted_position);
  48527. +lock_handle *znode_lh(znode *);
  48528. +
  48529. +/* the reiser4 repacker support */
  48530. +struct repacker_cursor;
  48531. +extern int process_extent_backward_for_repacking(tap_t *,
  48532. + struct repacker_cursor *);
  48533. +extern int mark_extent_for_repacking(tap_t *, int);
  48534. +
  48535. +#define coord_by_uf_coord(uf_coord) (&((uf_coord)->coord))
  48536. +#define ext_coord_by_uf_coord(uf_coord) (&((uf_coord)->extension.extent))
  48537. +
  48538. +/* __REISER4_EXTENT_H__ */
  48539. +#endif
  48540. +/*
  48541. + Local variables:
  48542. + c-indentation-style: "K&R"
  48543. + mode-name: "LC"
  48544. + c-basic-offset: 8
  48545. + tab-width: 8
  48546. + fill-column: 120
  48547. + End:
  48548. +*/
  48549. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/extent_item_ops.c linux-5.16.14/fs/reiser4/plugin/item/extent_item_ops.c
  48550. --- linux-5.16.14.orig/fs/reiser4/plugin/item/extent_item_ops.c 1970-01-01 01:00:00.000000000 +0100
  48551. +++ linux-5.16.14/fs/reiser4/plugin/item/extent_item_ops.c 2022-03-12 13:26:19.679892794 +0100
  48552. @@ -0,0 +1,888 @@
  48553. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  48554. +
  48555. +#include "item.h"
  48556. +#include "../../inode.h"
  48557. +#include "../../tree_walk.h" /* check_sibling_list() */
  48558. +#include "../../page_cache.h"
  48559. +#include "../../carry.h"
  48560. +
  48561. +/* item_plugin->b.max_key_inside */
  48562. +reiser4_key *max_key_inside_extent(const coord_t * coord, reiser4_key * key)
  48563. +{
  48564. + item_key_by_coord(coord, key);
  48565. + set_key_offset(key, get_key_offset(reiser4_max_key()));
  48566. + return key;
  48567. +}
  48568. +
  48569. +/* item_plugin->b.can_contain_key
  48570. + this checks whether @key of @data is matching to position set by @coord */
  48571. +int
  48572. +can_contain_key_extent(const coord_t * coord, const reiser4_key * key,
  48573. + const reiser4_item_data * data)
  48574. +{
  48575. + reiser4_key item_key;
  48576. +
  48577. + if (item_plugin_by_coord(coord) != data->iplug)
  48578. + return 0;
  48579. +
  48580. + item_key_by_coord(coord, &item_key);
  48581. + if (get_key_locality(key) != get_key_locality(&item_key) ||
  48582. + get_key_objectid(key) != get_key_objectid(&item_key) ||
  48583. + get_key_ordering(key) != get_key_ordering(&item_key))
  48584. + return 0;
  48585. +
  48586. + return 1;
  48587. +}
  48588. +
  48589. +/* item_plugin->b.mergeable
  48590. + first item is of extent type */
  48591. +/* Audited by: green(2002.06.13) */
  48592. +int mergeable_extent(const coord_t * p1, const coord_t * p2)
  48593. +{
  48594. + reiser4_key key1, key2;
  48595. +
  48596. + assert("vs-299", item_id_by_coord(p1) == EXTENT_POINTER_ID);
  48597. + /* FIXME-VS: Which is it? Assert or return 0 */
  48598. + if (item_id_by_coord(p2) != EXTENT_POINTER_ID) {
  48599. + return 0;
  48600. + }
  48601. +
  48602. + item_key_by_coord(p1, &key1);
  48603. + item_key_by_coord(p2, &key2);
  48604. + if (get_key_locality(&key1) != get_key_locality(&key2) ||
  48605. + get_key_objectid(&key1) != get_key_objectid(&key2) ||
  48606. + get_key_ordering(&key1) != get_key_ordering(&key2) ||
  48607. + get_key_type(&key1) != get_key_type(&key2))
  48608. + return 0;
  48609. + if (get_key_offset(&key1) +
  48610. + reiser4_extent_size(p1, nr_units_extent(p1)) !=
  48611. + get_key_offset(&key2))
  48612. + return 0;
  48613. + return 1;
  48614. +}
  48615. +
  48616. +/* item_plugin->b.nr_units */
  48617. +pos_in_node_t nr_units_extent(const coord_t * coord)
  48618. +{
  48619. + /* length of extent item has to be a multiple of extent size */
  48620. + assert("vs-1424",
  48621. + (item_length_by_coord(coord) % sizeof(reiser4_extent)) == 0);
  48622. + return item_length_by_coord(coord) / sizeof(reiser4_extent);
  48623. +}
  48624. +
  48625. +/* item_plugin->b.lookup */
  48626. +lookup_result
  48627. +lookup_extent(const reiser4_key * key, lookup_bias bias UNUSED_ARG,
  48628. + coord_t * coord)
  48629. +{ /* znode and item_pos are
  48630. + set to an extent item to
  48631. + look through */
  48632. + reiser4_key item_key;
  48633. + reiser4_block_nr lookuped, offset;
  48634. + unsigned i, nr_units;
  48635. + reiser4_extent *ext;
  48636. + unsigned blocksize;
  48637. + unsigned char blocksize_bits;
  48638. +
  48639. + item_key_by_coord(coord, &item_key);
  48640. + offset = get_key_offset(&item_key);
  48641. +
  48642. + /* key we are looking for must be greater than key of item @coord */
  48643. + assert("vs-414", keygt(key, &item_key));
  48644. +
  48645. + assert("umka-99945",
  48646. + !keygt(key, max_key_inside_extent(coord, &item_key)));
  48647. +
  48648. + ext = extent_item(coord);
  48649. + assert("vs-1350", (char *)ext == (zdata(coord->node) + coord->offset));
  48650. +
  48651. + blocksize = current_blocksize;
  48652. + blocksize_bits = current_blocksize_bits;
  48653. +
  48654. + /* offset we are looking for */
  48655. + lookuped = get_key_offset(key);
  48656. +
  48657. + nr_units = nr_units_extent(coord);
  48658. + /* go through all extents until the one that addresses the given offset */
  48659. + for (i = 0; i < nr_units; i++, ext++) {
  48660. + offset += (extent_get_width(ext) << blocksize_bits);
  48661. + if (offset > lookuped) {
  48662. + /* desired byte is somewhere in this extent */
  48663. + coord->unit_pos = i;
  48664. + coord->between = AT_UNIT;
  48665. + return CBK_COORD_FOUND;
  48666. + }
  48667. + }
  48668. +
  48669. + /* set coord after last unit */
  48670. + coord->unit_pos = nr_units - 1;
  48671. + coord->between = AFTER_UNIT;
  48672. + return CBK_COORD_FOUND;
  48673. +}
  48674. +
  48675. +/* item_plugin->b.paste
  48676. + the item that @coord is set to has been appended with @data->length of free
  48677. + space. data->data contains data to be pasted into the item in position
  48678. + @coord->in_item.unit_pos. It must fit into that free space.
  48679. + @coord must be set between units.
  48680. +*/
  48681. +int
  48682. +paste_extent(coord_t * coord, reiser4_item_data * data,
  48683. + carry_plugin_info * info UNUSED_ARG)
  48684. +{
  48685. + unsigned old_nr_units;
  48686. + reiser4_extent *ext;
  48687. + int item_length;
  48688. +
  48689. + ext = extent_item(coord);
  48690. + item_length = item_length_by_coord(coord);
  48691. + old_nr_units = (item_length - data->length) / sizeof(reiser4_extent);
  48692. +
  48693. + /* this is also used to copy an extent into a newly created item, so
  48694. + old_nr_units could be 0 */
  48695. + assert("vs-260", item_length >= data->length);
  48696. +
  48697. + /* make sure that coord is set properly */
  48698. + assert("vs-35",
  48699. + ((!coord_is_existing_unit(coord))
  48700. + || (!old_nr_units && !coord->unit_pos)));
  48701. +
  48702. + /* first unit to be moved */
  48703. + switch (coord->between) {
  48704. + case AFTER_UNIT:
  48705. + coord->unit_pos++;
  48706. + fallthrough;
  48707. + case BEFORE_UNIT:
  48708. + coord->between = AT_UNIT;
  48709. + break;
  48710. + case AT_UNIT:
  48711. + assert("vs-331", !old_nr_units && !coord->unit_pos);
  48712. + break;
  48713. + default:
  48714. + impossible("vs-330", "coord is set improperly");
  48715. + }
  48716. +
  48717. + /* prepare space for new units */
  48718. + memmove(ext + coord->unit_pos + data->length / sizeof(reiser4_extent),
  48719. + ext + coord->unit_pos,
  48720. + (old_nr_units - coord->unit_pos) * sizeof(reiser4_extent));
  48721. +
  48722. + /* copy new data from kernel space */
  48723. + assert("vs-556", data->user == 0);
  48724. + memcpy(ext + coord->unit_pos, data->data, (unsigned)data->length);
  48725. +
  48726. + /* after paste @coord is set to first of pasted units */
  48727. + assert("vs-332", coord_is_existing_unit(coord));
  48728. + assert("vs-333",
  48729. + !memcmp(data->data, extent_by_coord(coord),
  48730. + (unsigned)data->length));
  48731. + return 0;
  48732. +}
  48733. +
  48734. +/* item_plugin->b.can_shift */
  48735. +int
  48736. +can_shift_extent(unsigned free_space, coord_t * source,
  48737. + znode * target UNUSED_ARG, shift_direction pend UNUSED_ARG,
  48738. + unsigned *size, unsigned want)
  48739. +{
  48740. + *size = item_length_by_coord(source);
  48741. + if (*size > free_space)
  48742. + /* never split a unit of extent item */
  48743. + *size = free_space - free_space % sizeof(reiser4_extent);
  48744. +
  48745. + /* we can shift *size bytes; calculate how many we want to shift */
  48746. + if (*size > want * sizeof(reiser4_extent))
  48747. + *size = want * sizeof(reiser4_extent);
  48748. +
  48749. + if (*size % sizeof(reiser4_extent) != 0)
  48750. + impossible("vs-119", "Wrong extent size: %i %zd", *size,
  48751. + sizeof(reiser4_extent));
  48752. + return *size / sizeof(reiser4_extent);
  48753. +
  48754. +}
  48755. +
  48756. +/* item_plugin->b.copy_units */
  48757. +void
  48758. +copy_units_extent(coord_t * target, coord_t * source,
  48759. + unsigned from, unsigned count,
  48760. + shift_direction where_is_free_space, unsigned free_space)
  48761. +{
  48762. + char *from_ext, *to_ext;
  48763. +
  48764. + assert("vs-217", free_space == count * sizeof(reiser4_extent));
  48765. +
  48766. + from_ext = item_body_by_coord(source);
  48767. + to_ext = item_body_by_coord(target);
  48768. +
  48769. + if (where_is_free_space == SHIFT_LEFT) {
  48770. + assert("vs-215", from == 0);
  48771. +
  48772. + /* At this moment, item length was already updated in the item
  48773. + header by shifting code, hence nr_units_extent() will
  48774. + return "new" number of units---one we obtain after copying
  48775. + units.
  48776. + */
  48777. + to_ext +=
  48778. + (nr_units_extent(target) - count) * sizeof(reiser4_extent);
  48779. + } else {
  48780. + reiser4_key key;
  48781. + coord_t coord;
  48782. +
  48783. + assert("vs-216",
  48784. + from + count == coord_last_unit_pos(source) + 1);
  48785. +
  48786. + from_ext += item_length_by_coord(source) - free_space;
  48787. +
  48788. + /* new units are inserted before first unit in an item,
  48789. + therefore, we have to update item key */
  48790. + coord = *source;
  48791. + coord.unit_pos = from;
  48792. + unit_key_extent(&coord, &key);
  48793. +
  48794. + node_plugin_by_node(target->node)->update_item_key(target, &key,
  48795. + NULL /*info */);
  48796. + }
  48797. +
  48798. + memcpy(to_ext, from_ext, free_space);
  48799. +}
  48800. +
  48801. +/* item_plugin->b.create_hook
  48802. + @arg is znode of leaf node for which we need to update right delimiting key */
  48803. +int create_hook_extent(const coord_t * coord, void *arg)
  48804. +{
  48805. + coord_t *child_coord;
  48806. + znode *node;
  48807. + reiser4_key key;
  48808. + reiser4_tree *tree;
  48809. +
  48810. + if (!arg)
  48811. + return 0;
  48812. +
  48813. + child_coord = arg;
  48814. + tree = znode_get_tree(coord->node);
  48815. +
  48816. + assert("nikita-3246", znode_get_level(child_coord->node) == LEAF_LEVEL);
  48817. +
  48818. + write_lock_tree(tree);
  48819. + write_lock_dk(tree);
  48820. + /* find a node on the left level for which right delimiting key has to
  48821. + be updated */
  48822. + if (coord_wrt(child_coord) == COORD_ON_THE_LEFT) {
  48823. + assert("vs-411", znode_is_left_connected(child_coord->node));
  48824. + node = child_coord->node->left;
  48825. + } else {
  48826. + assert("vs-412", coord_wrt(child_coord) == COORD_ON_THE_RIGHT);
  48827. + node = child_coord->node;
  48828. + assert("nikita-3314", node != NULL);
  48829. + }
  48830. +
  48831. + if (node != NULL) {
  48832. + znode_set_rd_key(node, item_key_by_coord(coord, &key));
  48833. +
  48834. + assert("nikita-3282", check_sibling_list(node));
  48835. + /* break sibling links */
  48836. + if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && node->right) {
  48837. + ON_DEBUG(node->right->left_version =
  48838. + atomic_inc_return(&delim_key_version);
  48839. + node->right_version =
  48840. + atomic_inc_return(&delim_key_version););
  48841. +
  48842. + node->right->left = NULL;
  48843. + node->right = NULL;
  48844. + }
  48845. + }
  48846. + write_unlock_dk(tree);
  48847. + write_unlock_tree(tree);
  48848. + return 0;
  48849. +}
  48850. +
  48851. +#define ITEM_TAIL_KILLED 0
  48852. +#define ITEM_HEAD_KILLED 1
  48853. +#define ITEM_KILLED 2
  48854. +
  48855. +/* item_plugin->b.kill_hook
  48856. + this is called when @count units starting from @from-th one are going to be removed
  48857. + */
  48858. +int
  48859. +kill_hook_extent(const coord_t * coord, pos_in_node_t from, pos_in_node_t count,
  48860. + struct carry_kill_data *kdata)
  48861. +{
  48862. + reiser4_extent *ext;
  48863. + reiser4_block_nr start, length;
  48864. + const reiser4_key *pfrom_key, *pto_key;
  48865. + struct inode *inode;
  48866. + reiser4_tree *tree;
  48867. + pgoff_t from_off, to_off, offset, skip;
  48868. + int retval;
  48869. +
  48870. + /* these are located in memory kmalloc-ed by kill_node_content */
  48871. + reiser4_key *min_item_key, *max_item_key, *from_key, *to_key, *key;
  48872. + coord_t *dup, *next;
  48873. +
  48874. + assert("zam-811", znode_is_write_locked(coord->node));
  48875. + assert("nikita-3315", kdata != NULL);
  48876. + assert("vs-34", kdata->buf != NULL);
  48877. +
  48878. + /* map structures to kdata->buf */
  48879. + min_item_key = (reiser4_key *) (kdata->buf);
  48880. + max_item_key = min_item_key + 1;
  48881. + from_key = max_item_key + 1;
  48882. + to_key = from_key + 1;
  48883. + key = to_key + 1;
  48884. + dup = (coord_t *) (key + 1);
  48885. + next = dup + 1;
  48886. +
  48887. + item_key_by_coord(coord, min_item_key);
  48888. + max_item_key_by_coord(coord, max_item_key);
  48889. +
  48890. + if (kdata->params.from_key) {
  48891. + pfrom_key = kdata->params.from_key;
  48892. + pto_key = kdata->params.to_key;
  48893. + } else {
  48894. + assert("vs-1549", from == coord->unit_pos);
  48895. + unit_key_by_coord(coord, from_key);
  48896. + pfrom_key = from_key;
  48897. +
  48898. + coord_dup(dup, coord);
  48899. + dup->unit_pos = from + count - 1;
  48900. + max_unit_key_by_coord(dup, to_key);
  48901. + pto_key = to_key;
  48902. + }
  48903. +
  48904. + if (!keylt(pto_key, max_item_key)) {
  48905. + if (!keygt(pfrom_key, min_item_key)) {
  48906. + znode *left, *right;
  48907. +
  48908. + /* item is to be removed completely */
  48909. + assert("nikita-3316", kdata->left != NULL
  48910. + && kdata->right != NULL);
  48911. +
  48912. + left = kdata->left->node;
  48913. + right = kdata->right->node;
  48914. +
  48915. + tree = current_tree;
  48916. + /* we have to do two things:
  48917. + *
  48918. + * 1. link left and right formatted neighbors of
  48919. + * extent being removed, and
  48920. + *
  48921. + * 2. update their delimiting keys.
  48922. + *
  48923. + * atomicity of these operations is protected by
  48924. + * taking dk-lock and tree-lock.
  48925. + */
  48926. + /* if neighbors of item being removed are znodes -
  48927. + * link them */
  48928. + write_lock_tree(tree);
  48929. + write_lock_dk(tree);
  48930. + link_left_and_right(left, right);
  48931. + if (left) {
  48932. + /* update right delimiting key of left
  48933. + * neighbor of extent item */
  48934. + /*coord_t next;
  48935. + reiser4_key key; */
  48936. +
  48937. + coord_dup(next, coord);
  48938. +
  48939. + if (coord_next_item(next))
  48940. + *key = *znode_get_rd_key(coord->node);
  48941. + else
  48942. + item_key_by_coord(next, key);
  48943. + znode_set_rd_key(left, key);
  48944. + }
  48945. + write_unlock_dk(tree);
  48946. + write_unlock_tree(tree);
  48947. +
  48948. + from_off =
  48949. + get_key_offset(min_item_key) >> PAGE_SHIFT;
  48950. + to_off =
  48951. + (get_key_offset(max_item_key) +
  48952. + 1) >> PAGE_SHIFT;
  48953. + retval = ITEM_KILLED;
  48954. + } else {
  48955. + /* tail of item is to be removed */
  48956. + from_off =
  48957. + (get_key_offset(pfrom_key) + PAGE_SIZE -
  48958. + 1) >> PAGE_SHIFT;
  48959. + to_off =
  48960. + (get_key_offset(max_item_key) +
  48961. + 1) >> PAGE_SHIFT;
  48962. + retval = ITEM_TAIL_KILLED;
  48963. + }
  48964. + } else {
  48965. + /* head of item is to be removed */
  48966. + assert("vs-1571", keyeq(pfrom_key, min_item_key));
  48967. + assert("vs-1572",
  48968. + (get_key_offset(pfrom_key) & (PAGE_SIZE - 1)) ==
  48969. + 0);
  48970. + assert("vs-1573",
  48971. + ((get_key_offset(pto_key) + 1) & (PAGE_SIZE -
  48972. + 1)) == 0);
  48973. +
  48974. + if (kdata->left->node) {
  48975. + /* update right delimiting key of left neighbor of extent item */
  48976. + /*reiser4_key key; */
  48977. +
  48978. + *key = *pto_key;
  48979. + set_key_offset(key, get_key_offset(pto_key) + 1);
  48980. +
  48981. + write_lock_dk(current_tree);
  48982. + znode_set_rd_key(kdata->left->node, key);
  48983. + write_unlock_dk(current_tree);
  48984. + }
  48985. +
  48986. + from_off = get_key_offset(pfrom_key) >> PAGE_SHIFT;
  48987. + to_off = (get_key_offset(pto_key) + 1) >> PAGE_SHIFT;
  48988. + retval = ITEM_HEAD_KILLED;
  48989. + }
  48990. +
  48991. + inode = kdata->inode;
  48992. + assert("vs-1545", inode != NULL);
  48993. + if (inode != NULL)
  48994. + /* take care of pages and jnodes corresponding to part of item being killed */
  48995. + reiser4_invalidate_pages(inode->i_mapping, from_off,
  48996. + to_off - from_off,
  48997. + kdata->params.truncate);
  48998. +
  48999. + ext = extent_item(coord) + from;
  49000. + offset =
  49001. + (get_key_offset(min_item_key) +
  49002. + reiser4_extent_size(coord, from)) >> PAGE_SHIFT;
  49003. +
  49004. + assert("vs-1551", from_off >= offset);
  49005. + assert("vs-1552", from_off - offset <= extent_get_width(ext));
  49006. + skip = from_off - offset;
  49007. + offset = from_off;
  49008. +
  49009. + while (offset < to_off) {
  49010. + length = extent_get_width(ext) - skip;
  49011. + if (state_of_extent(ext) == HOLE_EXTENT) {
  49012. + skip = 0;
  49013. + offset += length;
  49014. + ext++;
  49015. + continue;
  49016. + }
  49017. +
  49018. + if (offset + length > to_off) {
  49019. + length = to_off - offset;
  49020. + }
  49021. +
  49022. + inode_sub_blocks(inode, length);
  49023. +
  49024. + if (state_of_extent(ext) == UNALLOCATED_EXTENT) {
  49025. + /* release the fake-allocated blocks backing this unallocated extent */
  49026. + fake_allocated2free(length, 0 /* unformatted */ );
  49027. +
  49028. + skip = 0;
  49029. + offset += length;
  49030. + ext++;
  49031. + continue;
  49032. + }
  49033. +
  49034. + assert("vs-1218", state_of_extent(ext) == ALLOCATED_EXTENT);
  49035. +
  49036. + if (length != 0) {
  49037. + start = extent_get_start(ext) + skip;
  49038. +
  49039. + /* BA_DEFER bit parameter is turned on because blocks which get freed are not safe to be freed
  49040. + immediately */
  49041. + reiser4_dealloc_blocks(&start, &length,
  49042. + 0 /* not used */ ,
  49043. + BA_DEFER
  49044. + /* unformatted with defer */ );
  49045. + }
  49046. + skip = 0;
  49047. + offset += length;
  49048. + ext++;
  49049. + }
  49050. + return retval;
  49051. +}
  49052. +
  49053. +/* item_plugin->b.kill_units */
  49054. +int
  49055. +kill_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  49056. + struct carry_kill_data *kdata, reiser4_key * smallest_removed,
  49057. + reiser4_key * new_first)
  49058. +{
  49059. + reiser4_extent *ext;
  49060. + reiser4_key item_key;
  49061. + pos_in_node_t count;
  49062. + reiser4_key from_key, to_key;
  49063. + const reiser4_key *pfrom_key, *pto_key;
  49064. + loff_t off;
  49065. + int result;
  49066. +
  49067. + assert("vs-1541",
  49068. + ((kdata->params.from_key == NULL && kdata->params.to_key == NULL)
  49069. + || (kdata->params.from_key != NULL
  49070. + && kdata->params.to_key != NULL)));
  49071. +
  49072. + if (kdata->params.from_key) {
  49073. + pfrom_key = kdata->params.from_key;
  49074. + pto_key = kdata->params.to_key;
  49075. + } else {
  49076. + coord_t dup;
  49077. +
  49078. + /* calculate key range of kill */
  49079. + assert("vs-1549", from == coord->unit_pos);
  49080. + unit_key_by_coord(coord, &from_key);
  49081. + pfrom_key = &from_key;
  49082. +
  49083. + coord_dup(&dup, coord);
  49084. + dup.unit_pos = to;
  49085. + max_unit_key_by_coord(&dup, &to_key);
  49086. + pto_key = &to_key;
  49087. + }
  49088. +
  49089. + item_key_by_coord(coord, &item_key);
  49090. +
  49091. +#if REISER4_DEBUG
  49092. + {
  49093. + reiser4_key max_item_key;
  49094. +
  49095. + max_item_key_by_coord(coord, &max_item_key);
  49096. +
  49097. + if (new_first) {
  49098. + /* head of item is to be cut */
  49099. + assert("vs-1542", keyeq(pfrom_key, &item_key));
  49100. + assert("vs-1538", keylt(pto_key, &max_item_key));
  49101. + } else {
  49102. + /* tail of item is to be cut */
  49103. + assert("vs-1540", keygt(pfrom_key, &item_key));
  49104. + assert("vs-1543", !keylt(pto_key, &max_item_key));
  49105. + }
  49106. + }
  49107. +#endif
  49108. +
  49109. + if (smallest_removed)
  49110. + *smallest_removed = *pfrom_key;
  49111. +
  49112. + if (new_first) {
  49113. + /* item head is cut. Item key will change. This new key is calculated here */
  49114. + assert("vs-1556",
  49115. + (get_key_offset(pto_key) & (PAGE_SIZE - 1)) ==
  49116. + (PAGE_SIZE - 1));
  49117. + *new_first = *pto_key;
  49118. + set_key_offset(new_first, get_key_offset(new_first) + 1);
  49119. + }
  49120. +
  49121. + count = to - from + 1;
  49122. + result = kill_hook_extent(coord, from, count, kdata);
  49123. + if (result == ITEM_TAIL_KILLED) {
  49124. + assert("vs-1553",
  49125. + get_key_offset(pfrom_key) >=
  49126. + get_key_offset(&item_key) +
  49127. + reiser4_extent_size(coord, from));
  49128. + off =
  49129. + get_key_offset(pfrom_key) -
  49130. + (get_key_offset(&item_key) +
  49131. + reiser4_extent_size(coord, from));
  49132. + if (off) {
  49133. + /* unit @from is to be cut partially. Its width decreases */
  49134. + ext = extent_item(coord) + from;
  49135. + extent_set_width(ext,
  49136. + (off + PAGE_SIZE -
  49137. + 1) >> PAGE_SHIFT);
  49138. + count--;
  49139. + }
  49140. + } else {
  49141. + __u64 max_to_offset;
  49142. + __u64 rest;
  49143. +
  49144. + assert("vs-1575", result == ITEM_HEAD_KILLED);
  49145. + assert("", from == 0);
  49146. + assert("",
  49147. + ((get_key_offset(pto_key) + 1) & (PAGE_SIZE -
  49148. + 1)) == 0);
  49149. + assert("",
  49150. + get_key_offset(pto_key) + 1 >
  49151. + get_key_offset(&item_key) +
  49152. + reiser4_extent_size(coord, to));
  49153. + max_to_offset =
  49154. + get_key_offset(&item_key) +
  49155. + reiser4_extent_size(coord, to + 1) - 1;
  49156. + assert("", get_key_offset(pto_key) <= max_to_offset);
  49157. +
  49158. + rest =
  49159. + (max_to_offset -
  49160. + get_key_offset(pto_key)) >> PAGE_SHIFT;
  49161. + if (rest) {
  49162. + /* unit @to is to be cut partially */
  49163. + ext = extent_item(coord) + to;
  49164. +
  49165. + assert("", extent_get_width(ext) > rest);
  49166. +
  49167. + if (state_of_extent(ext) == ALLOCATED_EXTENT)
  49168. + extent_set_start(ext,
  49169. + extent_get_start(ext) +
  49170. + (extent_get_width(ext) -
  49171. + rest));
  49172. +
  49173. + extent_set_width(ext, rest);
  49174. + count--;
  49175. + }
  49176. + }
  49177. + return count * sizeof(reiser4_extent);
  49178. +}
  49179. +
  49180. +/* item_plugin->b.cut_units
  49181. + this is too similar to kill_units_extent */
  49182. +int
  49183. +cut_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  49184. + struct carry_cut_data *cdata, reiser4_key * smallest_removed,
  49185. + reiser4_key * new_first)
  49186. +{
  49187. + reiser4_extent *ext;
  49188. + reiser4_key item_key;
  49189. + pos_in_node_t count;
  49190. + reiser4_key from_key, to_key;
  49191. + const reiser4_key *pfrom_key, *pto_key;
  49192. + loff_t off;
  49193. +
  49194. + assert("vs-1541",
  49195. + ((cdata->params.from_key == NULL && cdata->params.to_key == NULL)
  49196. + || (cdata->params.from_key != NULL
  49197. + && cdata->params.to_key != NULL)));
  49198. +
  49199. + if (cdata->params.from_key) {
  49200. + pfrom_key = cdata->params.from_key;
  49201. + pto_key = cdata->params.to_key;
  49202. + } else {
  49203. + coord_t dup;
  49204. +
  49205. + /* calculate key range of kill */
  49206. + coord_dup(&dup, coord);
  49207. + dup.unit_pos = from;
  49208. + unit_key_by_coord(&dup, &from_key);
  49209. +
  49210. + dup.unit_pos = to;
  49211. + max_unit_key_by_coord(&dup, &to_key);
  49212. +
  49213. + pfrom_key = &from_key;
  49214. + pto_key = &to_key;
  49215. + }
  49216. +
  49217. + assert("vs-1555",
  49218. + (get_key_offset(pfrom_key) & (PAGE_SIZE - 1)) == 0);
  49219. + assert("vs-1556",
  49220. + (get_key_offset(pto_key) & (PAGE_SIZE - 1)) ==
  49221. + (PAGE_SIZE - 1));
  49222. +
  49223. + item_key_by_coord(coord, &item_key);
  49224. +
  49225. +#if REISER4_DEBUG
  49226. + {
  49227. + reiser4_key max_item_key;
  49228. +
  49229. + assert("vs-1584",
  49230. + get_key_locality(pfrom_key) ==
  49231. + get_key_locality(&item_key));
  49232. + assert("vs-1585",
  49233. + get_key_type(pfrom_key) == get_key_type(&item_key));
  49234. + assert("vs-1586",
  49235. + get_key_objectid(pfrom_key) ==
  49236. + get_key_objectid(&item_key));
  49237. + assert("vs-1587",
  49238. + get_key_ordering(pfrom_key) ==
  49239. + get_key_ordering(&item_key));
  49240. +
  49241. + max_item_key_by_coord(coord, &max_item_key);
  49242. +
  49243. + if (new_first != NULL) {
  49244. + /* head of item is to be cut */
  49245. + assert("vs-1542", keyeq(pfrom_key, &item_key));
  49246. + assert("vs-1538", keylt(pto_key, &max_item_key));
  49247. + } else {
  49248. + /* tail of item is to be cut */
  49249. + assert("vs-1540", keygt(pfrom_key, &item_key));
  49250. + assert("vs-1543", keyeq(pto_key, &max_item_key));
  49251. + }
  49252. + }
  49253. +#endif
  49254. +
  49255. + if (smallest_removed)
  49256. + *smallest_removed = *pfrom_key;
  49257. +
  49258. + if (new_first) {
  49259. + /* item head is cut. Item key will change. This new key is calculated here */
  49260. + *new_first = *pto_key;
  49261. + set_key_offset(new_first, get_key_offset(new_first) + 1);
  49262. + }
  49263. +
  49264. + count = to - from + 1;
  49265. +
  49266. + assert("vs-1553",
  49267. + get_key_offset(pfrom_key) >=
  49268. + get_key_offset(&item_key) + reiser4_extent_size(coord, from));
  49269. + off =
  49270. + get_key_offset(pfrom_key) - (get_key_offset(&item_key) +
  49271. + reiser4_extent_size(coord, from));
  49272. + if (off) {
  49273. + /* tail of unit @from is to be cut partially. Its width decreases */
  49274. + assert("vs-1582", new_first == NULL);
  49275. + ext = extent_item(coord) + from;
  49276. + extent_set_width(ext, off >> PAGE_SHIFT);
  49277. + count--;
  49278. + }
  49279. +
  49280. + assert("vs-1554",
  49281. + get_key_offset(pto_key) <=
  49282. + get_key_offset(&item_key) +
  49283. + reiser4_extent_size(coord, to + 1) - 1);
  49284. + off =
  49285. + (get_key_offset(&item_key) +
  49286. + reiser4_extent_size(coord, to + 1) - 1) -
  49287. + get_key_offset(pto_key);
  49288. + if (off) {
  49289. + /* @to_key is smaller than max key of unit @to. Unit @to will not be removed. It gets start increased
  49290. + and width decreased. */
  49291. + assert("vs-1583", (off & (PAGE_SIZE - 1)) == 0);
  49292. + ext = extent_item(coord) + to;
  49293. + if (state_of_extent(ext) == ALLOCATED_EXTENT)
  49294. + extent_set_start(ext,
  49295. + extent_get_start(ext) +
  49296. + (extent_get_width(ext) -
  49297. + (off >> PAGE_SHIFT)));
  49298. +
  49299. + extent_set_width(ext, (off >> PAGE_SHIFT));
  49300. + count--;
  49301. + }
  49302. + return count * sizeof(reiser4_extent);
  49303. +}
  49304. +
  49305. +/* item_plugin->b.unit_key */
  49306. +reiser4_key *unit_key_extent(const coord_t * coord, reiser4_key * key)
  49307. +{
  49308. + assert("vs-300", coord_is_existing_unit(coord));
  49309. +
  49310. + item_key_by_coord(coord, key);
  49311. + set_key_offset(key,
  49312. + (get_key_offset(key) +
  49313. + reiser4_extent_size(coord, coord->unit_pos)));
  49314. +
  49315. + return key;
  49316. +}
  49317. +
  49318. +/* item_plugin->b.max_unit_key */
  49319. +reiser4_key *max_unit_key_extent(const coord_t * coord, reiser4_key * key)
  49320. +{
  49321. + assert("vs-300", coord_is_existing_unit(coord));
  49322. +
  49323. + item_key_by_coord(coord, key);
  49324. + set_key_offset(key,
  49325. + (get_key_offset(key) +
  49326. + reiser4_extent_size(coord, coord->unit_pos + 1) - 1));
  49327. + return key;
  49328. +}
  49329. +
  49330. +/* item_plugin->b.estimate
  49331. + item_plugin->b.item_data_by_flow */
  49332. +
  49333. +#if REISER4_DEBUG
  49334. +
  49335. +/* item_plugin->b.check
  49336. + used for debugging, every item should have here the most complete
  49337. + possible check of the consistency of the item that the inventor can
  49338. + construct
  49339. +*/
  49340. +int reiser4_check_extent(const coord_t * coord /* coord of item to check */,
  49341. + const char **error /* where to store error message */)
  49342. +{
  49343. + reiser4_extent *ext, *first;
  49344. + unsigned i, j;
  49345. + reiser4_block_nr start, width, blk_cnt;
  49346. + unsigned num_units;
  49347. + reiser4_tree *tree;
  49348. + oid_t oid;
  49349. + reiser4_key key;
  49350. + coord_t scan;
  49351. +
  49352. + assert("vs-933", REISER4_DEBUG);
  49353. +
  49354. + if (znode_get_level(coord->node) != TWIG_LEVEL) {
  49355. + *error = "Extent on the wrong level";
  49356. + return -1;
  49357. + }
  49358. + if (item_length_by_coord(coord) % sizeof(reiser4_extent) != 0) {
  49359. + *error = "Wrong item size";
  49360. + return -1;
  49361. + }
  49362. + ext = first = extent_item(coord);
  49363. + blk_cnt = reiser4_block_count(reiser4_get_current_sb());
  49364. + num_units = coord_num_units(coord);
  49365. + tree = znode_get_tree(coord->node);
  49366. + item_key_by_coord(coord, &key);
  49367. + oid = get_key_objectid(&key);
  49368. + coord_dup(&scan, coord);
  49369. +
  49370. + for (i = 0; i < num_units; ++i, ++ext) {
  49371. + __u64 index;
  49372. +
  49373. + scan.unit_pos = i;
  49374. + index = extent_unit_index(&scan);
  49375. +
  49376. +#if 0
  49377. + /* check that all jnodes are present for the unallocated
  49378. + * extent */
  49379. + if (state_of_extent(ext) == UNALLOCATED_EXTENT) {
  49380. + for (j = 0; j < extent_get_width(ext); j++) {
  49381. + jnode *node;
  49382. +
  49383. + node = jlookup(tree, oid, index + j);
  49384. + if (node == NULL) {
  49385. + print_coord("scan", &scan, 0);
  49386. + *error = "Jnode missing";
  49387. + return -1;
  49388. + }
  49389. + jput(node);
  49390. + }
  49391. + }
  49392. +#endif
  49393. +
  49394. + start = extent_get_start(ext);
  49395. + if (start < 2)
  49396. + continue;
  49397. + /* extent is allocated one */
  49398. + width = extent_get_width(ext);
  49399. + if (start >= blk_cnt) {
  49400. + *error = "Start too large";
  49401. + return -1;
  49402. + }
  49403. + if (start + width > blk_cnt) {
  49404. + *error = "End too large";
  49405. + return -1;
  49406. + }
  49407. + /* make sure that this extent does not overlap with other
  49408. + allocated extents extents */
  49409. + for (j = 0; j < i; j++) {
  49410. + if (state_of_extent(first + j) != ALLOCATED_EXTENT)
  49411. + continue;
  49412. + if (!
  49413. + ((extent_get_start(ext) >=
  49414. + extent_get_start(first + j) +
  49415. + extent_get_width(first + j))
  49416. + || (extent_get_start(ext) +
  49417. + extent_get_width(ext) <=
  49418. + extent_get_start(first + j)))) {
  49419. + *error = "Extent overlaps with others";
  49420. + return -1;
  49421. + }
  49422. + }
  49423. +
  49424. + }
  49425. +
  49426. + return 0;
  49427. +}
  49428. +
  49429. +#endif /* REISER4_DEBUG */
  49430. +
  49431. +/*
  49432. + Local variables:
  49433. + c-indentation-style: "K&R"
  49434. + mode-name: "LC"
  49435. + c-basic-offset: 8
  49436. + tab-width: 8
  49437. + fill-column: 120
  49438. + scroll-step: 1
  49439. + End:
  49440. +*/
  49441. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/internal.c linux-5.16.14/fs/reiser4/plugin/item/internal.c
  49442. --- linux-5.16.14.orig/fs/reiser4/plugin/item/internal.c 1970-01-01 01:00:00.000000000 +0100
  49443. +++ linux-5.16.14/fs/reiser4/plugin/item/internal.c 2022-03-12 13:26:19.679892794 +0100
  49444. @@ -0,0 +1,405 @@
  49445. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  49446. +
  49447. +/* Implementation of internal-item plugin methods. */
  49448. +
  49449. +#include "../../forward.h"
  49450. +#include "../../debug.h"
  49451. +#include "../../dformat.h"
  49452. +#include "../../key.h"
  49453. +#include "../../coord.h"
  49454. +#include "internal.h"
  49455. +#include "item.h"
  49456. +#include "../node/node.h"
  49457. +#include "../plugin.h"
  49458. +#include "../../jnode.h"
  49459. +#include "../../znode.h"
  49460. +#include "../../tree_walk.h"
  49461. +#include "../../tree_mod.h"
  49462. +#include "../../tree.h"
  49463. +#include "../../super.h"
  49464. +#include "../../block_alloc.h"
  49465. +
  49466. +/* see internal.h for explanation */
  49467. +
  49468. +/* plugin->u.item.b.mergeable */
  49469. +int mergeable_internal(const coord_t * p1 UNUSED_ARG /* first item */ ,
  49470. + const coord_t * p2 UNUSED_ARG /* second item */ )
  49471. +{
  49472. + /* internal items are not mergeable */
  49473. + return 0;
  49474. +}
  49475. +
  49476. +/* ->lookup() method for internal items */
  49477. +lookup_result lookup_internal(const reiser4_key * key /* key to look up */ ,
  49478. + lookup_bias bias UNUSED_ARG /* lookup bias */ ,
  49479. + coord_t * coord /* coord of item */ )
  49480. +{
  49481. + reiser4_key ukey;
  49482. +
  49483. + switch (keycmp(unit_key_by_coord(coord, &ukey), key)) {
  49484. + default:
  49485. + impossible("", "keycmp()?!");
  49486. + case LESS_THAN:
  49487. + /* FIXME-VS: AFTER_ITEM used to be here. But with new coord
  49488. + item plugin can not be taken using coord set this way */
  49489. + assert("vs-681", coord->unit_pos == 0);
  49490. + coord->between = AFTER_UNIT;
  49491. + fallthrough;
  49492. + case EQUAL_TO:
  49493. + return CBK_COORD_FOUND;
  49494. + case GREATER_THAN:
  49495. + return CBK_COORD_NOTFOUND;
  49496. + }
  49497. +}
  49498. +
  49499. +/* return body of internal item at @coord */
  49500. +static internal_item_layout *internal_at(const coord_t * coord /* coord of
  49501. + * item */ )
  49502. +{
  49503. + assert("nikita-607", coord != NULL);
  49504. + assert("nikita-1650",
  49505. + item_plugin_by_coord(coord) ==
  49506. + item_plugin_by_id(NODE_POINTER_ID));
  49507. + return (internal_item_layout *) item_body_by_coord(coord);
  49508. +}
  49509. +
  49510. +void reiser4_update_internal(const coord_t * coord,
  49511. + const reiser4_block_nr * blocknr)
  49512. +{
  49513. + internal_item_layout *item = internal_at(coord);
  49514. + assert("nikita-2959", reiser4_blocknr_is_sane(blocknr));
  49515. +
  49516. + put_unaligned(cpu_to_le64(*blocknr), &item->pointer);
  49517. +}
  49518. +
  49519. +/* return child block number stored in the internal item at @coord */
  49520. +static reiser4_block_nr pointer_at(const coord_t * coord /* coord of item */ )
  49521. +{
  49522. + assert("nikita-608", coord != NULL);
  49523. + return le64_to_cpu(get_unaligned(&internal_at(coord)->pointer));
  49524. +}
  49525. +
  49526. +/* get znode pointed to by internal @item */
  49527. +static znode *znode_at(const coord_t * item /* coord of item */ ,
  49528. + znode * parent /* parent node */ )
  49529. +{
  49530. + return child_znode(item, parent, 1, 0);
  49531. +}
  49532. +
  49533. +/* store pointer from internal item into "block". Implementation of
  49534. + ->down_link() method */
  49535. +void down_link_internal(const coord_t * coord /* coord of item */ ,
  49536. + const reiser4_key * key UNUSED_ARG /* key to get
  49537. + * pointer for */ ,
  49538. + reiser4_block_nr * block /* resulting block number */ )
  49539. +{
  49540. + ON_DEBUG(reiser4_key item_key);
  49541. +
  49542. + assert("nikita-609", coord != NULL);
  49543. + assert("nikita-611", block != NULL);
  49544. + assert("nikita-612", (key == NULL) ||
  49545. + /* twig horrors */
  49546. + (znode_get_level(coord->node) == TWIG_LEVEL)
  49547. + || keyle(item_key_by_coord(coord, &item_key), key));
  49548. +
  49549. + *block = pointer_at(coord);
  49550. + assert("nikita-2960", reiser4_blocknr_is_sane(block));
  49551. +}
  49552. +
  49553. +/* Get the child's block number, or 0 if the block is unallocated. */
  49554. +int
  49555. +utmost_child_real_block_internal(const coord_t * coord, sideof side UNUSED_ARG,
  49556. + reiser4_block_nr * block)
  49557. +{
  49558. + assert("jmacd-2059", coord != NULL);
  49559. +
  49560. + *block = pointer_at(coord);
  49561. + assert("nikita-2961", reiser4_blocknr_is_sane(block));
  49562. +
  49563. + if (reiser4_blocknr_is_fake(block)) {
  49564. + *block = 0;
  49565. + }
  49566. +
  49567. + return 0;
  49568. +}
  49569. +
  49570. +/* Return the child. */
  49571. +int
  49572. +utmost_child_internal(const coord_t * coord, sideof side UNUSED_ARG,
  49573. + jnode ** childp)
  49574. +{
  49575. + reiser4_block_nr block = pointer_at(coord);
  49576. + znode *child;
  49577. +
  49578. + assert("jmacd-2059", childp != NULL);
  49579. + assert("nikita-2962", reiser4_blocknr_is_sane(&block));
  49580. +
  49581. + child = zlook(znode_get_tree(coord->node), &block);
  49582. +
  49583. + if (IS_ERR(child)) {
  49584. + return PTR_ERR(child);
  49585. + }
  49586. +
  49587. + *childp = ZJNODE(child);
  49588. +
  49589. + return 0;
  49590. +}
  49591. +
  49592. +#if REISER4_DEBUG
  49593. +
  49594. +static void check_link(znode * left, znode * right)
  49595. +{
  49596. + znode *scan;
  49597. +
  49598. + for (scan = left; scan != right; scan = scan->right) {
  49599. + if (ZF_ISSET(scan, JNODE_RIP))
  49600. + break;
  49601. + if (znode_is_right_connected(scan) && scan->right != NULL) {
  49602. + if (ZF_ISSET(scan->right, JNODE_RIP))
  49603. + break;
  49604. + assert("nikita-3285",
  49605. + znode_is_left_connected(scan->right));
  49606. + assert("nikita-3265",
  49607. + ergo(scan != left,
  49608. + ZF_ISSET(scan, JNODE_HEARD_BANSHEE)));
  49609. + assert("nikita-3284", scan->right->left == scan);
  49610. + } else
  49611. + break;
  49612. + }
  49613. +}
  49614. +
  49615. +int check__internal(const coord_t * coord, const char **error)
  49616. +{
  49617. + reiser4_block_nr blk;
  49618. + znode *child;
  49619. + coord_t cpy;
  49620. +
  49621. + blk = pointer_at(coord);
  49622. + if (!reiser4_blocknr_is_sane(&blk)) {
  49623. + *error = "Invalid pointer";
  49624. + return -1;
  49625. + }
  49626. + coord_dup(&cpy, coord);
  49627. + child = znode_at(&cpy, cpy.node);
  49628. + if (child != NULL) {
  49629. + znode *left_child;
  49630. + znode *right_child;
  49631. +
  49632. + left_child = right_child = NULL;
  49633. +
  49634. + assert("nikita-3256", znode_invariant(child));
  49635. + if (coord_prev_item(&cpy) == 0 && item_is_internal(&cpy)) {
  49636. + left_child = znode_at(&cpy, cpy.node);
  49637. + if (left_child != NULL) {
  49638. + read_lock_tree(znode_get_tree(child));
  49639. + check_link(left_child, child);
  49640. + read_unlock_tree(znode_get_tree(child));
  49641. + zput(left_child);
  49642. + }
  49643. + }
  49644. + coord_dup(&cpy, coord);
  49645. + if (coord_next_item(&cpy) == 0 && item_is_internal(&cpy)) {
  49646. + right_child = znode_at(&cpy, cpy.node);
  49647. + if (right_child != NULL) {
  49648. + read_lock_tree(znode_get_tree(child));
  49649. + check_link(child, right_child);
  49650. + read_unlock_tree(znode_get_tree(child));
  49651. + zput(right_child);
  49652. + }
  49653. + }
  49654. + zput(child);
  49655. + }
  49656. + return 0;
  49657. +}
  49658. +
  49659. +#endif /* REISER4_DEBUG */
  49660. +
  49661. +/* return true only if this item really points to "block" */
  49662. +/* Audited by: green(2002.06.14) */
  49663. +int has_pointer_to_internal(const coord_t * coord /* coord of item */ ,
  49664. + const reiser4_block_nr * block /* block number to
  49665. + * check */ )
  49666. +{
  49667. + assert("nikita-613", coord != NULL);
  49668. + assert("nikita-614", block != NULL);
  49669. +
  49670. + return pointer_at(coord) == *block;
  49671. +}
  49672. +
  49673. +/* hook called by ->create_item() method of node plugin after new internal
  49674. + item was just created.
  49675. +
  49676. + This is point where pointer to new node is inserted into tree. Initialize
  49677. + parent pointer in child znode, insert child into sibling list and slum.
  49678. +
  49679. +*/
  49680. +int create_hook_internal(const coord_t * item /* coord of item */ ,
  49681. + void *arg /* child's left neighbor, if any */ )
  49682. +{
  49683. + znode *child;
  49684. + __u64 child_ptr;
  49685. +
  49686. + assert("nikita-1252", item != NULL);
  49687. + assert("nikita-1253", item->node != NULL);
  49688. + assert("nikita-1181", znode_get_level(item->node) > LEAF_LEVEL);
  49689. + assert("nikita-1450", item->unit_pos == 0);
  49690. +
  49691. + /*
  49692. + * preparing to item insertion build_child_ptr_data sets pointer to
  49693. + * data to be inserted to jnode's blocknr which is in cpu byte
  49694. + * order. Node's create_item simply copied those data. As result we
  49695. + * have child pointer in cpu's byte order. Convert content of internal
  49696. + * item to little endian byte order.
  49697. + */
  49698. + child_ptr = get_unaligned((__u64 *)item_body_by_coord(item));
  49699. + reiser4_update_internal(item, &child_ptr);
  49700. +
  49701. + child = znode_at(item, item->node);
  49702. + if (child != NULL && !IS_ERR(child)) {
  49703. + znode *left;
  49704. + int result = 0;
  49705. + reiser4_tree *tree;
  49706. +
  49707. + left = arg;
  49708. + tree = znode_get_tree(item->node);
  49709. + write_lock_tree(tree);
  49710. + write_lock_dk(tree);
  49711. + assert("nikita-1400", (child->in_parent.node == NULL)
  49712. + || (znode_above_root(child->in_parent.node)));
  49713. + ++item->node->c_count;
  49714. + coord_to_parent_coord(item, &child->in_parent);
  49715. + sibling_list_insert_nolock(child, left);
  49716. +
  49717. + assert("nikita-3297", ZF_ISSET(child, JNODE_ORPHAN));
  49718. + ZF_CLR(child, JNODE_ORPHAN);
  49719. +
  49720. + if ((left != NULL) && !keyeq(znode_get_rd_key(left),
  49721. + znode_get_rd_key(child))) {
  49722. + znode_set_rd_key(child, znode_get_rd_key(left));
  49723. + }
  49724. + write_unlock_dk(tree);
  49725. + write_unlock_tree(tree);
  49726. + zput(child);
  49727. + return result;
  49728. + } else {
  49729. + if (child == NULL)
  49730. + child = ERR_PTR(-EIO);
  49731. + return PTR_ERR(child);
  49732. + }
  49733. +}
  49734. +
  49735. +/* hook called by ->cut_and_kill() method of node plugin just before internal
  49736. + item is removed.
  49737. +
  49738. + This is point where empty node is removed from the tree. Clear parent
  49739. + pointer in child, and mark node for pending deletion.
  49740. +
  49741. + Node will be actually deleted later and in several installations:
  49742. +
  49743. + . when last lock on this node will be released, node will be removed from
  49744. + the sibling list and its lock will be invalidated
  49745. +
  49746. + . when last reference to this node will be dropped, bitmap will be updated
  49747. + and node will be actually removed from the memory.
  49748. +
  49749. +*/
  49750. +int kill_hook_internal(const coord_t * item /* coord of item */ ,
  49751. + pos_in_node_t from UNUSED_ARG /* start unit */ ,
  49752. + pos_in_node_t count UNUSED_ARG /* stop unit */ ,
  49753. + struct carry_kill_data *p UNUSED_ARG)
  49754. +{
  49755. + znode *child;
  49756. + int result = 0;
  49757. +
  49758. + assert("nikita-1222", item != NULL);
  49759. + assert("nikita-1224", from == 0);
  49760. + assert("nikita-1225", count == 1);
  49761. +
  49762. + child = znode_at(item, item->node);
  49763. + if (child == NULL)
  49764. + return 0;
  49765. + if (IS_ERR(child))
  49766. + return PTR_ERR(child);
  49767. + result = zload(child);
  49768. + if (result) {
  49769. + zput(child);
  49770. + return result;
  49771. + }
  49772. + if (node_is_empty(child)) {
  49773. + reiser4_tree *tree;
  49774. +
  49775. + assert("nikita-1397", znode_is_write_locked(child));
  49776. + assert("nikita-1398", child->c_count == 0);
  49777. + assert("nikita-2546", ZF_ISSET(child, JNODE_HEARD_BANSHEE));
  49778. +
  49779. + tree = znode_get_tree(item->node);
  49780. + write_lock_tree(tree);
  49781. + init_parent_coord(&child->in_parent, NULL);
  49782. + --item->node->c_count;
  49783. + write_unlock_tree(tree);
  49784. + } else {
  49785. + warning("nikita-1223",
  49786. + "Cowardly refuse to remove link to non-empty node");
  49787. + result = RETERR(-EIO);
  49788. + }
  49789. + zrelse(child);
  49790. + zput(child);
  49791. + return result;
  49792. +}
  49793. +
  49794. +/* hook called by ->shift() node plugin method when iternal item was just
  49795. + moved from one node to another.
  49796. +
  49797. + Update parent pointer in child and c_counts in old and new parent
  49798. +
  49799. +*/
  49800. +int shift_hook_internal(const coord_t * item /* coord of item */ ,
  49801. + unsigned from UNUSED_ARG /* start unit */ ,
  49802. + unsigned count UNUSED_ARG /* stop unit */ ,
  49803. + znode * old_node /* old parent */ )
  49804. +{
  49805. + znode *child;
  49806. + znode *new_node;
  49807. + reiser4_tree *tree;
  49808. +
  49809. + assert("nikita-1276", item != NULL);
  49810. + assert("nikita-1277", from == 0);
  49811. + assert("nikita-1278", count == 1);
  49812. + assert("nikita-1451", item->unit_pos == 0);
  49813. +
  49814. + new_node = item->node;
  49815. + assert("nikita-2132", new_node != old_node);
  49816. + tree = znode_get_tree(item->node);
  49817. + child = child_znode(item, old_node, 1, 0);
  49818. + if (child == NULL)
  49819. + return 0;
  49820. + if (!IS_ERR(child)) {
  49821. + write_lock_tree(tree);
  49822. + ++new_node->c_count;
  49823. + assert("nikita-1395", znode_parent(child) == old_node);
  49824. + assert("nikita-1396", old_node->c_count > 0);
  49825. + coord_to_parent_coord(item, &child->in_parent);
  49826. + assert("nikita-1781", znode_parent(child) == new_node);
  49827. + assert("nikita-1782",
  49828. + check_tree_pointer(item, child) == NS_FOUND);
  49829. + --old_node->c_count;
  49830. + write_unlock_tree(tree);
  49831. + zput(child);
  49832. + return 0;
  49833. + } else
  49834. + return PTR_ERR(child);
  49835. +}
  49836. +
  49837. +/* plugin->u.item.b.max_key_inside - not defined */
  49838. +
  49839. +/* plugin->u.item.b.nr_units - item.c:single_unit */
  49840. +
  49841. +/* Make Linus happy.
  49842. + Local variables:
  49843. + c-indentation-style: "K&R"
  49844. + mode-name: "LC"
  49845. + c-basic-offset: 8
  49846. + tab-width: 8
  49847. + fill-column: 120
  49848. + End:
  49849. +*/
  49850. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/internal.h linux-5.16.14/fs/reiser4/plugin/item/internal.h
  49851. --- linux-5.16.14.orig/fs/reiser4/plugin/item/internal.h 1970-01-01 01:00:00.000000000 +0100
  49852. +++ linux-5.16.14/fs/reiser4/plugin/item/internal.h 2022-03-12 13:26:19.679892794 +0100
  49853. @@ -0,0 +1,57 @@
  49854. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  49855. +/* Internal item contains down-link to the child of the internal/twig
  49856. + node in a tree. It is internal items that are actually used during
  49857. + tree traversal. */
  49858. +
  49859. +#if !defined( __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__ )
  49860. +#define __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__
  49861. +
  49862. +#include "../../forward.h"
  49863. +#include "../../dformat.h"
  49864. +
  49865. +/* on-disk layout of internal item */
  49866. +typedef struct internal_item_layout {
  49867. + /* 0 */ reiser4_dblock_nr pointer;
  49868. + /* 4 */
  49869. +} internal_item_layout;
  49870. +
  49871. +struct cut_list;
  49872. +
  49873. +int mergeable_internal(const coord_t * p1, const coord_t * p2);
  49874. +lookup_result lookup_internal(const reiser4_key * key, lookup_bias bias,
  49875. + coord_t * coord);
  49876. +/* store pointer from internal item into "block". Implementation of
  49877. + ->down_link() method */
  49878. +extern void down_link_internal(const coord_t * coord, const reiser4_key * key,
  49879. + reiser4_block_nr * block);
  49880. +extern int has_pointer_to_internal(const coord_t * coord,
  49881. + const reiser4_block_nr * block);
  49882. +extern int create_hook_internal(const coord_t * item, void *arg);
  49883. +extern int kill_hook_internal(const coord_t * item, pos_in_node_t from,
  49884. + pos_in_node_t count, struct carry_kill_data *);
  49885. +extern int shift_hook_internal(const coord_t * item, unsigned from,
  49886. + unsigned count, znode * old_node);
  49887. +extern void reiser4_print_internal(const char *prefix, coord_t * coord);
  49888. +
  49889. +extern int utmost_child_internal(const coord_t * coord, sideof side,
  49890. + jnode ** child);
  49891. +int utmost_child_real_block_internal(const coord_t * coord, sideof side,
  49892. + reiser4_block_nr * block);
  49893. +
  49894. +extern void reiser4_update_internal(const coord_t * coord,
  49895. + const reiser4_block_nr * blocknr);
  49896. +/* FIXME: reiserfs has check_internal */
  49897. +extern int check__internal(const coord_t * coord, const char **error);
  49898. +
  49899. +/* __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__ */
  49900. +#endif
  49901. +
  49902. +/* Make Linus happy.
  49903. + Local variables:
  49904. + c-indentation-style: "K&R"
  49905. + mode-name: "LC"
  49906. + c-basic-offset: 8
  49907. + tab-width: 8
  49908. + fill-column: 120
  49909. + End:
  49910. +*/
  49911. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/item.c linux-5.16.14/fs/reiser4/plugin/item/item.c
  49912. --- linux-5.16.14.orig/fs/reiser4/plugin/item/item.c 1970-01-01 01:00:00.000000000 +0100
  49913. +++ linux-5.16.14/fs/reiser4/plugin/item/item.c 2022-03-12 13:26:19.680892796 +0100
  49914. @@ -0,0 +1,719 @@
  49915. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  49916. +
  49917. +/* definition of item plugins. */
  49918. +
  49919. +#include "../../forward.h"
  49920. +#include "../../debug.h"
  49921. +#include "../../key.h"
  49922. +#include "../../coord.h"
  49923. +#include "../plugin_header.h"
  49924. +#include "sde.h"
  49925. +#include "internal.h"
  49926. +#include "item.h"
  49927. +#include "static_stat.h"
  49928. +#include "../plugin.h"
  49929. +#include "../../znode.h"
  49930. +#include "../../tree.h"
  49931. +#include "../../context.h"
  49932. +#include "ctail.h"
  49933. +
  49934. +/* return pointer to item body */
  49935. +void item_body_by_coord_hard(coord_t * coord /* coord to query */ )
  49936. +{
  49937. + assert("nikita-324", coord != NULL);
  49938. + assert("nikita-325", coord->node != NULL);
  49939. + assert("nikita-326", znode_is_loaded(coord->node));
  49940. + assert("nikita-3200", coord->offset == INVALID_OFFSET);
  49941. +
  49942. + coord->offset =
  49943. + node_plugin_by_node(coord->node)->item_by_coord(coord) -
  49944. + zdata(coord->node);
  49945. + ON_DEBUG(coord->body_v = coord->node->times_locked);
  49946. +}
  49947. +
  49948. +void *item_body_by_coord_easy(const coord_t * coord /* coord to query */ )
  49949. +{
  49950. + return zdata(coord->node) + coord->offset;
  49951. +}
  49952. +
  49953. +#if REISER4_DEBUG
  49954. +
  49955. +int item_body_is_valid(const coord_t * coord)
  49956. +{
  49957. + return
  49958. + coord->offset ==
  49959. + node_plugin_by_node(coord->node)->item_by_coord(coord) -
  49960. + zdata(coord->node);
  49961. +}
  49962. +
  49963. +#endif
  49964. +
  49965. +/* return length of item at @coord */
  49966. +pos_in_node_t item_length_by_coord(const coord_t * coord /* coord to query */ )
  49967. +{
  49968. + int len;
  49969. +
  49970. + assert("nikita-327", coord != NULL);
  49971. + assert("nikita-328", coord->node != NULL);
  49972. + assert("nikita-329", znode_is_loaded(coord->node));
  49973. +
  49974. + len = node_plugin_by_node(coord->node)->length_by_coord(coord);
  49975. + return len;
  49976. +}
  49977. +
  49978. +void obtain_item_plugin(const coord_t * coord)
  49979. +{
  49980. + assert("nikita-330", coord != NULL);
  49981. + assert("nikita-331", coord->node != NULL);
  49982. + assert("nikita-332", znode_is_loaded(coord->node));
  49983. +
  49984. + coord_set_iplug((coord_t *) coord,
  49985. + node_plugin_by_node(coord->node)->
  49986. + plugin_by_coord(coord));
  49987. + assert("nikita-2479",
  49988. + coord_iplug(coord) ==
  49989. + node_plugin_by_node(coord->node)->plugin_by_coord(coord));
  49990. +}
  49991. +
  49992. +/* return id of item */
  49993. +/* Audited by: green(2002.06.15) */
  49994. +item_id item_id_by_coord(const coord_t * coord /* coord to query */ )
  49995. +{
  49996. + assert("vs-539", coord != NULL);
  49997. + assert("vs-538", coord->node != NULL);
  49998. + assert("vs-537", znode_is_loaded(coord->node));
  49999. + assert("vs-536", item_plugin_by_coord(coord) != NULL);
  50000. + assert("vs-540",
  50001. + item_id_by_plugin(item_plugin_by_coord(coord)) < LAST_ITEM_ID);
  50002. +
  50003. + return item_id_by_plugin(item_plugin_by_coord(coord));
  50004. +}
  50005. +
  50006. +/* return key of item at @coord */
  50007. +/* Audited by: green(2002.06.15) */
  50008. +reiser4_key *item_key_by_coord(const coord_t * coord /* coord to query */ ,
  50009. + reiser4_key * key /* result */ )
  50010. +{
  50011. + assert("nikita-338", coord != NULL);
  50012. + assert("nikita-339", coord->node != NULL);
  50013. + assert("nikita-340", znode_is_loaded(coord->node));
  50014. +
  50015. + return node_plugin_by_node(coord->node)->key_at(coord, key);
  50016. +}
  50017. +
  50018. +/* this returns max key in the item */
  50019. +reiser4_key *max_item_key_by_coord(const coord_t * coord /* coord to query */ ,
  50020. + reiser4_key * key /* result */ )
  50021. +{
  50022. + coord_t last;
  50023. +
  50024. + assert("nikita-338", coord != NULL);
  50025. + assert("nikita-339", coord->node != NULL);
  50026. + assert("nikita-340", znode_is_loaded(coord->node));
  50027. +
  50028. + /* make coord pointing to last item's unit */
  50029. + coord_dup(&last, coord);
  50030. + last.unit_pos = coord_num_units(&last) - 1;
  50031. + assert("vs-1560", coord_is_existing_unit(&last));
  50032. +
  50033. + max_unit_key_by_coord(&last, key);
  50034. + return key;
  50035. +}
  50036. +
  50037. +/* return key of unit at @coord */
  50038. +reiser4_key *unit_key_by_coord(const coord_t * coord /* coord to query */ ,
  50039. + reiser4_key * key /* result */ )
  50040. +{
  50041. + assert("nikita-772", coord != NULL);
  50042. + assert("nikita-774", coord->node != NULL);
  50043. + assert("nikita-775", znode_is_loaded(coord->node));
  50044. +
  50045. + if (item_plugin_by_coord(coord)->b.unit_key != NULL)
  50046. + return item_plugin_by_coord(coord)->b.unit_key(coord, key);
  50047. + else
  50048. + return item_key_by_coord(coord, key);
  50049. +}
  50050. +
  50051. +/* return the biggest key contained in the unit @coord */
  50052. +reiser4_key *max_unit_key_by_coord(const coord_t * coord /* coord to query */ ,
  50053. + reiser4_key * key /* result */ )
  50054. +{
  50055. + assert("nikita-772", coord != NULL);
  50056. + assert("nikita-774", coord->node != NULL);
  50057. + assert("nikita-775", znode_is_loaded(coord->node));
  50058. +
  50059. + if (item_plugin_by_coord(coord)->b.max_unit_key != NULL)
  50060. + return item_plugin_by_coord(coord)->b.max_unit_key(coord, key);
  50061. + else
  50062. + return unit_key_by_coord(coord, key);
  50063. +}
  50064. +
  50065. +/* ->max_key_inside() method for items consisting of exactly one key (like
  50066. + stat-data) */
  50067. +static reiser4_key *max_key_inside_single_key(const coord_t *
  50068. + coord /* coord of item */ ,
  50069. + reiser4_key *
  50070. + result /* resulting key */ )
  50071. +{
  50072. + assert("nikita-604", coord != NULL);
  50073. +
  50074. + /* coord -> key is starting key of this item and it has to be already
  50075. + filled in */
  50076. + return unit_key_by_coord(coord, result);
  50077. +}
  50078. +
  50079. +/* ->nr_units() method for items consisting of exactly one unit always */
  50080. +pos_in_node_t
  50081. +nr_units_single_unit(const coord_t * coord UNUSED_ARG /* coord of item */ )
  50082. +{
  50083. + return 1;
  50084. +}
  50085. +
  50086. +static int
  50087. +paste_no_paste(coord_t * coord UNUSED_ARG,
  50088. + reiser4_item_data * data UNUSED_ARG,
  50089. + carry_plugin_info * info UNUSED_ARG)
  50090. +{
  50091. + return 0;
  50092. +}
  50093. +
  50094. +/* default ->fast_paste() method */
  50095. +static int
  50096. +agree_to_fast_op(const coord_t * coord UNUSED_ARG /* coord of item */ )
  50097. +{
  50098. + return 1;
  50099. +}
  50100. +
  50101. +int item_can_contain_key(const coord_t * item /* coord of item */ ,
  50102. + const reiser4_key * key /* key to check */ ,
  50103. + const reiser4_item_data * data /* parameters of item
  50104. + * being created */ )
  50105. +{
  50106. + item_plugin *iplug;
  50107. + reiser4_key min_key_in_item;
  50108. + reiser4_key max_key_in_item;
  50109. +
  50110. + assert("nikita-1658", item != NULL);
  50111. + assert("nikita-1659", key != NULL);
  50112. +
  50113. + iplug = item_plugin_by_coord(item);
  50114. + if (iplug->b.can_contain_key != NULL)
  50115. + return iplug->b.can_contain_key(item, key, data);
  50116. + else {
  50117. + assert("nikita-1681", iplug->b.max_key_inside != NULL);
  50118. + item_key_by_coord(item, &min_key_in_item);
  50119. + iplug->b.max_key_inside(item, &max_key_in_item);
  50120. +
  50121. + /* can contain key if
  50122. + min_key_in_item <= key &&
  50123. + key <= max_key_in_item
  50124. + */
  50125. + return keyle(&min_key_in_item, key)
  50126. + && keyle(key, &max_key_in_item);
  50127. + }
  50128. +}
  50129. +
  50130. +/* mergeable method for non mergeable items */
  50131. +static int
  50132. +not_mergeable(const coord_t * i1 UNUSED_ARG, const coord_t * i2 UNUSED_ARG)
  50133. +{
  50134. + return 0;
  50135. +}
  50136. +
  50137. +/* return 0 if @item1 and @item2 are not mergeable, !0 - otherwise */
  50138. +int are_items_mergeable(const coord_t * i1 /* coord of first item */ ,
  50139. + const coord_t * i2 /* coord of second item */ )
  50140. +{
  50141. + item_plugin *iplug;
  50142. + reiser4_key k1;
  50143. + reiser4_key k2;
  50144. +
  50145. + assert("nikita-1336", i1 != NULL);
  50146. + assert("nikita-1337", i2 != NULL);
  50147. +
  50148. + iplug = item_plugin_by_coord(i1);
  50149. + assert("nikita-1338", iplug != NULL);
  50150. +
  50151. + /* NOTE-NIKITA are_items_mergeable() is also called by assertions in
  50152. + shifting code when nodes are in "suspended" state. */
  50153. + assert("nikita-1663",
  50154. + keyle(item_key_by_coord(i1, &k1), item_key_by_coord(i2, &k2)));
  50155. +
  50156. + if (iplug->b.mergeable != NULL) {
  50157. + return iplug->b.mergeable(i1, i2);
  50158. + } else if (iplug->b.max_key_inside != NULL) {
  50159. + iplug->b.max_key_inside(i1, &k1);
  50160. + item_key_by_coord(i2, &k2);
  50161. +
  50162. + /* mergeable if ->max_key_inside() >= key of i2; */
  50163. + return keyge(iplug->b.max_key_inside(i1, &k1),
  50164. + item_key_by_coord(i2, &k2));
  50165. + } else {
  50166. + item_key_by_coord(i1, &k1);
  50167. + item_key_by_coord(i2, &k2);
  50168. +
  50169. + return
  50170. + (get_key_locality(&k1) == get_key_locality(&k2)) &&
  50171. + (get_key_objectid(&k1) == get_key_objectid(&k2))
  50172. + && (iplug == item_plugin_by_coord(i2));
  50173. + }
  50174. +}
  50175. +
  50176. +int item_is_extent(const coord_t * item)
  50177. +{
  50178. + assert("vs-482", coord_is_existing_item(item));
  50179. + return item_id_by_coord(item) == EXTENT_POINTER_ID;
  50180. +}
  50181. +
  50182. +int item_is_tail(const coord_t * item)
  50183. +{
  50184. + assert("vs-482", coord_is_existing_item(item));
  50185. + return item_id_by_coord(item) == FORMATTING_ID;
  50186. +}
  50187. +
  50188. +#if REISER4_DEBUG
  50189. +
  50190. +int item_is_statdata(const coord_t * item)
  50191. +{
  50192. + assert("vs-516", coord_is_existing_item(item));
  50193. + return plugin_of_group(item_plugin_by_coord(item), STAT_DATA_ITEM_TYPE);
  50194. +}
  50195. +
  50196. +int item_is_ctail(const coord_t * item)
  50197. +{
  50198. + assert("edward-xx", coord_is_existing_item(item));
  50199. + return item_id_by_coord(item) == CTAIL_ID;
  50200. +}
  50201. +
  50202. +#endif /* REISER4_DEBUG */
  50203. +
  50204. +static int change_item(struct inode *inode,
  50205. + reiser4_plugin * plugin,
  50206. + pset_member memb)
  50207. +{
  50208. + /* cannot change constituent item (sd, or dir_item) */
  50209. + return RETERR(-EINVAL);
  50210. +}
  50211. +
  50212. +static reiser4_plugin_ops item_plugin_ops = {
  50213. + .init = NULL,
  50214. + .load = NULL,
  50215. + .save_len = NULL,
  50216. + .save = NULL,
  50217. + .change = change_item
  50218. +};
  50219. +
  50220. +item_plugin item_plugins[LAST_ITEM_ID] = {
  50221. + [STATIC_STAT_DATA_ID] = {
  50222. + .h = {
  50223. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50224. + .id = STATIC_STAT_DATA_ID,
  50225. + .groups = (1 << STAT_DATA_ITEM_TYPE),
  50226. + .pops = &item_plugin_ops,
  50227. + .label = "sd",
  50228. + .desc = "stat-data",
  50229. + .linkage = {NULL, NULL}
  50230. + },
  50231. + .b = {
  50232. + .max_key_inside = max_key_inside_single_key,
  50233. + .can_contain_key = NULL,
  50234. + .mergeable = not_mergeable,
  50235. + .nr_units = nr_units_single_unit,
  50236. + .lookup = NULL,
  50237. + .init = NULL,
  50238. + .paste = paste_no_paste,
  50239. + .fast_paste = NULL,
  50240. + .can_shift = NULL,
  50241. + .copy_units = NULL,
  50242. + .create_hook = NULL,
  50243. + .kill_hook = NULL,
  50244. + .shift_hook = NULL,
  50245. + .cut_units = NULL,
  50246. + .kill_units = NULL,
  50247. + .unit_key = NULL,
  50248. + .max_unit_key = NULL,
  50249. + .estimate = NULL,
  50250. + .item_data_by_flow = NULL,
  50251. +#if REISER4_DEBUG
  50252. + .check = NULL
  50253. +#endif
  50254. + },
  50255. + .f = {
  50256. + .utmost_child = NULL,
  50257. + .utmost_child_real_block = NULL,
  50258. + .update = NULL,
  50259. + .scan = NULL,
  50260. + .convert = NULL
  50261. + },
  50262. + .s = {
  50263. + .sd = {
  50264. + .init_inode = init_inode_static_sd,
  50265. + .save_len = save_len_static_sd,
  50266. + .save = save_static_sd
  50267. + }
  50268. + }
  50269. + },
  50270. + [SIMPLE_DIR_ENTRY_ID] = {
  50271. + .h = {
  50272. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50273. + .id = SIMPLE_DIR_ENTRY_ID,
  50274. + .groups = (1 << DIR_ENTRY_ITEM_TYPE),
  50275. + .pops = &item_plugin_ops,
  50276. + .label = "de",
  50277. + .desc = "directory entry",
  50278. + .linkage = {NULL, NULL}
  50279. + },
  50280. + .b = {
  50281. + .max_key_inside = max_key_inside_single_key,
  50282. + .can_contain_key = NULL,
  50283. + .mergeable = NULL,
  50284. + .nr_units = nr_units_single_unit,
  50285. + .lookup = NULL,
  50286. + .init = NULL,
  50287. + .paste = NULL,
  50288. + .fast_paste = NULL,
  50289. + .can_shift = NULL,
  50290. + .copy_units = NULL,
  50291. + .create_hook = NULL,
  50292. + .kill_hook = NULL,
  50293. + .shift_hook = NULL,
  50294. + .cut_units = NULL,
  50295. + .kill_units = NULL,
  50296. + .unit_key = NULL,
  50297. + .max_unit_key = NULL,
  50298. + .estimate = NULL,
  50299. + .item_data_by_flow = NULL,
  50300. +#if REISER4_DEBUG
  50301. + .check = NULL
  50302. +#endif
  50303. + },
  50304. + .f = {
  50305. + .utmost_child = NULL,
  50306. + .utmost_child_real_block = NULL,
  50307. + .update = NULL,
  50308. + .scan = NULL,
  50309. + .convert = NULL
  50310. + },
  50311. + .s = {
  50312. + .dir = {
  50313. + .extract_key = extract_key_de,
  50314. + .update_key = update_key_de,
  50315. + .extract_name = extract_name_de,
  50316. + .extract_file_type = extract_file_type_de,
  50317. + .add_entry = add_entry_de,
  50318. + .rem_entry = rem_entry_de,
  50319. + .max_name_len = max_name_len_de
  50320. + }
  50321. + }
  50322. + },
  50323. + [COMPOUND_DIR_ID] = {
  50324. + .h = {
  50325. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50326. + .id = COMPOUND_DIR_ID,
  50327. + .groups = (1 << DIR_ENTRY_ITEM_TYPE),
  50328. + .pops = &item_plugin_ops,
  50329. + .label = "cde",
  50330. + .desc = "compressed directory entry",
  50331. + .linkage = {NULL, NULL}
  50332. + },
  50333. + .b = {
  50334. + .max_key_inside = max_key_inside_cde,
  50335. + .can_contain_key = can_contain_key_cde,
  50336. + .mergeable = mergeable_cde,
  50337. + .nr_units = nr_units_cde,
  50338. + .lookup = lookup_cde,
  50339. + .init = init_cde,
  50340. + .paste = paste_cde,
  50341. + .fast_paste = agree_to_fast_op,
  50342. + .can_shift = can_shift_cde,
  50343. + .copy_units = copy_units_cde,
  50344. + .create_hook = NULL,
  50345. + .kill_hook = NULL,
  50346. + .shift_hook = NULL,
  50347. + .cut_units = cut_units_cde,
  50348. + .kill_units = kill_units_cde,
  50349. + .unit_key = unit_key_cde,
  50350. + .max_unit_key = unit_key_cde,
  50351. + .estimate = estimate_cde,
  50352. + .item_data_by_flow = NULL,
  50353. +#if REISER4_DEBUG
  50354. + .check = reiser4_check_cde
  50355. +#endif
  50356. + },
  50357. + .f = {
  50358. + .utmost_child = NULL,
  50359. + .utmost_child_real_block = NULL,
  50360. + .update = NULL,
  50361. + .scan = NULL,
  50362. + .convert = NULL
  50363. + },
  50364. + .s = {
  50365. + .dir = {
  50366. + .extract_key = extract_key_cde,
  50367. + .update_key = update_key_cde,
  50368. + .extract_name = extract_name_cde,
  50369. + .extract_file_type = extract_file_type_de,
  50370. + .add_entry = add_entry_cde,
  50371. + .rem_entry = rem_entry_cde,
  50372. + .max_name_len = max_name_len_cde
  50373. + }
  50374. + }
  50375. + },
  50376. + [NODE_POINTER_ID] = {
  50377. + .h = {
  50378. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50379. + .id = NODE_POINTER_ID,
  50380. + .groups = (1 << INTERNAL_ITEM_TYPE),
  50381. + .pops = NULL,
  50382. + .label = "internal",
  50383. + .desc = "internal item",
  50384. + .linkage = {NULL, NULL}
  50385. + },
  50386. + .b = {
  50387. + .max_key_inside = NULL,
  50388. + .can_contain_key = NULL,
  50389. + .mergeable = mergeable_internal,
  50390. + .nr_units = nr_units_single_unit,
  50391. + .lookup = lookup_internal,
  50392. + .init = NULL,
  50393. + .paste = NULL,
  50394. + .fast_paste = NULL,
  50395. + .can_shift = NULL,
  50396. + .copy_units = NULL,
  50397. + .create_hook = create_hook_internal,
  50398. + .kill_hook = kill_hook_internal,
  50399. + .shift_hook = shift_hook_internal,
  50400. + .cut_units = NULL,
  50401. + .kill_units = NULL,
  50402. + .unit_key = NULL,
  50403. + .max_unit_key = NULL,
  50404. + .estimate = NULL,
  50405. + .item_data_by_flow = NULL,
  50406. +#if REISER4_DEBUG
  50407. + .check = check__internal
  50408. +#endif
  50409. + },
  50410. + .f = {
  50411. + .utmost_child = utmost_child_internal,
  50412. + .utmost_child_real_block =
  50413. + utmost_child_real_block_internal,
  50414. + .update = reiser4_update_internal,
  50415. + .scan = NULL,
  50416. + .convert = NULL
  50417. + },
  50418. + .s = {
  50419. + .internal = {
  50420. + .down_link = down_link_internal,
  50421. + .has_pointer_to = has_pointer_to_internal
  50422. + }
  50423. + }
  50424. + },
  50425. + [EXTENT_POINTER_ID] = {
  50426. + .h = {
  50427. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50428. + .id = EXTENT_POINTER_ID,
  50429. + .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE),
  50430. + .pops = NULL,
  50431. + .label = "extent",
  50432. + .desc = "extent item",
  50433. + .linkage = {NULL, NULL}
  50434. + },
  50435. + .b = {
  50436. + .max_key_inside = max_key_inside_extent,
  50437. + .can_contain_key = can_contain_key_extent,
  50438. + .mergeable = mergeable_extent,
  50439. + .nr_units = nr_units_extent,
  50440. + .lookup = lookup_extent,
  50441. + .init = NULL,
  50442. + .paste = paste_extent,
  50443. + .fast_paste = agree_to_fast_op,
  50444. + .can_shift = can_shift_extent,
  50445. + .create_hook = create_hook_extent,
  50446. + .copy_units = copy_units_extent,
  50447. + .kill_hook = kill_hook_extent,
  50448. + .shift_hook = NULL,
  50449. + .cut_units = cut_units_extent,
  50450. + .kill_units = kill_units_extent,
  50451. + .unit_key = unit_key_extent,
  50452. + .max_unit_key = max_unit_key_extent,
  50453. + .estimate = NULL,
  50454. + .item_data_by_flow = NULL,
  50455. +#if REISER4_DEBUG
  50456. + .check = reiser4_check_extent
  50457. +#endif
  50458. + },
  50459. + .f = {
  50460. + .utmost_child = utmost_child_extent,
  50461. + .utmost_child_real_block =
  50462. + utmost_child_real_block_extent,
  50463. + .update = NULL,
  50464. + .scan = reiser4_scan_extent,
  50465. + .convert = NULL,
  50466. + .key_by_offset = key_by_offset_extent
  50467. + },
  50468. + .s = {
  50469. + .file = {
  50470. + .write = reiser4_write_extent,
  50471. + .read = reiser4_read_extent,
  50472. + .readpage = reiser4_readpage_extent,
  50473. + .get_block = get_block_address_extent,
  50474. + .append_key = append_key_extent,
  50475. + .init_coord_extension =
  50476. + init_coord_extension_extent
  50477. + }
  50478. + }
  50479. + },
  50480. + [FORMATTING_ID] = {
  50481. + .h = {
  50482. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50483. + .id = FORMATTING_ID,
  50484. + .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE),
  50485. + .pops = NULL,
  50486. + .label = "body",
  50487. + .desc = "body (or tail?) item",
  50488. + .linkage = {NULL, NULL}
  50489. + },
  50490. + .b = {
  50491. + .max_key_inside = max_key_inside_tail,
  50492. + .can_contain_key = can_contain_key_tail,
  50493. + .mergeable = mergeable_tail,
  50494. + .nr_units = nr_units_tail,
  50495. + .lookup = lookup_tail,
  50496. + .init = NULL,
  50497. + .paste = paste_tail,
  50498. + .fast_paste = agree_to_fast_op,
  50499. + .can_shift = can_shift_tail,
  50500. + .create_hook = NULL,
  50501. + .copy_units = copy_units_tail,
  50502. + .kill_hook = kill_hook_tail,
  50503. + .shift_hook = NULL,
  50504. + .cut_units = cut_units_tail,
  50505. + .kill_units = kill_units_tail,
  50506. + .unit_key = unit_key_tail,
  50507. + .max_unit_key = unit_key_tail,
  50508. + .estimate = NULL,
  50509. + .item_data_by_flow = NULL,
  50510. +#if REISER4_DEBUG
  50511. + .check = NULL
  50512. +#endif
  50513. + },
  50514. + .f = {
  50515. + .utmost_child = NULL,
  50516. + .utmost_child_real_block = NULL,
  50517. + .update = NULL,
  50518. + .scan = NULL,
  50519. + .convert = NULL
  50520. + },
  50521. + .s = {
  50522. + .file = {
  50523. + .write = reiser4_write_tail,
  50524. + .read = reiser4_read_tail,
  50525. + .readpage = readpage_tail,
  50526. + .get_block = get_block_address_tail,
  50527. + .append_key = append_key_tail,
  50528. + .init_coord_extension =
  50529. + init_coord_extension_tail
  50530. + }
  50531. + }
  50532. + },
  50533. + [CTAIL_ID] = {
  50534. + .h = {
  50535. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50536. + .id = CTAIL_ID,
  50537. + .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE),
  50538. + .pops = NULL,
  50539. + .label = "ctail",
  50540. + .desc = "cryptcompress tail item",
  50541. + .linkage = {NULL, NULL}
  50542. + },
  50543. + .b = {
  50544. + .max_key_inside = max_key_inside_tail,
  50545. + .can_contain_key = can_contain_key_ctail,
  50546. + .mergeable = mergeable_ctail,
  50547. + .nr_units = nr_units_ctail,
  50548. + .lookup = NULL,
  50549. + .init = init_ctail,
  50550. + .paste = paste_ctail,
  50551. + .fast_paste = agree_to_fast_op,
  50552. + .can_shift = can_shift_ctail,
  50553. + .create_hook = create_hook_ctail,
  50554. + .copy_units = copy_units_ctail,
  50555. + .kill_hook = kill_hook_ctail,
  50556. + .shift_hook = shift_hook_ctail,
  50557. + .cut_units = cut_units_ctail,
  50558. + .kill_units = kill_units_ctail,
  50559. + .unit_key = unit_key_tail,
  50560. + .max_unit_key = unit_key_tail,
  50561. + .estimate = estimate_ctail,
  50562. + .item_data_by_flow = NULL,
  50563. +#if REISER4_DEBUG
  50564. + .check = check_ctail
  50565. +#endif
  50566. + },
  50567. + .f = {
  50568. + .utmost_child = utmost_child_ctail,
  50569. + /* FIXME-EDWARD: write this */
  50570. + .utmost_child_real_block = NULL,
  50571. + .update = NULL,
  50572. + .scan = scan_ctail,
  50573. + .convert = convert_ctail
  50574. + },
  50575. + .s = {
  50576. + .file = {
  50577. + .write = NULL,
  50578. + .read = read_ctail,
  50579. + .readpage = readpage_ctail,
  50580. + .get_block = get_block_address_tail,
  50581. + .append_key = append_key_ctail,
  50582. + .init_coord_extension =
  50583. + init_coord_extension_tail
  50584. + }
  50585. + }
  50586. + },
  50587. + [BLACK_BOX_ID] = {
  50588. + .h = {
  50589. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50590. + .id = BLACK_BOX_ID,
  50591. + .groups = (1 << OTHER_ITEM_TYPE),
  50592. + .pops = NULL,
  50593. + .label = "blackbox",
  50594. + .desc = "black box item",
  50595. + .linkage = {NULL, NULL}
  50596. + },
  50597. + .b = {
  50598. + .max_key_inside = NULL,
  50599. + .can_contain_key = NULL,
  50600. + .mergeable = not_mergeable,
  50601. + .nr_units = nr_units_single_unit,
  50602. + /* no need for ->lookup method */
  50603. + .lookup = NULL,
  50604. + .init = NULL,
  50605. + .paste = NULL,
  50606. + .fast_paste = NULL,
  50607. + .can_shift = NULL,
  50608. + .copy_units = NULL,
  50609. + .create_hook = NULL,
  50610. + .kill_hook = NULL,
  50611. + .shift_hook = NULL,
  50612. + .cut_units = NULL,
  50613. + .kill_units = NULL,
  50614. + .unit_key = NULL,
  50615. + .max_unit_key = NULL,
  50616. + .estimate = NULL,
  50617. + .item_data_by_flow = NULL,
  50618. +#if REISER4_DEBUG
  50619. + .check = NULL
  50620. +#endif
  50621. + }
  50622. + }
  50623. +};
  50624. +
  50625. +/* Make Linus happy.
  50626. + Local variables:
  50627. + c-indentation-style: "K&R"
  50628. + mode-name: "LC"
  50629. + c-basic-offset: 8
  50630. + tab-width: 8
  50631. + fill-column: 120
  50632. + End:
  50633. +*/
  50634. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/item.h linux-5.16.14/fs/reiser4/plugin/item/item.h
  50635. --- linux-5.16.14.orig/fs/reiser4/plugin/item/item.h 1970-01-01 01:00:00.000000000 +0100
  50636. +++ linux-5.16.14/fs/reiser4/plugin/item/item.h 2022-03-12 13:26:19.680892796 +0100
  50637. @@ -0,0 +1,398 @@
  50638. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  50639. +
  50640. +/* first read balance.c comments before reading this */
  50641. +
  50642. +/* An item_plugin implements all of the operations required for
  50643. + balancing that are item specific. */
  50644. +
  50645. +/* an item plugin also implements other operations that are specific to that
  50646. + item. These go into the item specific operations portion of the item
  50647. + handler, and all of the item specific portions of the item handler are put
  50648. + into a union. */
  50649. +
  50650. +#if !defined( __REISER4_ITEM_H__ )
  50651. +#define __REISER4_ITEM_H__
  50652. +
  50653. +#include "../../forward.h"
  50654. +#include "../plugin_header.h"
  50655. +#include "../../dformat.h"
  50656. +#include "../../seal.h"
  50657. +#include "../../plugin/file/file.h"
  50658. +
  50659. +#include <linux/fs.h> /* for struct file, struct inode */
  50660. +#include <linux/mm.h> /* for struct page */
  50661. +#include <linux/dcache.h> /* for struct dentry */
  50662. +
  50663. +typedef enum {
  50664. + STAT_DATA_ITEM_TYPE,
  50665. + DIR_ENTRY_ITEM_TYPE,
  50666. + INTERNAL_ITEM_TYPE,
  50667. + UNIX_FILE_METADATA_ITEM_TYPE,
  50668. + OTHER_ITEM_TYPE
  50669. +} item_type_id;
  50670. +
  50671. +/* this is the part of each item plugin that all items are expected to
  50672. + support or at least explicitly fail to support by setting the
  50673. + pointer to null. */
  50674. +struct balance_ops {
  50675. + /* operations called by balancing
  50676. +
  50677. + It is interesting to consider that some of these item
  50678. + operations could be given sources or targets that are not
  50679. + really items in nodes. This could be ok/useful.
  50680. +
  50681. + */
  50682. + /* maximal key that can _possibly_ be occupied by this item
  50683. +
  50684. + When inserting, the node ->lookup() method (called by
  50685. + coord_by_key()) reaches an item after binary search,
  50686. + the ->max_key_inside() item plugin method is used to determine
  50687. + whether a new item should be pasted into existing item
  50688. + (new_key<=max_key_inside()) or new item has to be created
  50689. + (new_key>max_key_inside()).
  50690. +
  50691. + For items that occupy exactly one key (like stat-data)
  50692. + this method should return this key. For items that can
  50693. + grow indefinitely (extent, directory item) this should
  50694. + return reiser4_max_key().
  50695. +
  50696. + For example extent with the key
  50697. +
  50698. + (LOCALITY,4,OBJID,STARTING-OFFSET), and length BLK blocks,
  50699. +
  50700. + ->max_key_inside is (LOCALITY,4,OBJID,0xffffffffffffffff).
  50701. + */
  50702. + reiser4_key *(*max_key_inside) (const coord_t *, reiser4_key *);
  50703. +
  50704. + /* true if item @coord can merge data at @key. */
  50705. + int (*can_contain_key) (const coord_t *, const reiser4_key *,
  50706. + const reiser4_item_data *);
  50707. + /* mergeable() - check items for mergeability
  50708. +
  50709. + Optional method. Returns true if two items can be merged.
  50710. +
  50711. + */
  50712. + int (*mergeable) (const coord_t *, const coord_t *);
  50713. +
  50714. + /* number of atomic things in an item.
  50715. + NOTE FOR CONTRIBUTORS: use a generic method
  50716. + nr_units_single_unit() for solid (atomic) items, as
  50717. + tree operations use it as a criterion of solidness
  50718. + (see is_solid_item macro) */
  50719. + pos_in_node_t(*nr_units) (const coord_t *);
  50720. +
  50721. + /* search within item for a unit within the item, and return a
  50722. + pointer to it. This can be used to calculate how many
  50723. + bytes to shrink an item if you use pointer arithmetic and
  50724. + compare to the start of the item body if the item's data
  50725. + are continuous in the node, if the item's data are not
  50726. + continuous in the node, all sorts of other things are maybe
  50727. + going to break as well. */
  50728. + lookup_result(*lookup) (const reiser4_key *, lookup_bias, coord_t *);
  50729. + /* method called by node_plugin->create_item() to initialise new
  50730. + item */
  50731. + int (*init) (coord_t * target, coord_t * from,
  50732. + reiser4_item_data * data);
  50733. + /* method called (e.g., by reiser4_resize_item()) to place new data
  50734. + into item when it grows */
  50735. + int (*paste) (coord_t *, reiser4_item_data *, carry_plugin_info *);
  50736. + /* return true if paste into @coord is allowed to skip
  50737. + carry. That is, if such paste would require any changes
  50738. + at the parent level
  50739. + */
  50740. + int (*fast_paste) (const coord_t *);
  50741. + /* how many but not more than @want units of @source can be
  50742. + shifted into @target node. If pend == append - we try to
  50743. + append last item of @target by first units of @source. If
  50744. + pend == prepend - we try to "prepend" first item in @target
  50745. + by last units of @source. @target node has @free_space
  50746. + bytes of free space. Total size of those units are returned
  50747. + via @size.
  50748. +
  50749. + @target is not NULL if shifting to the mergeable item and
  50750. + NULL if a new item will be created during shifting.
  50751. + */
  50752. + int (*can_shift) (unsigned free_space, coord_t *,
  50753. + znode *, shift_direction, unsigned *size,
  50754. + unsigned want);
  50755. +
  50756. + /* starting off @from-th unit of item @source append or
  50757. + prepend @count units to @target. @target has been already
  50758. + expanded by @free_space bytes. That must be exactly what is
  50759. + needed for those items in @target. If @where_is_free_space
  50760. + == SHIFT_LEFT - free space is at the end of @target item,
  50761. + otherwise - it is in the beginning of it. */
  50762. + void (*copy_units) (coord_t *, coord_t *,
  50763. + unsigned from, unsigned count,
  50764. + shift_direction where_is_free_space,
  50765. + unsigned free_space);
  50766. +
  50767. + int (*create_hook) (const coord_t *, void *);
  50768. + /* do whatever is necessary to do when @count units starting
  50769. + from @from-th one are removed from the tree */
  50770. + /* FIXME-VS: this is used to be here for, in particular,
  50771. + extents and items of internal type to free blocks they point
  50772. + to at the same time with removing items from a
  50773. + tree. Problems start, however, when dealloc_block fails due
  50774. + to some reason. Item gets removed, but blocks it pointed to
  50775. + are not freed. It is not clear how to fix this for items of
  50776. + internal type because a need to remove internal item may
  50777. + appear in the middle of balancing, and there is no way to
  50778. + undo changes made. OTOH, if space allocator involves
  50779. + balancing to perform dealloc_block - this will probably
  50780. + break balancing due to deadlock issues
  50781. + */
  50782. + int (*kill_hook) (const coord_t *, pos_in_node_t from,
  50783. + pos_in_node_t count, struct carry_kill_data *);
  50784. + int (*shift_hook) (const coord_t *, unsigned from, unsigned count,
  50785. + znode * _node);
  50786. +
  50787. + /* unit @*from contains @from_key. unit @*to contains @to_key. Cut all keys between @from_key and @to_key
  50788. + including boundaries. When units are cut from item beginning - move space which gets freed to head of
  50789. + item. When units are cut from item end - move freed space to item end. When units are cut from the middle of
  50790. + item - move freed space to item head. Return amount of space which got freed. Save smallest removed key in
  50791. + @smallest_removed if it is not 0. Save new first item key in @new_first_key if it is not 0
  50792. + */
  50793. + int (*cut_units) (coord_t *, pos_in_node_t from, pos_in_node_t to,
  50794. + struct carry_cut_data *,
  50795. + reiser4_key * smallest_removed,
  50796. + reiser4_key * new_first_key);
  50797. +
  50798. + /* like cut_units, except that these units are removed from the
  50799. + tree, not only from a node */
  50800. + int (*kill_units) (coord_t *, pos_in_node_t from, pos_in_node_t to,
  50801. + struct carry_kill_data *,
  50802. + reiser4_key * smallest_removed,
  50803. + reiser4_key * new_first);
  50804. +
  50805. + /* if @key_of_coord == 1 - key of coord is returned, otherwise -
  50806. + key of unit is returned. If @coord is not set to a certain
  50807. + unit - ERR_PTR(-ENOENT) is returned */
  50808. + reiser4_key *(*unit_key) (const coord_t *, reiser4_key *);
  50809. + reiser4_key *(*max_unit_key) (const coord_t *, reiser4_key *);
  50810. + /* estimate how much space is needed for paste @data into item at
  50811. + @coord. if @coord==0 - estimate insertion, otherwise - estimate
  50812. + pasting
  50813. + */
  50814. + int (*estimate) (const coord_t *, const reiser4_item_data *);
  50815. +
  50816. + /* converts flow @f to item data. @coord == 0 on insert */
  50817. + int (*item_data_by_flow) (const coord_t *, const flow_t *,
  50818. + reiser4_item_data *);
  50819. +
  50820. + /*void (*show) (struct seq_file *, coord_t *); */
  50821. +
  50822. +#if REISER4_DEBUG
  50823. + /* used for debugging, every item should have here the most
  50824. + complete possible check of the consistency of the item that
  50825. + the inventor can construct */
  50826. + int (*check) (const coord_t *, const char **error);
  50827. +#endif
  50828. +
  50829. +};
  50830. +
  50831. +struct flush_ops {
  50832. + /* return the right or left child of @coord, only if it is in memory */
  50833. + int (*utmost_child) (const coord_t *, sideof side, jnode ** child);
  50834. +
  50835. + /* return whether the right or left child of @coord has a non-fake
  50836. + block number. */
  50837. + int (*utmost_child_real_block) (const coord_t *, sideof side,
  50838. + reiser4_block_nr *);
  50839. + /* relocate child at @coord to the @block */
  50840. + void (*update) (const coord_t *, const reiser4_block_nr *);
  50841. + /* count unformatted nodes per item for leaf relocation policy, etc. */
  50842. + int (*scan) (flush_scan * scan);
  50843. + /* convert item by flush */
  50844. + int (*convert) (flush_pos_t * pos);
  50845. + /* backward mapping from jnode offset to a key. */
  50846. + int (*key_by_offset) (struct inode *, loff_t, reiser4_key *);
  50847. +};
  50848. +
  50849. +/* operations specific to the directory item */
  50850. +struct dir_entry_iops {
  50851. + /* extract stat-data key from directory entry at @coord and place it
  50852. + into @key. */
  50853. + int (*extract_key) (const coord_t *, reiser4_key * key);
  50854. + /* update object key in item. */
  50855. + int (*update_key) (const coord_t *, const reiser4_key *, lock_handle *);
  50856. + /* extract name from directory entry at @coord and return it */
  50857. + char *(*extract_name) (const coord_t *, char *buf);
  50858. + /* extract file type (DT_* stuff) from directory entry at @coord and
  50859. + return it */
  50860. + unsigned (*extract_file_type) (const coord_t *);
  50861. + int (*add_entry) (struct inode * dir,
  50862. + coord_t *, lock_handle *,
  50863. + const struct dentry * name,
  50864. + reiser4_dir_entry_desc * entry);
  50865. + int (*rem_entry) (struct inode * dir, const struct qstr * name,
  50866. + coord_t *, lock_handle *,
  50867. + reiser4_dir_entry_desc * entry);
  50868. + int (*max_name_len) (const struct inode * dir);
  50869. +};
  50870. +
  50871. +/* operations specific to items regular (unix) file metadata are built of */
  50872. +struct file_iops{
  50873. + ssize_t (*write) (struct file *, struct inode *,
  50874. + const char __user *, size_t, loff_t *pos);
  50875. + int (*read) (flow_t *, hint_t *, struct kiocb *, struct iov_iter *);
  50876. + int (*readpage) (void *, struct page *);
  50877. + int (*get_block) (const coord_t *, sector_t, sector_t *);
  50878. + /*
  50879. + * key of first byte which is not addressed by the item @coord is set
  50880. + * to.
  50881. + * For example, for extent item with the key
  50882. + *
  50883. + * (LOCALITY,4,OBJID,STARTING-OFFSET), and length BLK blocks,
  50884. + *
  50885. + * ->append_key is
  50886. + *
  50887. + * (LOCALITY,4,OBJID,STARTING-OFFSET + BLK * block_size)
  50888. + */
  50889. + reiser4_key *(*append_key) (const coord_t *, reiser4_key *);
  50890. +
  50891. + void (*init_coord_extension) (uf_coord_t *, loff_t);
  50892. +};
  50893. +
  50894. +/* operations specific to items of stat data type */
  50895. +struct sd_iops {
  50896. + int (*init_inode) (struct inode * inode, char *sd, int len);
  50897. + int (*save_len) (struct inode * inode);
  50898. + int (*save) (struct inode * inode, char **area);
  50899. +};
  50900. +
  50901. +/* operations specific to internal item */
  50902. +struct internal_iops{
  50903. + /* all a tree traversal wants to know from an internal item is
  50904. + where to go next. */
  50905. + void (*down_link) (const coord_t * coord,
  50906. + const reiser4_key * key, reiser4_block_nr * block);
  50907. + /* check that given internal item contains given pointer. */
  50908. + int (*has_pointer_to) (const coord_t * coord,
  50909. + const reiser4_block_nr * block);
  50910. +};
  50911. +
  50912. +struct item_plugin {
  50913. + /* generic fields */
  50914. + plugin_header h;
  50915. + /* methods common for all item types */
  50916. + struct balance_ops b; /* balance operations */
  50917. + struct flush_ops f; /* flush operates with items via these methods */
  50918. +
  50919. + /* methods specific to particular type of item */
  50920. + union {
  50921. + struct dir_entry_iops dir;
  50922. + struct file_iops file;
  50923. + struct sd_iops sd;
  50924. + struct internal_iops internal;
  50925. + } s;
  50926. +};
  50927. +
  50928. +#define is_solid_item(iplug) ((iplug)->b.nr_units == nr_units_single_unit)
  50929. +
  50930. +static inline item_id item_id_by_plugin(item_plugin * plugin)
  50931. +{
  50932. + return plugin->h.id;
  50933. +}
  50934. +
  50935. +static inline char get_iplugid(item_plugin * iplug)
  50936. +{
  50937. + assert("nikita-2838", iplug != NULL);
  50938. + assert("nikita-2839", iplug->h.id < 0xff);
  50939. + return (char)item_id_by_plugin(iplug);
  50940. +}
  50941. +
  50942. +extern unsigned long znode_times_locked(const znode * z);
  50943. +
  50944. +static inline void coord_set_iplug(coord_t * coord, item_plugin * iplug)
  50945. +{
  50946. + assert("nikita-2837", coord != NULL);
  50947. + assert("nikita-2838", iplug != NULL);
  50948. + coord->iplugid = get_iplugid(iplug);
  50949. + ON_DEBUG(coord->plug_v = znode_times_locked(coord->node));
  50950. +}
  50951. +
  50952. +static inline item_plugin *coord_iplug(const coord_t * coord)
  50953. +{
  50954. + assert("nikita-2833", coord != NULL);
  50955. + assert("nikita-2834", coord->iplugid != INVALID_PLUGID);
  50956. + assert("nikita-3549", coord->plug_v == znode_times_locked(coord->node));
  50957. + return (item_plugin *) plugin_by_id(REISER4_ITEM_PLUGIN_TYPE,
  50958. + coord->iplugid);
  50959. +}
  50960. +
  50961. +extern int item_can_contain_key(const coord_t * item, const reiser4_key * key,
  50962. + const reiser4_item_data *);
  50963. +extern int are_items_mergeable(const coord_t * i1, const coord_t * i2);
  50964. +extern int item_is_extent(const coord_t *);
  50965. +extern int item_is_tail(const coord_t *);
  50966. +extern int item_is_statdata(const coord_t * item);
  50967. +extern int item_is_ctail(const coord_t *);
  50968. +
  50969. +extern pos_in_node_t item_length_by_coord(const coord_t * coord);
  50970. +extern pos_in_node_t nr_units_single_unit(const coord_t * coord);
  50971. +extern item_id item_id_by_coord(const coord_t * coord /* coord to query */ );
  50972. +extern reiser4_key *item_key_by_coord(const coord_t * coord, reiser4_key * key);
  50973. +extern reiser4_key *max_item_key_by_coord(const coord_t *, reiser4_key *);
  50974. +extern reiser4_key *unit_key_by_coord(const coord_t * coord, reiser4_key * key);
  50975. +extern reiser4_key *max_unit_key_by_coord(const coord_t * coord,
  50976. + reiser4_key * key);
  50977. +extern void obtain_item_plugin(const coord_t * coord);
  50978. +
  50979. +#if defined(REISER4_DEBUG)
  50980. +extern int znode_is_loaded(const znode * node);
  50981. +#endif
  50982. +
  50983. +/* return plugin of item at @coord */
  50984. +static inline item_plugin *item_plugin_by_coord(const coord_t *
  50985. + coord /* coord to query */ )
  50986. +{
  50987. + assert("nikita-330", coord != NULL);
  50988. + assert("nikita-331", coord->node != NULL);
  50989. + assert("nikita-332", znode_is_loaded(coord->node));
  50990. +
  50991. + if (unlikely(!coord_is_iplug_set(coord)))
  50992. + obtain_item_plugin(coord);
  50993. + return coord_iplug(coord);
  50994. +}
  50995. +
  50996. +/* this returns true if item is of internal type */
  50997. +static inline int item_is_internal(const coord_t * item)
  50998. +{
  50999. + assert("vs-483", coord_is_existing_item(item));
  51000. + return plugin_of_group(item_plugin_by_coord(item), INTERNAL_ITEM_TYPE);
  51001. +}
  51002. +
  51003. +extern void item_body_by_coord_hard(coord_t * coord);
  51004. +extern void *item_body_by_coord_easy(const coord_t * coord);
  51005. +#if REISER4_DEBUG
  51006. +extern int item_body_is_valid(const coord_t * coord);
  51007. +#endif
  51008. +
  51009. +/* return pointer to item body */
  51010. +static inline void *item_body_by_coord(const coord_t *
  51011. + coord /* coord to query */ )
  51012. +{
  51013. + assert("nikita-324", coord != NULL);
  51014. + assert("nikita-325", coord->node != NULL);
  51015. + assert("nikita-326", znode_is_loaded(coord->node));
  51016. +
  51017. + if (coord->offset == INVALID_OFFSET)
  51018. + item_body_by_coord_hard((coord_t *) coord);
  51019. + assert("nikita-3201", item_body_is_valid(coord));
  51020. + assert("nikita-3550", coord->body_v == znode_times_locked(coord->node));
  51021. + return item_body_by_coord_easy(coord);
  51022. +}
  51023. +
  51024. +/* __REISER4_ITEM_H__ */
  51025. +#endif
  51026. +/* Make Linus happy.
  51027. + Local variables:
  51028. + c-indentation-style: "K&R"
  51029. + mode-name: "LC"
  51030. + c-basic-offset: 8
  51031. + tab-width: 8
  51032. + fill-column: 120
  51033. + scroll-step: 1
  51034. + End:
  51035. +*/
  51036. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/Makefile linux-5.16.14/fs/reiser4/plugin/item/Makefile
  51037. --- linux-5.16.14.orig/fs/reiser4/plugin/item/Makefile 1970-01-01 01:00:00.000000000 +0100
  51038. +++ linux-5.16.14/fs/reiser4/plugin/item/Makefile 2022-03-12 13:26:19.675892784 +0100
  51039. @@ -0,0 +1,18 @@
  51040. +
  51041. +MODULE := item_plugins
  51042. +
  51043. +obj-$(CONFIG_REISER4_FS) := $(MODULE).o
  51044. +
  51045. +$(MODULE)-objs += \
  51046. + item.o \
  51047. + static_stat.o \
  51048. + sde.o \
  51049. + cde.o \
  51050. + blackbox.o \
  51051. + internal.o \
  51052. + tail.o \
  51053. + ctail.o \
  51054. + extent.o \
  51055. + extent_item_ops.o \
  51056. + extent_file_ops.o \
  51057. + extent_flush_ops.o
  51058. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/sde.c linux-5.16.14/fs/reiser4/plugin/item/sde.c
  51059. --- linux-5.16.14.orig/fs/reiser4/plugin/item/sde.c 1970-01-01 01:00:00.000000000 +0100
  51060. +++ linux-5.16.14/fs/reiser4/plugin/item/sde.c 2022-03-12 13:26:19.680892796 +0100
  51061. @@ -0,0 +1,186 @@
  51062. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  51063. +
  51064. +/* Directory entry implementation */
  51065. +#include "../../forward.h"
  51066. +#include "../../debug.h"
  51067. +#include "../../dformat.h"
  51068. +#include "../../kassign.h"
  51069. +#include "../../coord.h"
  51070. +#include "sde.h"
  51071. +#include "item.h"
  51072. +#include "../plugin.h"
  51073. +#include "../../znode.h"
  51074. +#include "../../carry.h"
  51075. +#include "../../tree.h"
  51076. +#include "../../inode.h"
  51077. +
  51078. +#include <linux/fs.h> /* for struct inode */
  51079. +#include <linux/dcache.h> /* for struct dentry */
  51080. +
  51081. +/* ->extract_key() method of simple directory item plugin. */
  51082. +int extract_key_de(const coord_t * coord /* coord of item */ ,
  51083. + reiser4_key * key /* resulting key */ )
  51084. +{
  51085. + directory_entry_format *dent;
  51086. +
  51087. + assert("nikita-1458", coord != NULL);
  51088. + assert("nikita-1459", key != NULL);
  51089. +
  51090. + dent = (directory_entry_format *) item_body_by_coord(coord);
  51091. + assert("nikita-1158", item_length_by_coord(coord) >= (int)sizeof *dent);
  51092. + return extract_key_from_id(&dent->id, key);
  51093. +}
  51094. +
  51095. +int
  51096. +update_key_de(const coord_t * coord, const reiser4_key * key,
  51097. + lock_handle * lh UNUSED_ARG)
  51098. +{
  51099. + directory_entry_format *dent;
  51100. + obj_key_id obj_id;
  51101. + int result;
  51102. +
  51103. + assert("nikita-2342", coord != NULL);
  51104. + assert("nikita-2343", key != NULL);
  51105. +
  51106. + dent = (directory_entry_format *) item_body_by_coord(coord);
  51107. + result = build_obj_key_id(key, &obj_id);
  51108. + if (result == 0) {
  51109. + dent->id = obj_id;
  51110. + znode_make_dirty(coord->node);
  51111. + }
  51112. + return 0;
  51113. +}
  51114. +
  51115. +char *extract_dent_name(const coord_t * coord, directory_entry_format * dent,
  51116. + char *buf)
  51117. +{
  51118. + reiser4_key key;
  51119. +
  51120. + unit_key_by_coord(coord, &key);
  51121. + if (get_key_type(&key) != KEY_FILE_NAME_MINOR)
  51122. + reiser4_print_address("oops", znode_get_block(coord->node));
  51123. + if (!is_longname_key(&key)) {
  51124. + if (is_dot_key(&key))
  51125. + return (char *)".";
  51126. + else
  51127. + return extract_name_from_key(&key, buf);
  51128. + } else
  51129. + return (char *)dent->name;
  51130. +}
  51131. +
  51132. +/* ->extract_name() method of simple directory item plugin. */
  51133. +char *extract_name_de(const coord_t * coord /* coord of item */ , char *buf)
  51134. +{
  51135. + directory_entry_format *dent;
  51136. +
  51137. + assert("nikita-1460", coord != NULL);
  51138. +
  51139. + dent = (directory_entry_format *) item_body_by_coord(coord);
  51140. + return extract_dent_name(coord, dent, buf);
  51141. +}
  51142. +
  51143. +/* ->extract_file_type() method of simple directory item plugin. */
  51144. +unsigned extract_file_type_de(const coord_t * coord UNUSED_ARG /* coord of
  51145. + * item */ )
  51146. +{
  51147. + assert("nikita-1764", coord != NULL);
  51148. + /* we don't store file type in the directory entry yet.
  51149. +
  51150. + But see comments at kassign.h:obj_key_id
  51151. + */
  51152. + return DT_UNKNOWN;
  51153. +}
  51154. +
  51155. +int add_entry_de(struct inode *dir /* directory of item */ ,
  51156. + coord_t * coord /* coord of item */ ,
  51157. + lock_handle * lh /* insertion lock handle */ ,
  51158. + const struct dentry *de /* name to add */ ,
  51159. + reiser4_dir_entry_desc * entry /* parameters of new directory
  51160. + * entry */ )
  51161. +{
  51162. + reiser4_item_data data;
  51163. + directory_entry_format *dent;
  51164. + int result;
  51165. + const char *name;
  51166. + int len;
  51167. + int longname;
  51168. +
  51169. + name = de->d_name.name;
  51170. + len = de->d_name.len;
  51171. + assert("nikita-1163", strlen(name) == len);
  51172. +
  51173. + longname = is_longname(name, len);
  51174. +
  51175. + data.length = sizeof *dent;
  51176. + if (longname)
  51177. + data.length += len + 1;
  51178. + data.data = NULL;
  51179. + data.user = 0;
  51180. + data.iplug = item_plugin_by_id(SIMPLE_DIR_ENTRY_ID);
  51181. +
  51182. + inode_add_bytes(dir, data.length);
  51183. +
  51184. + result = insert_by_coord(coord, &data, &entry->key, lh, 0 /*flags */ );
  51185. + if (result != 0)
  51186. + return result;
  51187. +
  51188. + dent = (directory_entry_format *) item_body_by_coord(coord);
  51189. + build_inode_key_id(entry->obj, &dent->id);
  51190. + if (longname) {
  51191. + memcpy(dent->name, name, len);
  51192. + put_unaligned(0, &dent->name[len]);
  51193. + }
  51194. + return 0;
  51195. +}
  51196. +
  51197. +int rem_entry_de(struct inode *dir /* directory of item */ ,
  51198. + const struct qstr *name UNUSED_ARG,
  51199. + coord_t * coord /* coord of item */ ,
  51200. + lock_handle * lh UNUSED_ARG /* lock handle for
  51201. + * removal */ ,
  51202. + reiser4_dir_entry_desc * entry UNUSED_ARG /* parameters of
  51203. + * directory entry
  51204. + * being removed */ )
  51205. +{
  51206. + coord_t shadow;
  51207. + int result;
  51208. + int length;
  51209. +
  51210. + length = item_length_by_coord(coord);
  51211. + if (inode_get_bytes(dir) < length) {
  51212. + warning("nikita-2627", "Dir is broke: %llu: %llu",
  51213. + (unsigned long long)get_inode_oid(dir),
  51214. + inode_get_bytes(dir));
  51215. +
  51216. + return RETERR(-EIO);
  51217. + }
  51218. +
  51219. + /* cut_node() is supposed to take pointers to _different_
  51220. + coords, because it will modify them without respect to
  51221. + possible aliasing. To work around this, create temporary copy
  51222. + of @coord.
  51223. + */
  51224. + coord_dup(&shadow, coord);
  51225. + result =
  51226. + kill_node_content(coord, &shadow, NULL, NULL, NULL, NULL, NULL, 0);
  51227. + if (result == 0) {
  51228. + inode_sub_bytes(dir, length);
  51229. + }
  51230. + return result;
  51231. +}
  51232. +
  51233. +int max_name_len_de(const struct inode *dir)
  51234. +{
  51235. + return reiser4_tree_by_inode(dir)->nplug->max_item_size() -
  51236. + sizeof(directory_entry_format) - 2;
  51237. +}
  51238. +
  51239. +/* Make Linus happy.
  51240. + Local variables:
  51241. + c-indentation-style: "K&R"
  51242. + mode-name: "LC"
  51243. + c-basic-offset: 8
  51244. + tab-width: 8
  51245. + fill-column: 120
  51246. + End:
  51247. +*/
  51248. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/sde.h linux-5.16.14/fs/reiser4/plugin/item/sde.h
  51249. --- linux-5.16.14.orig/fs/reiser4/plugin/item/sde.h 1970-01-01 01:00:00.000000000 +0100
  51250. +++ linux-5.16.14/fs/reiser4/plugin/item/sde.h 2022-03-12 13:26:19.680892796 +0100
  51251. @@ -0,0 +1,66 @@
  51252. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  51253. +
  51254. +/* Directory entry. */
  51255. +
  51256. +#if !defined( __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ )
  51257. +#define __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__
  51258. +
  51259. +#include "../../forward.h"
  51260. +#include "../../dformat.h"
  51261. +#include "../../kassign.h"
  51262. +#include "../../key.h"
  51263. +
  51264. +#include <linux/fs.h>
  51265. +#include <linux/dcache.h> /* for struct dentry */
  51266. +
  51267. +typedef struct directory_entry_format {
  51268. + /* key of object stat-data. It's not necessary to store whole
  51269. + key here, because it's always key of stat-data, so minor
  51270. + packing locality and offset can be omitted here. But this
  51271. + relies on particular key allocation scheme for stat-data, so,
  51272. + for extensibility sake, whole key can be stored here.
  51273. +
  51274. + We store key as array of bytes, because we don't want 8-byte
  51275. + alignment of dir entries.
  51276. + */
  51277. + obj_key_id id;
  51278. + /* file name. Null terminated string. */
  51279. + d8 name[0];
  51280. +} directory_entry_format;
  51281. +
  51282. +void print_de(const char *prefix, coord_t * coord);
  51283. +int extract_key_de(const coord_t * coord, reiser4_key * key);
  51284. +int update_key_de(const coord_t * coord, const reiser4_key * key,
  51285. + lock_handle * lh);
  51286. +char *extract_name_de(const coord_t * coord, char *buf);
  51287. +unsigned extract_file_type_de(const coord_t * coord);
  51288. +int add_entry_de(struct inode *dir, coord_t * coord,
  51289. + lock_handle * lh, const struct dentry *name,
  51290. + reiser4_dir_entry_desc * entry);
  51291. +int rem_entry_de(struct inode *dir, const struct qstr *name, coord_t * coord,
  51292. + lock_handle * lh, reiser4_dir_entry_desc * entry);
  51293. +int max_name_len_de(const struct inode *dir);
  51294. +
  51295. +int de_rem_and_shrink(struct inode *dir, coord_t * coord, int length);
  51296. +
  51297. +char *extract_dent_name(const coord_t * coord,
  51298. + directory_entry_format * dent, char *buf);
  51299. +
  51300. +#if REISER4_LARGE_KEY
  51301. +#define DE_NAME_BUF_LEN (24)
  51302. +#else
  51303. +#define DE_NAME_BUF_LEN (16)
  51304. +#endif
  51305. +
  51306. +/* __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ */
  51307. +#endif
  51308. +
  51309. +/* Make Linus happy.
  51310. + Local variables:
  51311. + c-indentation-style: "K&R"
  51312. + mode-name: "LC"
  51313. + c-basic-offset: 8
  51314. + tab-width: 8
  51315. + fill-column: 120
  51316. + End:
  51317. +*/
  51318. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/static_stat.c linux-5.16.14/fs/reiser4/plugin/item/static_stat.c
  51319. --- linux-5.16.14.orig/fs/reiser4/plugin/item/static_stat.c 1970-01-01 01:00:00.000000000 +0100
  51320. +++ linux-5.16.14/fs/reiser4/plugin/item/static_stat.c 2022-03-12 13:26:19.681892799 +0100
  51321. @@ -0,0 +1,1114 @@
  51322. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  51323. +
  51324. +/* stat data manipulation. */
  51325. +
  51326. +#include "../../forward.h"
  51327. +#include "../../super.h"
  51328. +#include "../../vfs_ops.h"
  51329. +#include "../../inode.h"
  51330. +#include "../../debug.h"
  51331. +#include "../../dformat.h"
  51332. +#include "../object.h"
  51333. +#include "../plugin.h"
  51334. +#include "../plugin_header.h"
  51335. +#include "static_stat.h"
  51336. +#include "item.h"
  51337. +
  51338. +#include <linux/types.h>
  51339. +#include <linux/fs.h>
  51340. +
  51341. +/* see static_stat.h for explanation */
  51342. +
  51343. +/* helper function used while we are dumping/loading inode/plugin state
  51344. + to/from the stat-data. */
  51345. +
  51346. +static void move_on(int *length /* space remaining in stat-data */ ,
  51347. + char **area /* current coord in stat data */ ,
  51348. + int size_of /* how many bytes to move forward */ )
  51349. +{
  51350. + assert("nikita-615", length != NULL);
  51351. + assert("nikita-616", area != NULL);
  51352. +
  51353. + *length -= size_of;
  51354. + *area += size_of;
  51355. +
  51356. + assert("nikita-617", *length >= 0);
  51357. +}
  51358. +
  51359. +/* helper function used while loading inode/plugin state from stat-data.
  51360. + Complain if there is less space in stat-data than was expected.
  51361. + Can only happen on disk corruption. */
  51362. +static int not_enough_space(struct inode *inode /* object being processed */ ,
  51363. + const char *where /* error message */ )
  51364. +{
  51365. + assert("nikita-618", inode != NULL);
  51366. +
  51367. + warning("nikita-619", "Not enough space in %llu while loading %s",
  51368. + (unsigned long long)get_inode_oid(inode), where);
  51369. +
  51370. + return RETERR(-EINVAL);
  51371. +}
  51372. +
  51373. +/* helper function used while loading inode/plugin state from
  51374. + stat-data. Call it if invalid plugin id was found. */
  51375. +static int unknown_plugin(reiser4_plugin_id id /* invalid id */ ,
  51376. + struct inode *inode /* object being processed */ )
  51377. +{
  51378. + warning("nikita-620", "Unknown plugin %i in %llu",
  51379. + id, (unsigned long long)get_inode_oid(inode));
  51380. +
  51381. + return RETERR(-EINVAL);
  51382. +}
  51383. +
  51384. +/* this is installed as ->init_inode() method of
  51385. + item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c).
  51386. + Copies data from on-disk stat-data format into inode.
  51387. + Handles stat-data extensions. */
  51388. +/* was sd_load */
  51389. +int init_inode_static_sd(struct inode *inode /* object being processed */ ,
  51390. + char *sd /* stat-data body */ ,
  51391. + int len /* length of stat-data */ )
  51392. +{
  51393. + int result;
  51394. + int bit;
  51395. + int chunk;
  51396. + __u16 mask;
  51397. + __u64 bigmask;
  51398. + reiser4_stat_data_base *sd_base;
  51399. + reiser4_inode *state;
  51400. +
  51401. + assert("nikita-625", inode != NULL);
  51402. + assert("nikita-626", sd != NULL);
  51403. +
  51404. + result = 0;
  51405. + sd_base = (reiser4_stat_data_base *) sd;
  51406. + state = reiser4_inode_data(inode);
  51407. + mask = le16_to_cpu(get_unaligned(&sd_base->extmask));
  51408. + bigmask = mask;
  51409. + reiser4_inode_set_flag(inode, REISER4_SDLEN_KNOWN);
  51410. +
  51411. + move_on(&len, &sd, sizeof *sd_base);
  51412. + for (bit = 0, chunk = 0;
  51413. + mask != 0 || bit <= LAST_IMPORTANT_SD_EXTENSION;
  51414. + ++bit, mask >>= 1) {
  51415. + if (((bit + 1) % 16) != 0) {
  51416. + /* handle extension */
  51417. + sd_ext_plugin *sdplug;
  51418. +
  51419. + if (bit >= LAST_SD_EXTENSION) {
  51420. + warning("vpf-1904",
  51421. + "No such extension %i in inode %llu",
  51422. + bit,
  51423. + (unsigned long long)
  51424. + get_inode_oid(inode));
  51425. +
  51426. + result = RETERR(-EINVAL);
  51427. + break;
  51428. + }
  51429. +
  51430. + sdplug = sd_ext_plugin_by_id(bit);
  51431. + if (sdplug == NULL) {
  51432. + warning("nikita-627",
  51433. + "No such extension %i in inode %llu",
  51434. + bit,
  51435. + (unsigned long long)
  51436. + get_inode_oid(inode));
  51437. +
  51438. + result = RETERR(-EINVAL);
  51439. + break;
  51440. + }
  51441. + if (mask & 1) {
  51442. + assert("nikita-628", sdplug->present);
  51443. + /* alignment is not supported in node layout
  51444. + plugin yet.
  51445. + result = align( inode, &len, &sd,
  51446. + sdplug -> alignment );
  51447. + if( result != 0 )
  51448. + return result; */
  51449. + result = sdplug->present(inode, &sd, &len);
  51450. + } else if (sdplug->absent != NULL)
  51451. + result = sdplug->absent(inode);
  51452. + if (result)
  51453. + break;
  51454. + /* else, we are looking at the last bit in 16-bit
  51455. + portion of bitmask */
  51456. + } else if (mask & 1) {
  51457. + /* next portion of bitmask */
  51458. + if (len < (int)sizeof(d16)) {
  51459. + warning("nikita-629",
  51460. + "No space for bitmap in inode %llu",
  51461. + (unsigned long long)
  51462. + get_inode_oid(inode));
  51463. +
  51464. + result = RETERR(-EINVAL);
  51465. + break;
  51466. + }
  51467. + mask = le16_to_cpu(get_unaligned((d16 *)sd));
  51468. + bigmask <<= 16;
  51469. + bigmask |= mask;
  51470. + move_on(&len, &sd, sizeof(d16));
  51471. + ++chunk;
  51472. + if (chunk == 3) {
  51473. + if (!(mask & 0x8000)) {
  51474. + /* clear last bit */
  51475. + mask &= ~0x8000;
  51476. + continue;
  51477. + }
  51478. + /* too much */
  51479. + warning("nikita-630",
  51480. + "Too many extensions in %llu",
  51481. + (unsigned long long)
  51482. + get_inode_oid(inode));
  51483. +
  51484. + result = RETERR(-EINVAL);
  51485. + break;
  51486. + }
  51487. + } else
  51488. + /* bitmask exhausted */
  51489. + break;
  51490. + }
  51491. + state->extmask = bigmask;
  51492. + /* common initialisations */
  51493. + if (len - (bit / 16 * sizeof(d16)) > 0) {
  51494. + /* alignment in save_len_static_sd() is taken into account
  51495. + -edward */
  51496. + warning("nikita-631", "unused space in inode %llu",
  51497. + (unsigned long long)get_inode_oid(inode));
  51498. + }
  51499. +
  51500. + return result;
  51501. +}
  51502. +
  51503. +/* estimates size of stat-data required to store inode.
  51504. + Installed as ->save_len() method of
  51505. + item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c). */
  51506. +/* was sd_len */
  51507. +int save_len_static_sd(struct inode *inode /* object being processed */ )
  51508. +{
  51509. + unsigned int result;
  51510. + __u64 mask;
  51511. + int bit;
  51512. +
  51513. + assert("nikita-632", inode != NULL);
  51514. +
  51515. + result = sizeof(reiser4_stat_data_base);
  51516. + mask = reiser4_inode_data(inode)->extmask;
  51517. + for (bit = 0; mask != 0; ++bit, mask >>= 1) {
  51518. + if (mask & 1) {
  51519. + sd_ext_plugin *sdplug;
  51520. +
  51521. + sdplug = sd_ext_plugin_by_id(bit);
  51522. + assert("nikita-633", sdplug != NULL);
  51523. + /*
  51524. + no alignment support
  51525. + result +=
  51526. + reiser4_round_up(result, sdplug -> alignment) -
  51527. + result;
  51528. + */
  51529. + result += sdplug->save_len(inode);
  51530. + }
  51531. + }
  51532. + result += bit / 16 * sizeof(d16);
  51533. + return result;
  51534. +}
  51535. +
  51536. +/* saves inode into stat-data.
  51537. + Installed as ->save() method of
  51538. + item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c). */
  51539. +/* was sd_save */
  51540. +int save_static_sd(struct inode *inode /* object being processed */ ,
  51541. + char **area /* where to save stat-data */ )
  51542. +{
  51543. + int result;
  51544. + __u64 emask;
  51545. + int bit;
  51546. + unsigned int len;
  51547. + reiser4_stat_data_base *sd_base;
  51548. +
  51549. + assert("nikita-634", inode != NULL);
  51550. + assert("nikita-635", area != NULL);
  51551. +
  51552. + result = 0;
  51553. + emask = reiser4_inode_data(inode)->extmask;
  51554. + sd_base = (reiser4_stat_data_base *) * area;
  51555. + put_unaligned(cpu_to_le16((__u16)(emask & 0xffff)), &sd_base->extmask);
  51556. + /*cputod16((unsigned)(emask & 0xffff), &sd_base->extmask);*/
  51557. +
  51558. + *area += sizeof *sd_base;
  51559. + len = 0xffffffffu;
  51560. + for (bit = 0; emask != 0; ++bit, emask >>= 1) {
  51561. + if (emask & 1) {
  51562. + if ((bit + 1) % 16 != 0) {
  51563. + sd_ext_plugin *sdplug;
  51564. + sdplug = sd_ext_plugin_by_id(bit);
  51565. + assert("nikita-636", sdplug != NULL);
  51566. + /* no alignment support yet
  51567. + align( inode, &len, area,
  51568. + sdplug -> alignment ); */
  51569. + result = sdplug->save(inode, area);
  51570. + if (result)
  51571. + break;
  51572. + } else {
  51573. + put_unaligned(cpu_to_le16((__u16)(emask & 0xffff)),
  51574. + (d16 *)(*area));
  51575. + /*cputod16((unsigned)(emask & 0xffff),
  51576. + (d16 *) * area);*/
  51577. + *area += sizeof(d16);
  51578. + }
  51579. + }
  51580. + }
  51581. + return result;
  51582. +}
  51583. +
  51584. +/* stat-data extension handling functions. */
  51585. +
  51586. +static int present_lw_sd(struct inode *inode /* object being processed */ ,
  51587. + char **area /* position in stat-data */ ,
  51588. + int *len /* remaining length */ )
  51589. +{
  51590. + if (*len >= (int)sizeof(reiser4_light_weight_stat)) {
  51591. + reiser4_light_weight_stat *sd_lw;
  51592. +
  51593. + sd_lw = (reiser4_light_weight_stat *) * area;
  51594. +
  51595. + inode->i_mode = le16_to_cpu(get_unaligned(&sd_lw->mode));
  51596. + set_nlink(inode, le32_to_cpu(get_unaligned(&sd_lw->nlink)));
  51597. + inode->i_size = le64_to_cpu(get_unaligned(&sd_lw->size));
  51598. + if ((inode->i_mode & S_IFMT) == (S_IFREG | S_IFIFO)) {
  51599. + inode->i_mode &= ~S_IFIFO;
  51600. + warning("", "partially converted file is encountered");
  51601. + reiser4_inode_set_flag(inode, REISER4_PART_MIXED);
  51602. + }
  51603. + move_on(len, area, sizeof *sd_lw);
  51604. + return 0;
  51605. + } else
  51606. + return not_enough_space(inode, "lw sd");
  51607. +}
  51608. +
  51609. +static int save_len_lw_sd(struct inode *inode UNUSED_ARG /* object being
  51610. + * processed */ )
  51611. +{
  51612. + return sizeof(reiser4_light_weight_stat);
  51613. +}
  51614. +
  51615. +static int save_lw_sd(struct inode *inode /* object being processed */ ,
  51616. + char **area /* position in stat-data */ )
  51617. +{
  51618. + reiser4_light_weight_stat *sd;
  51619. + mode_t delta;
  51620. +
  51621. + assert("nikita-2705", inode != NULL);
  51622. + assert("nikita-2706", area != NULL);
  51623. + assert("nikita-2707", *area != NULL);
  51624. +
  51625. + sd = (reiser4_light_weight_stat *) * area;
  51626. +
  51627. + delta = (reiser4_inode_get_flag(inode,
  51628. + REISER4_PART_MIXED) ? S_IFIFO : 0);
  51629. + put_unaligned(cpu_to_le16(inode->i_mode | delta), &sd->mode);
  51630. + put_unaligned(cpu_to_le32(inode->i_nlink), &sd->nlink);
  51631. + put_unaligned(cpu_to_le64((__u64) inode->i_size), &sd->size);
  51632. + *area += sizeof *sd;
  51633. + return 0;
  51634. +}
  51635. +
  51636. +static int present_unix_sd(struct inode *inode /* object being processed */ ,
  51637. + char **area /* position in stat-data */ ,
  51638. + int *len /* remaining length */ )
  51639. +{
  51640. + assert("nikita-637", inode != NULL);
  51641. + assert("nikita-638", area != NULL);
  51642. + assert("nikita-639", *area != NULL);
  51643. + assert("nikita-640", len != NULL);
  51644. + assert("nikita-641", *len > 0);
  51645. +
  51646. + if (*len >= (int)sizeof(reiser4_unix_stat)) {
  51647. + reiser4_unix_stat *sd;
  51648. +
  51649. + sd = (reiser4_unix_stat *) * area;
  51650. +
  51651. + i_uid_write(inode, le32_to_cpu(get_unaligned(&sd->uid)));
  51652. + i_gid_write(inode, le32_to_cpu(get_unaligned(&sd->gid)));
  51653. + inode->i_atime.tv_sec = le32_to_cpu(get_unaligned(&sd->atime));
  51654. + inode->i_mtime.tv_sec = le32_to_cpu(get_unaligned(&sd->mtime));
  51655. + inode->i_ctime.tv_sec = le32_to_cpu(get_unaligned(&sd->ctime));
  51656. + if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode))
  51657. + inode->i_rdev = le64_to_cpu(get_unaligned(&sd->u.rdev));
  51658. + else
  51659. + inode_set_bytes(inode, (loff_t) le64_to_cpu(get_unaligned(&sd->u.bytes)));
  51660. + move_on(len, area, sizeof *sd);
  51661. + return 0;
  51662. + } else
  51663. + return not_enough_space(inode, "unix sd");
  51664. +}
  51665. +
  51666. +static int absent_unix_sd(struct inode *inode /* object being processed */ )
  51667. +{
  51668. + i_uid_write(inode, get_super_private(inode->i_sb)->default_uid);
  51669. + i_gid_write(inode, get_super_private(inode->i_sb)->default_gid);
  51670. + inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
  51671. + inode_set_bytes(inode, inode->i_size);
  51672. + /* mark inode as lightweight, so that caller (lookup_common) will
  51673. + complete initialisation by copying [ug]id from a parent. */
  51674. + reiser4_inode_set_flag(inode, REISER4_LIGHT_WEIGHT);
  51675. + return 0;
  51676. +}
  51677. +
  51678. +/* Audited by: green(2002.06.14) */
  51679. +static int save_len_unix_sd(struct inode *inode UNUSED_ARG /* object being
  51680. + * processed */ )
  51681. +{
  51682. + return sizeof(reiser4_unix_stat);
  51683. +}
  51684. +
  51685. +static int save_unix_sd(struct inode *inode /* object being processed */ ,
  51686. + char **area /* position in stat-data */ )
  51687. +{
  51688. + reiser4_unix_stat *sd;
  51689. +
  51690. + assert("nikita-642", inode != NULL);
  51691. + assert("nikita-643", area != NULL);
  51692. + assert("nikita-644", *area != NULL);
  51693. +
  51694. + sd = (reiser4_unix_stat *) * area;
  51695. + put_unaligned(cpu_to_le32(i_uid_read(inode)), &sd->uid);
  51696. + put_unaligned(cpu_to_le32(i_gid_read(inode)), &sd->gid);
  51697. + put_unaligned(cpu_to_le32((__u32) inode->i_atime.tv_sec), &sd->atime);
  51698. + put_unaligned(cpu_to_le32((__u32) inode->i_ctime.tv_sec), &sd->ctime);
  51699. + put_unaligned(cpu_to_le32((__u32) inode->i_mtime.tv_sec), &sd->mtime);
  51700. + if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode))
  51701. + put_unaligned(cpu_to_le64(inode->i_rdev), &sd->u.rdev);
  51702. + else
  51703. + put_unaligned(cpu_to_le64((__u64) inode_get_bytes(inode)), &sd->u.bytes);
  51704. + *area += sizeof *sd;
  51705. + return 0;
  51706. +}
  51707. +
  51708. +static int
  51709. +present_large_times_sd(struct inode *inode /* object being processed */ ,
  51710. + char **area /* position in stat-data */ ,
  51711. + int *len /* remaining length */ )
  51712. +{
  51713. + if (*len >= (int)sizeof(reiser4_large_times_stat)) {
  51714. + reiser4_large_times_stat *sd_lt;
  51715. +
  51716. + sd_lt = (reiser4_large_times_stat *) * area;
  51717. +
  51718. + inode->i_atime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->atime));
  51719. + inode->i_mtime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->mtime));
  51720. + inode->i_ctime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->ctime));
  51721. +
  51722. + move_on(len, area, sizeof *sd_lt);
  51723. + return 0;
  51724. + } else
  51725. + return not_enough_space(inode, "large times sd");
  51726. +}
  51727. +
  51728. +static int
  51729. +save_len_large_times_sd(struct inode *inode UNUSED_ARG
  51730. + /* object being processed */ )
  51731. +{
  51732. + return sizeof(reiser4_large_times_stat);
  51733. +}
  51734. +
  51735. +static int
  51736. +save_large_times_sd(struct inode *inode /* object being processed */ ,
  51737. + char **area /* position in stat-data */ )
  51738. +{
  51739. + reiser4_large_times_stat *sd;
  51740. +
  51741. + assert("nikita-2817", inode != NULL);
  51742. + assert("nikita-2818", area != NULL);
  51743. + assert("nikita-2819", *area != NULL);
  51744. +
  51745. + sd = (reiser4_large_times_stat *) * area;
  51746. +
  51747. + put_unaligned(cpu_to_le32((__u32) inode->i_atime.tv_nsec), &sd->atime);
  51748. + put_unaligned(cpu_to_le32((__u32) inode->i_ctime.tv_nsec), &sd->ctime);
  51749. + put_unaligned(cpu_to_le32((__u32) inode->i_mtime.tv_nsec), &sd->mtime);
  51750. +
  51751. + *area += sizeof *sd;
  51752. + return 0;
  51753. +}
  51754. +
  51755. +/* symlink stat data extension */
  51756. +
  51757. +/* allocate memory for symlink target and attach it to inode->i_private */
  51758. +static int
  51759. +symlink_target_to_inode(struct inode *inode, const char *target, int len)
  51760. +{
  51761. + assert("vs-845", inode->i_private == NULL);
  51762. + assert("vs-846", !reiser4_inode_get_flag(inode,
  51763. + REISER4_GENERIC_PTR_USED));
  51764. + /* FIXME-VS: this is prone to deadlock. Not more than other similar
  51765. + places, though */
  51766. + inode->i_private = kmalloc((size_t) len + 1,
  51767. + reiser4_ctx_gfp_mask_get());
  51768. + if (!inode->i_private)
  51769. + return RETERR(-ENOMEM);
  51770. +
  51771. + memcpy((char *)(inode->i_private), target, (size_t) len);
  51772. + ((char *)(inode->i_private))[len] = 0;
  51773. + reiser4_inode_set_flag(inode, REISER4_GENERIC_PTR_USED);
  51774. + return 0;
  51775. +}
  51776. +
  51777. +/* this is called on read_inode. There is nothing to do actually, but some
  51778. + sanity checks */
  51779. +static int present_symlink_sd(struct inode *inode, char **area, int *len)
  51780. +{
  51781. + int result;
  51782. + int length;
  51783. + reiser4_symlink_stat *sd;
  51784. +
  51785. + length = (int)inode->i_size;
  51786. + /*
  51787. + * *len is number of bytes in stat data item from *area to the end of
  51788. + * item. It must be not less than size of symlink + 1 for ending 0
  51789. + */
  51790. + if (length > *len)
  51791. + return not_enough_space(inode, "symlink");
  51792. +
  51793. + if (*(*area + length) != 0) {
  51794. + warning("vs-840", "Symlink is not zero terminated");
  51795. + return RETERR(-EIO);
  51796. + }
  51797. +
  51798. + sd = (reiser4_symlink_stat *) * area;
  51799. + result = symlink_target_to_inode(inode, sd->body, length);
  51800. +
  51801. + move_on(len, area, length + 1);
  51802. + return result;
  51803. +}
  51804. +
  51805. +static int save_len_symlink_sd(struct inode *inode)
  51806. +{
  51807. + return inode->i_size + 1;
  51808. +}
  51809. +
  51810. +/* this is called on create and update stat data. Do nothing on update but
  51811. + update @area */
  51812. +static int save_symlink_sd(struct inode *inode, char **area)
  51813. +{
  51814. + int result;
  51815. + int length;
  51816. + reiser4_symlink_stat *sd;
  51817. +
  51818. + length = (int)inode->i_size;
  51819. + /* inode->i_size must be set already */
  51820. + assert("vs-841", length);
  51821. +
  51822. + result = 0;
  51823. + sd = (reiser4_symlink_stat *) * area;
  51824. + if (!reiser4_inode_get_flag(inode, REISER4_GENERIC_PTR_USED)) {
  51825. + const char *target;
  51826. +
  51827. + target = (const char *)(inode->i_private);
  51828. + inode->i_private = NULL;
  51829. +
  51830. + result = symlink_target_to_inode(inode, target, length);
  51831. +
  51832. + /* copy symlink to stat data */
  51833. + memcpy(sd->body, target, (size_t) length);
  51834. + (*area)[length] = 0;
  51835. + } else {
  51836. + /* there is nothing to do in update but move area */
  51837. + assert("vs-844",
  51838. + !memcmp(inode->i_private, sd->body,
  51839. + (size_t) length + 1));
  51840. + }
  51841. +
  51842. + *area += (length + 1);
  51843. + return result;
  51844. +}
  51845. +
  51846. +static int present_flags_sd(struct inode *inode /* object being processed */ ,
  51847. + char **area /* position in stat-data */ ,
  51848. + int *len /* remaining length */ )
  51849. +{
  51850. + assert("nikita-645", inode != NULL);
  51851. + assert("nikita-646", area != NULL);
  51852. + assert("nikita-647", *area != NULL);
  51853. + assert("nikita-648", len != NULL);
  51854. + assert("nikita-649", *len > 0);
  51855. +
  51856. + if (*len >= (int)sizeof(reiser4_flags_stat)) {
  51857. + reiser4_flags_stat *sd;
  51858. +
  51859. + sd = (reiser4_flags_stat *) * area;
  51860. + inode->i_flags = le32_to_cpu(get_unaligned(&sd->flags));
  51861. + move_on(len, area, sizeof *sd);
  51862. + return 0;
  51863. + } else
  51864. + return not_enough_space(inode, "generation and attrs");
  51865. +}
  51866. +
  51867. +/* Audited by: green(2002.06.14) */
  51868. +static int save_len_flags_sd(struct inode *inode UNUSED_ARG /* object being
  51869. + * processed */ )
  51870. +{
  51871. + return sizeof(reiser4_flags_stat);
  51872. +}
  51873. +
  51874. +static int save_flags_sd(struct inode *inode /* object being processed */ ,
  51875. + char **area /* position in stat-data */ )
  51876. +{
  51877. + reiser4_flags_stat *sd;
  51878. +
  51879. + assert("nikita-650", inode != NULL);
  51880. + assert("nikita-651", area != NULL);
  51881. + assert("nikita-652", *area != NULL);
  51882. +
  51883. + sd = (reiser4_flags_stat *) * area;
  51884. + put_unaligned(cpu_to_le32(inode->i_flags), &sd->flags);
  51885. + *area += sizeof *sd;
  51886. + return 0;
  51887. +}
  51888. +
  51889. +static int absent_plugin_sd(struct inode *inode);
  51890. +static int present_plugin_sd(struct inode *inode /* object being processed */ ,
  51891. + char **area /* position in stat-data */ ,
  51892. + int *len /* remaining length */,
  51893. + int is_pset /* 1 if plugin set, 0 if heir set. */)
  51894. +{
  51895. + reiser4_plugin_stat *sd;
  51896. + reiser4_plugin *plugin;
  51897. + reiser4_inode *info;
  51898. + int i;
  51899. + __u16 mask;
  51900. + int result;
  51901. + int num_of_plugins;
  51902. +
  51903. + assert("nikita-653", inode != NULL);
  51904. + assert("nikita-654", area != NULL);
  51905. + assert("nikita-655", *area != NULL);
  51906. + assert("nikita-656", len != NULL);
  51907. + assert("nikita-657", *len > 0);
  51908. +
  51909. + if (*len < (int)sizeof(reiser4_plugin_stat))
  51910. + return not_enough_space(inode, "plugin");
  51911. +
  51912. + sd = (reiser4_plugin_stat *) * area;
  51913. + info = reiser4_inode_data(inode);
  51914. +
  51915. + mask = 0;
  51916. + num_of_plugins = le16_to_cpu(get_unaligned(&sd->plugins_no));
  51917. + move_on(len, area, sizeof *sd);
  51918. + result = 0;
  51919. + for (i = 0; i < num_of_plugins; ++i) {
  51920. + reiser4_plugin_slot *slot;
  51921. + reiser4_plugin_type type;
  51922. + pset_member memb;
  51923. +
  51924. + slot = (reiser4_plugin_slot *) * area;
  51925. + if (*len < (int)sizeof *slot)
  51926. + return not_enough_space(inode, "additional plugin");
  51927. +
  51928. + memb = le16_to_cpu(get_unaligned(&slot->pset_memb));
  51929. + type = aset_member_to_type_unsafe(memb);
  51930. +
  51931. + if (type == REISER4_PLUGIN_TYPES) {
  51932. + warning("nikita-3502",
  51933. + "wrong %s member (%i) for %llu", is_pset ?
  51934. + "pset" : "hset", memb,
  51935. + (unsigned long long)get_inode_oid(inode));
  51936. + return RETERR(-EINVAL);
  51937. + }
  51938. + plugin = plugin_by_disk_id(reiser4_tree_by_inode(inode),
  51939. + type, &slot->id);
  51940. + if (plugin == NULL)
  51941. + return unknown_plugin(le16_to_cpu(get_unaligned(&slot->id)), inode);
  51942. +
  51943. + /* plugin is loaded into inode, mark this into inode's
  51944. + bitmask of loaded non-standard plugins */
  51945. + if (!(mask & (1 << memb))) {
  51946. + mask |= (1 << memb);
  51947. + } else {
  51948. + warning("nikita-658", "duplicate plugin for %llu",
  51949. + (unsigned long long)get_inode_oid(inode));
  51950. + return RETERR(-EINVAL);
  51951. + }
  51952. + move_on(len, area, sizeof *slot);
  51953. + /* load plugin data, if any */
  51954. + if (plugin->h.pops != NULL && plugin->h.pops->load)
  51955. + result = plugin->h.pops->load(inode, plugin, area, len);
  51956. + else
  51957. + result = aset_set_unsafe(is_pset ? &info->pset :
  51958. + &info->hset, memb, plugin);
  51959. + if (result)
  51960. + return result;
  51961. + }
  51962. + if (is_pset) {
  51963. + /* if object plugin wasn't loaded from stat-data, guess it by
  51964. + mode bits */
  51965. + plugin = file_plugin_to_plugin(inode_file_plugin(inode));
  51966. + if (plugin == NULL)
  51967. + result = absent_plugin_sd(inode);
  51968. + info->plugin_mask = mask;
  51969. + } else
  51970. + info->heir_mask = mask;
  51971. +
  51972. + return result;
  51973. +}
  51974. +
  51975. +static int present_pset_sd(struct inode *inode, char **area, int *len) {
  51976. + return present_plugin_sd(inode, area, len, 1 /* pset */);
  51977. +}
  51978. +
  51979. +/* Determine object plugin for @inode based on i_mode.
  51980. +
  51981. + Many objects in reiser4 file system are controlled by standard object
  51982. + plugins that emulate traditional unix objects: unix file, directory, symlink, fifo, and so on.
  51983. +
  51984. + For such files we don't explicitly store plugin id in object stat
  51985. + data. Rather required plugin is guessed from mode bits, where file "type"
  51986. + is encoded (see stat(2)).
  51987. +*/
  51988. +static int
  51989. +guess_plugin_by_mode(struct inode *inode /* object to guess plugins for */ )
  51990. +{
  51991. + int fplug_id;
  51992. + int dplug_id;
  51993. + reiser4_inode *info;
  51994. +
  51995. + assert("nikita-736", inode != NULL);
  51996. +
  51997. + dplug_id = fplug_id = -1;
  51998. +
  51999. + switch (inode->i_mode & S_IFMT) {
  52000. + case S_IFSOCK:
  52001. + case S_IFBLK:
  52002. + case S_IFCHR:
  52003. + case S_IFIFO:
  52004. + fplug_id = SPECIAL_FILE_PLUGIN_ID;
  52005. + break;
  52006. + case S_IFLNK:
  52007. + fplug_id = SYMLINK_FILE_PLUGIN_ID;
  52008. + break;
  52009. + case S_IFDIR:
  52010. + fplug_id = DIRECTORY_FILE_PLUGIN_ID;
  52011. + dplug_id = HASHED_DIR_PLUGIN_ID;
  52012. + break;
  52013. + default:
  52014. + warning("nikita-737", "wrong file mode: %o", inode->i_mode);
  52015. + return RETERR(-EIO);
  52016. + case S_IFREG:
  52017. + fplug_id = UNIX_FILE_PLUGIN_ID;
  52018. + break;
  52019. + }
  52020. + info = reiser4_inode_data(inode);
  52021. + set_plugin(&info->pset, PSET_FILE, (fplug_id >= 0) ?
  52022. + plugin_by_id(REISER4_FILE_PLUGIN_TYPE, fplug_id) : NULL);
  52023. + set_plugin(&info->pset, PSET_DIR, (dplug_id >= 0) ?
  52024. + plugin_by_id(REISER4_DIR_PLUGIN_TYPE, dplug_id) : NULL);
  52025. + return 0;
  52026. +}
  52027. +
  52028. +/* Audited by: green(2002.06.14) */
  52029. +static int absent_plugin_sd(struct inode *inode /* object being processed */ )
  52030. +{
  52031. + int result;
  52032. +
  52033. + assert("nikita-659", inode != NULL);
  52034. +
  52035. + result = guess_plugin_by_mode(inode);
  52036. + /* if mode was wrong, guess_plugin_by_mode() returns "regular file",
  52037. + but setup_inode_ops() will call make_bad_inode().
  52038. + Another, more logical but bit more complex solution is to add
  52039. + "bad-file plugin". */
  52040. + /* FIXME-VS: activate was called here */
  52041. + return result;
  52042. +}
  52043. +
  52044. +/* helper function for plugin_sd_save_len(): calculate how much space
  52045. + required to save state of given plugin */
  52046. +/* Audited by: green(2002.06.14) */
  52047. +static int len_for(reiser4_plugin * plugin /* plugin to save */ ,
  52048. + struct inode *inode /* object being processed */ ,
  52049. + pset_member memb,
  52050. + int len, int is_pset)
  52051. +{
  52052. + reiser4_inode *info;
  52053. + assert("nikita-661", inode != NULL);
  52054. +
  52055. + if (plugin == NULL)
  52056. + return len;
  52057. +
  52058. + info = reiser4_inode_data(inode);
  52059. + if (is_pset ?
  52060. + info->plugin_mask & (1 << memb) :
  52061. + info->heir_mask & (1 << memb)) {
  52062. + len += sizeof(reiser4_plugin_slot);
  52063. + if (plugin->h.pops && plugin->h.pops->save_len != NULL) {
  52064. + /*
  52065. + * non-standard plugin, call method
  52066. + * commented as it is incompatible with alignment
  52067. + * policy in save_plug() -edward
  52068. + *
  52069. + * len = reiser4_round_up(len,
  52070. + * plugin->h.pops->alignment);
  52071. + */
  52072. + len += plugin->h.pops->save_len(inode, plugin);
  52073. + }
  52074. + }
  52075. + return len;
  52076. +}
  52077. +
  52078. +/* calculate how much space is required to save state of all plugins,
  52079. + associated with inode */
  52080. +static int save_len_plugin_sd(struct inode *inode /* object being processed */,
  52081. + int is_pset)
  52082. +{
  52083. + int len;
  52084. + int last;
  52085. + reiser4_inode *state;
  52086. + pset_member memb;
  52087. +
  52088. + assert("nikita-663", inode != NULL);
  52089. +
  52090. + state = reiser4_inode_data(inode);
  52091. +
  52092. + /* common case: no non-standard plugins */
  52093. + if (is_pset ? state->plugin_mask == 0 : state->heir_mask == 0)
  52094. + return 0;
  52095. + len = sizeof(reiser4_plugin_stat);
  52096. + last = PSET_LAST;
  52097. +
  52098. + for (memb = 0; memb < last; ++memb) {
  52099. + len = len_for(aset_get(is_pset ? state->pset : state->hset, memb),
  52100. + inode, memb, len, is_pset);
  52101. + }
  52102. + assert("nikita-664", len > (int)sizeof(reiser4_plugin_stat));
  52103. + return len;
  52104. +}
  52105. +
  52106. +static int save_len_pset_sd(struct inode *inode) {
  52107. + return save_len_plugin_sd(inode, 1 /* pset */);
  52108. +}
  52109. +
  52110. +/* helper function for plugin_sd_save(): save plugin, associated with
  52111. + inode. */
  52112. +static int save_plug(reiser4_plugin * plugin /* plugin to save */ ,
  52113. + struct inode *inode /* object being processed */ ,
  52114. + int memb /* what element of pset is saved */ ,
  52115. + char **area /* position in stat-data */ ,
  52116. + int *count /* incremented if plugin were actually saved. */,
  52117. + int is_pset /* 1 for plugin set, 0 for heir set */)
  52118. +{
  52119. + reiser4_plugin_slot *slot;
  52120. + int fake_len;
  52121. + int result;
  52122. +
  52123. + assert("nikita-665", inode != NULL);
  52124. + assert("nikita-666", area != NULL);
  52125. + assert("nikita-667", *area != NULL);
  52126. +
  52127. + if (plugin == NULL)
  52128. + return 0;
  52129. +
  52130. + if (is_pset ?
  52131. + !(reiser4_inode_data(inode)->plugin_mask & (1 << memb)) :
  52132. + !(reiser4_inode_data(inode)->heir_mask & (1 << memb)))
  52133. + return 0;
  52134. + slot = (reiser4_plugin_slot *) * area;
  52135. + put_unaligned(cpu_to_le16(memb), &slot->pset_memb);
  52136. + put_unaligned(cpu_to_le16(plugin->h.id), &slot->id);
  52137. + fake_len = (int)0xffff;
  52138. + move_on(&fake_len, area, sizeof *slot);
  52139. + ++*count;
  52140. + result = 0;
  52141. + if (plugin->h.pops != NULL) {
  52142. + if (plugin->h.pops->save != NULL)
  52143. + result = plugin->h.pops->save(inode, plugin, area);
  52144. + }
  52145. + return result;
  52146. +}
  52147. +
  52148. +/* save state of all non-standard plugins associated with inode */
  52149. +static int save_plugin_sd(struct inode *inode /* object being processed */ ,
  52150. + char **area /* position in stat-data */,
  52151. + int is_pset /* 1 for pset, 0 for hset */)
  52152. +{
  52153. + int fake_len;
  52154. + int result = 0;
  52155. + int num_of_plugins;
  52156. + reiser4_plugin_stat *sd;
  52157. + reiser4_inode *state;
  52158. + pset_member memb;
  52159. +
  52160. + assert("nikita-669", inode != NULL);
  52161. + assert("nikita-670", area != NULL);
  52162. + assert("nikita-671", *area != NULL);
  52163. +
  52164. + state = reiser4_inode_data(inode);
  52165. + if (is_pset ? state->plugin_mask == 0 : state->heir_mask == 0)
  52166. + return 0;
  52167. + sd = (reiser4_plugin_stat *) * area;
  52168. + fake_len = (int)0xffff;
  52169. + move_on(&fake_len, area, sizeof *sd);
  52170. +
  52171. + num_of_plugins = 0;
  52172. + for (memb = 0; memb < PSET_LAST; ++memb) {
  52173. + result = save_plug(aset_get(is_pset ? state->pset : state->hset,
  52174. + memb),
  52175. + inode, memb, area, &num_of_plugins, is_pset);
  52176. + if (result != 0)
  52177. + break;
  52178. + }
  52179. +
  52180. + put_unaligned(cpu_to_le16((__u16)num_of_plugins), &sd->plugins_no);
  52181. + return result;
  52182. +}
  52183. +
  52184. +static int save_pset_sd(struct inode *inode, char **area) {
  52185. + return save_plugin_sd(inode, area, 1 /* pset */);
  52186. +}
  52187. +
  52188. +static int present_hset_sd(struct inode *inode, char **area, int *len) {
  52189. + return present_plugin_sd(inode, area, len, 0 /* hset */);
  52190. +}
  52191. +
  52192. +static int save_len_hset_sd(struct inode *inode) {
  52193. + return save_len_plugin_sd(inode, 0 /* hset */);
  52194. +}
  52195. +
  52196. +static int save_hset_sd(struct inode *inode, char **area) {
  52197. + return save_plugin_sd(inode, area, 0 /* hset */);
  52198. +}
  52199. +
  52200. +/* helper function for crypto_sd_present(), crypto_sd_save.
  52201. + Extract crypto info from stat-data and attach it to inode */
  52202. +static int extract_crypto_info (struct inode * inode,
  52203. + reiser4_crypto_stat * sd)
  52204. +{
  52205. + struct reiser4_crypto_info * info;
  52206. + assert("edward-11", !inode_crypto_info(inode));
  52207. + assert("edward-1413",
  52208. + !reiser4_inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED));
  52209. + /* create and attach a crypto-stat without secret key loaded */
  52210. + info = reiser4_alloc_crypto_info(inode);
  52211. + if (IS_ERR(info))
  52212. + return PTR_ERR(info);
  52213. + info->keysize = le16_to_cpu(get_unaligned(&sd->keysize));
  52214. + memcpy(info->keyid, sd->keyid, inode_digest_plugin(inode)->fipsize);
  52215. + reiser4_attach_crypto_info(inode, info);
  52216. + reiser4_inode_set_flag(inode, REISER4_CRYPTO_STAT_LOADED);
  52217. + return 0;
  52218. +}
  52219. +
  52220. +/* crypto stat-data extension */
  52221. +
  52222. +static int present_crypto_sd(struct inode *inode, char **area, int *len)
  52223. +{
  52224. + int result;
  52225. + reiser4_crypto_stat *sd;
  52226. + digest_plugin *dplug = inode_digest_plugin(inode);
  52227. +
  52228. + assert("edward-06", dplug != NULL);
  52229. + assert("edward-684", dplug->fipsize);
  52230. + assert("edward-07", area != NULL);
  52231. + assert("edward-08", *area != NULL);
  52232. + assert("edward-09", len != NULL);
  52233. + assert("edward-10", *len > 0);
  52234. +
  52235. + if (*len < (int)sizeof(reiser4_crypto_stat)) {
  52236. + return not_enough_space(inode, "crypto-sd");
  52237. + }
  52238. + /* *len is number of bytes in stat data item from *area to the end of
  52239. + item. It must be not less than size of this extension */
  52240. + assert("edward-75", sizeof(*sd) + dplug->fipsize <= *len);
  52241. +
  52242. + sd = (reiser4_crypto_stat *) * area;
  52243. + result = extract_crypto_info(inode, sd);
  52244. + move_on(len, area, sizeof(*sd) + dplug->fipsize);
  52245. +
  52246. + return result;
  52247. +}
  52248. +
  52249. +static int save_len_crypto_sd(struct inode *inode)
  52250. +{
  52251. + return sizeof(reiser4_crypto_stat) +
  52252. + inode_digest_plugin(inode)->fipsize;
  52253. +}
  52254. +
  52255. +static int save_crypto_sd(struct inode *inode, char **area)
  52256. +{
  52257. + int result = 0;
  52258. + reiser4_crypto_stat *sd;
  52259. + struct reiser4_crypto_info * info = inode_crypto_info(inode);
  52260. + digest_plugin *dplug = inode_digest_plugin(inode);
  52261. +
  52262. + assert("edward-12", dplug != NULL);
  52263. + assert("edward-13", area != NULL);
  52264. + assert("edward-14", *area != NULL);
  52265. + assert("edward-15", info != NULL);
  52266. + assert("edward-1414", info->keyid != NULL);
  52267. + assert("edward-1415", info->keysize != 0);
  52268. + assert("edward-76", reiser4_inode_data(inode) != NULL);
  52269. +
  52270. + if (!reiser4_inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED)) {
  52271. + /* file is just created */
  52272. + sd = (reiser4_crypto_stat *) *area;
  52273. + /* copy everything but private key to the disk stat-data */
  52274. + put_unaligned(cpu_to_le16(info->keysize), &sd->keysize);
  52275. + memcpy(sd->keyid, info->keyid, (size_t) dplug->fipsize);
  52276. + reiser4_inode_set_flag(inode, REISER4_CRYPTO_STAT_LOADED);
  52277. + }
  52278. + *area += (sizeof(*sd) + dplug->fipsize);
  52279. + return result;
  52280. +}
  52281. +
  52282. +static int eio(struct inode *inode, char **area, int *len)
  52283. +{
  52284. + return RETERR(-EIO);
  52285. +}
  52286. +
  52287. +sd_ext_plugin sd_ext_plugins[LAST_SD_EXTENSION] = {
  52288. + [LIGHT_WEIGHT_STAT] = {
  52289. + .h = {
  52290. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52291. + .id = LIGHT_WEIGHT_STAT,
  52292. + .pops = NULL,
  52293. + .label = "light-weight sd",
  52294. + .desc = "sd for light-weight files",
  52295. + .linkage = {NULL,NULL}
  52296. + },
  52297. + .present = present_lw_sd,
  52298. + .absent = NULL,
  52299. + .save_len = save_len_lw_sd,
  52300. + .save = save_lw_sd,
  52301. + .alignment = 8
  52302. + },
  52303. + [UNIX_STAT] = {
  52304. + .h = {
  52305. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52306. + .id = UNIX_STAT,
  52307. + .pops = NULL,
  52308. + .label = "unix-sd",
  52309. + .desc = "unix stat-data fields",
  52310. + .linkage = {NULL,NULL}
  52311. + },
  52312. + .present = present_unix_sd,
  52313. + .absent = absent_unix_sd,
  52314. + .save_len = save_len_unix_sd,
  52315. + .save = save_unix_sd,
  52316. + .alignment = 8
  52317. + },
  52318. + [LARGE_TIMES_STAT] = {
  52319. + .h = {
  52320. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52321. + .id = LARGE_TIMES_STAT,
  52322. + .pops = NULL,
  52323. + .label = "64time-sd",
  52324. + .desc = "nanosecond resolution for times",
  52325. + .linkage = {NULL,NULL}
  52326. + },
  52327. + .present = present_large_times_sd,
  52328. + .absent = NULL,
  52329. + .save_len = save_len_large_times_sd,
  52330. + .save = save_large_times_sd,
  52331. + .alignment = 8
  52332. + },
  52333. + [SYMLINK_STAT] = {
  52334. + /* stat data of symlink has this extension */
  52335. + .h = {
  52336. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52337. + .id = SYMLINK_STAT,
  52338. + .pops = NULL,
  52339. + .label = "symlink-sd",
  52340. + .desc =
  52341. + "stat data is appended with symlink name",
  52342. + .linkage = {NULL,NULL}
  52343. + },
  52344. + .present = present_symlink_sd,
  52345. + .absent = NULL,
  52346. + .save_len = save_len_symlink_sd,
  52347. + .save = save_symlink_sd,
  52348. + .alignment = 8
  52349. + },
  52350. + [PLUGIN_STAT] = {
  52351. + .h = {
  52352. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52353. + .id = PLUGIN_STAT,
  52354. + .pops = NULL,
  52355. + .label = "plugin-sd",
  52356. + .desc = "plugin stat-data fields",
  52357. + .linkage = {NULL,NULL}
  52358. + },
  52359. + .present = present_pset_sd,
  52360. + .absent = absent_plugin_sd,
  52361. + .save_len = save_len_pset_sd,
  52362. + .save = save_pset_sd,
  52363. + .alignment = 8
  52364. + },
  52365. + [HEIR_STAT] = {
  52366. + .h = {
  52367. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52368. + .id = HEIR_STAT,
  52369. + .pops = NULL,
  52370. + .label = "heir-plugin-sd",
  52371. + .desc = "heir plugin stat-data fields",
  52372. + .linkage = {NULL,NULL}
  52373. + },
  52374. + .present = present_hset_sd,
  52375. + .absent = NULL,
  52376. + .save_len = save_len_hset_sd,
  52377. + .save = save_hset_sd,
  52378. + .alignment = 8
  52379. + },
  52380. + [FLAGS_STAT] = {
  52381. + .h = {
  52382. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52383. + .id = FLAGS_STAT,
  52384. + .pops = NULL,
  52385. + .label = "flags-sd",
  52386. + .desc = "inode bit flags",
  52387. + .linkage = {NULL, NULL}
  52388. + },
  52389. + .present = present_flags_sd,
  52390. + .absent = NULL,
  52391. + .save_len = save_len_flags_sd,
  52392. + .save = save_flags_sd,
  52393. + .alignment = 8
  52394. + },
  52395. + [CAPABILITIES_STAT] = {
  52396. + .h = {
  52397. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52398. + .id = CAPABILITIES_STAT,
  52399. + .pops = NULL,
  52400. + .label = "capabilities-sd",
  52401. + .desc = "capabilities",
  52402. + .linkage = {NULL, NULL}
  52403. + },
  52404. + .present = eio,
  52405. + .absent = NULL,
  52406. + .save_len = save_len_flags_sd,
  52407. + .save = save_flags_sd,
  52408. + .alignment = 8
  52409. + },
  52410. + [CRYPTO_STAT] = {
  52411. + .h = {
  52412. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52413. + .id = CRYPTO_STAT,
  52414. + .pops = NULL,
  52415. + .label = "crypto-sd",
  52416. + .desc = "secret key size and id",
  52417. + .linkage = {NULL, NULL}
  52418. + },
  52419. + .present = present_crypto_sd,
  52420. + .absent = NULL,
  52421. + .save_len = save_len_crypto_sd,
  52422. + .save = save_crypto_sd,
  52423. + .alignment = 8
  52424. + }
  52425. +};
  52426. +
  52427. +/* Make Linus happy.
  52428. + Local variables:
  52429. + c-indentation-style: "K&R"
  52430. + mode-name: "LC"
  52431. + c-basic-offset: 8
  52432. + tab-width: 8
  52433. + fill-column: 120
  52434. + End:
  52435. +*/
  52436. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/static_stat.h linux-5.16.14/fs/reiser4/plugin/item/static_stat.h
  52437. --- linux-5.16.14.orig/fs/reiser4/plugin/item/static_stat.h 1970-01-01 01:00:00.000000000 +0100
  52438. +++ linux-5.16.14/fs/reiser4/plugin/item/static_stat.h 2022-03-12 13:26:19.681892799 +0100
  52439. @@ -0,0 +1,224 @@
  52440. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  52441. +
  52442. +/* This describes the static_stat item, used to hold all information needed by the stat() syscall.
  52443. +
  52444. +In the case where each file has not less than the fields needed by the
  52445. +stat() syscall, it is more compact to store those fields in this
  52446. +struct.
  52447. +
  52448. +If this item does not exist, then all stats are dynamically resolved.
  52449. +At the moment, we either resolve all stats dynamically or all of them
  52450. +statically. If you think this is not fully optimal, and the rest of
  52451. +reiser4 is working, then fix it...:-)
  52452. +
  52453. +*/
  52454. +
  52455. +#if !defined( __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__ )
  52456. +#define __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__
  52457. +
  52458. +#include "../../forward.h"
  52459. +#include "../../dformat.h"
  52460. +
  52461. +#include <linux/fs.h> /* for struct inode */
  52462. +
  52463. +/* Stat data layout: goals and implementation.
  52464. +
  52465. + We want to be able to have lightweight files which have complete flexibility in what semantic metadata is attached to
  52466. + them, including not having semantic metadata attached to them.
  52467. +
  52468. + There is one problem with doing that, which is that if in fact you have exactly the same metadata for most files you
  52469. + want to store, then it takes more space to store that metadata in a dynamically sized structure than in a statically
  52470. + sized structure because the statically sized structure knows without recording it what the names and lengths of the
  52471. + attributes are.
  52472. +
  52473. + This leads to a natural compromise, which is to special case those files which have simply the standard unix file
  52474. + attributes, and only employ the full dynamic stat data mechanism for those files that differ from the standard unix
  52475. + file in their use of file attributes.
  52476. +
  52477. + Yet this compromise deserves to be compromised a little.
  52478. +
  52479. + We accommodate the case where you have no more than the standard unix file attributes by using an "extension
  52480. + bitmask": each bit in it indicates presence or absence of a particular stat data extension (see sd_ext_bits enum).
  52481. +
  52482. + If the first bit of the extension bitmask is 0, we have a light-weight file whose attributes are either inherited
  52483. + from parent directory (as uid, gid) or initialised to some sane values.
  52484. +
  52485. + To capitalize on existing code infrastructure, extensions are
  52486. + implemented as plugins of type REISER4_SD_EXT_PLUGIN_TYPE.
  52487. + Each stat-data extension plugin implements four methods:
  52488. +
  52489. + ->present() called by sd_load() when this extension is found in stat-data
  52490. + ->absent() called by sd_load() when this extension is not found in stat-data
  52491. + ->save_len() called by sd_len() to calculate total length of stat-data
  52492. + ->save() called by sd_save() to store extension data into stat-data
  52493. +
  52494. + Implementation is in fs/reiser4/plugin/item/static_stat.c
  52495. +*/
  52496. +
  52497. +/* stat-data extension. Please order this by presumed frequency of use */
  52498. +typedef enum {
  52499. + /* support for light-weight files */
  52500. + LIGHT_WEIGHT_STAT,
  52501. + /* data required to implement unix stat(2) call. Layout is in
  52502. + reiser4_unix_stat. If this is not present, file is light-weight */
  52503. + UNIX_STAT,
  52504. + /* this contains additional set of 32bit [anc]time fields to implement
  52505. + nanosecond resolution. Layout is in reiser4_large_times_stat. Usage
  52506. + of this extension is governed by 32bittimes mount option. */
  52507. + LARGE_TIMES_STAT,
  52508. + /* stat data has link name included */
  52509. + SYMLINK_STAT,
  52510. + /* on-disk slots of non-standard plugins for main plugin table
  52511. + (@reiser4_inode->pset), that is, plugins that cannot be deduced
  52512. + from file mode bits), for example, aggregation, interpolation etc. */
  52513. + PLUGIN_STAT,
  52514. + /* this extension contains persistent inode flags. These flags are
  52515. + single bits: immutable, append, only, etc. Layout is in
  52516. + reiser4_flags_stat. */
  52517. + FLAGS_STAT,
  52518. + /* this extension contains capabilities sets, associated with this
  52519. + file. Layout is in reiser4_capabilities_stat */
  52520. + CAPABILITIES_STAT,
  52521. + /* this extension contains size and public id of the secret key.
  52522. + Layout is in reiser4_crypto_stat */
  52523. + CRYPTO_STAT,
  52524. + /* on-disk slots of non-default plugins for inheritance, which
  52525. + are extracted to special plugin table (@reiser4_inode->hset).
  52526. + By default, children of the object will inherit plugins from
  52527. + its main plugin table (pset). */
  52528. + HEIR_STAT,
  52529. + LAST_SD_EXTENSION,
  52530. + /*
  52531. + * init_inode_static_sd() iterates over extension mask until all
  52532. + * non-zero bits are processed. This means, that neither ->present(),
  52533. + * nor ->absent() methods will be called for stat-data extensions that
  52534. + * go after last present extension. But for some basic extensions, we want
  52535. + * either ->absent() or ->present() method to be called, because these
  52536. + * extensions set up something in inode even when they are not
  52537. + * present. This is what LAST_IMPORTANT_SD_EXTENSION is for: for all
  52538. + * extensions before and including LAST_IMPORTANT_SD_EXTENSION either
  52539. + * ->present(), or ->absent() method will be called, independently of
  52540. + * what other extensions are present.
  52541. + */
  52542. + LAST_IMPORTANT_SD_EXTENSION = PLUGIN_STAT
  52543. +} sd_ext_bits;
  52544. +
  52545. +/* minimal stat-data. This allows to support light-weight files. */
  52546. +typedef struct reiser4_stat_data_base {
  52547. + /* 0 */ __le16 extmask;
  52548. + /* 2 */
  52549. +} PACKED reiser4_stat_data_base;
  52550. +
  52551. +typedef struct reiser4_light_weight_stat {
  52552. + /* 0 */ __le16 mode;
  52553. + /* 2 */ __le32 nlink;
  52554. + /* 6 */ __le64 size;
  52555. + /* size in bytes */
  52556. + /* 14 */
  52557. +} PACKED reiser4_light_weight_stat;
  52558. +
  52559. +typedef struct reiser4_unix_stat {
  52560. + /* owner id */
  52561. + /* 0 */ __le32 uid;
  52562. + /* group id */
  52563. + /* 4 */ __le32 gid;
  52564. + /* access time */
  52565. + /* 8 */ __le32 atime;
  52566. + /* modification time */
  52567. + /* 12 */ __le32 mtime;
  52568. + /* change time */
  52569. + /* 16 */ __le32 ctime;
  52570. + union {
  52571. + /* minor:major for device files */
  52572. + /* 20 */ __le64 rdev;
  52573. + /* bytes used by file */
  52574. + /* 20 */ __le64 bytes;
  52575. + } u;
  52576. + /* 28 */
  52577. +} PACKED reiser4_unix_stat;
  52578. +
  52579. +/* symlink stored as part of inode */
  52580. +typedef struct reiser4_symlink_stat {
  52581. + char body[0];
  52582. +} PACKED reiser4_symlink_stat;
  52583. +
  52584. +typedef struct reiser4_plugin_slot {
  52585. + /* 0 */ __le16 pset_memb;
  52586. + /* 2 */ __le16 id;
  52587. + /* 4 *//* here plugin stores its persistent state */
  52588. +} PACKED reiser4_plugin_slot;
  52589. +
  52590. +/* stat-data extension for files with non-standard plugin. */
  52591. +typedef struct reiser4_plugin_stat {
  52592. + /* number of additional plugins, associated with this object */
  52593. + /* 0 */ __le16 plugins_no;
  52594. + /* 2 */ reiser4_plugin_slot slot[0];
  52595. + /* 2 */
  52596. +} PACKED reiser4_plugin_stat;
  52597. +
  52598. +/* stat-data extension for inode flags. Currently it is just fixed-width 32
  52599. + * bit mask. If need arise, this can be replaced with variable width
  52600. + * bitmask. */
  52601. +typedef struct reiser4_flags_stat {
  52602. + /* 0 */ __le32 flags;
  52603. + /* 4 */
  52604. +} PACKED reiser4_flags_stat;
  52605. +
  52606. +typedef struct reiser4_capabilities_stat {
  52607. + /* 0 */ __le32 effective;
  52608. + /* 8 */ __le32 permitted;
  52609. + /* 16 */
  52610. +} PACKED reiser4_capabilities_stat;
  52611. +
  52612. +typedef struct reiser4_cluster_stat {
  52613. +/* this defines cluster size (an attribute of cryptcompress objects) as PAGE_SIZE << cluster shift */
  52614. + /* 0 */ d8 cluster_shift;
  52615. + /* 1 */
  52616. +} PACKED reiser4_cluster_stat;
  52617. +
  52618. +typedef struct reiser4_crypto_stat {
  52619. + /* secret key size, bits */
  52620. + /* 0 */ d16 keysize;
  52621. + /* secret key id */
  52622. + /* 2 */ d8 keyid[0];
  52623. + /* 2 */
  52624. +} PACKED reiser4_crypto_stat;
  52625. +
  52626. +typedef struct reiser4_large_times_stat {
  52627. + /* access time */
  52628. + /* 0 */ d32 atime;
  52629. + /* modification time */
  52630. + /* 4 */ d32 mtime;
  52631. + /* change time */
  52632. + /* 8 */ d32 ctime;
  52633. + /* 12 */
  52634. +} PACKED reiser4_large_times_stat;
  52635. +
  52636. +/* this structure is filled by sd_item_stat */
  52637. +typedef struct sd_stat {
  52638. + int dirs;
  52639. + int files;
  52640. + int others;
  52641. +} sd_stat;
  52642. +
  52643. +/* plugin->item.common.* */
  52644. +extern void print_sd(const char *prefix, coord_t * coord);
  52645. +extern void item_stat_static_sd(const coord_t * coord, void *vp);
  52646. +
  52647. +/* plugin->item.s.sd.* */
  52648. +extern int init_inode_static_sd(struct inode *inode, char *sd, int len);
  52649. +extern int save_len_static_sd(struct inode *inode);
  52650. +extern int save_static_sd(struct inode *inode, char **area);
  52651. +
  52652. +/* __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__ */
  52653. +#endif
  52654. +
  52655. +/* Make Linus happy.
  52656. + Local variables:
  52657. + c-indentation-style: "K&R"
  52658. + mode-name: "LC"
  52659. + c-basic-offset: 8
  52660. + tab-width: 8
  52661. + fill-column: 120
  52662. + End:
  52663. +*/
  52664. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/tail.c linux-5.16.14/fs/reiser4/plugin/item/tail.c
  52665. --- linux-5.16.14.orig/fs/reiser4/plugin/item/tail.c 1970-01-01 01:00:00.000000000 +0100
  52666. +++ linux-5.16.14/fs/reiser4/plugin/item/tail.c 2022-03-12 13:26:19.681892799 +0100
  52667. @@ -0,0 +1,809 @@
  52668. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  52669. +
  52670. +#include "item.h"
  52671. +#include "../../inode.h"
  52672. +#include "../../page_cache.h"
  52673. +#include "../../carry.h"
  52674. +#include "../../vfs_ops.h"
  52675. +
  52676. +#include <linux/swap.h>
  52677. +#include <linux/writeback.h>
  52678. +#include <linux/uio.h>
  52679. +
  52680. +/* plugin->u.item.b.max_key_inside */
  52681. +reiser4_key *max_key_inside_tail(const coord_t *coord, reiser4_key *key)
  52682. +{
  52683. + item_key_by_coord(coord, key);
  52684. + set_key_offset(key, get_key_offset(reiser4_max_key()));
  52685. + return key;
  52686. +}
  52687. +
  52688. +/* plugin->u.item.b.can_contain_key */
  52689. +int can_contain_key_tail(const coord_t *coord, const reiser4_key *key,
  52690. + const reiser4_item_data *data)
  52691. +{
  52692. + reiser4_key item_key;
  52693. +
  52694. + if (item_plugin_by_coord(coord) != data->iplug)
  52695. + return 0;
  52696. +
  52697. + item_key_by_coord(coord, &item_key);
  52698. + if (get_key_locality(key) != get_key_locality(&item_key) ||
  52699. + get_key_objectid(key) != get_key_objectid(&item_key))
  52700. + return 0;
  52701. +
  52702. + return 1;
  52703. +}
  52704. +
  52705. +/* plugin->u.item.b.mergeable
  52706. + first item is of tail type */
  52707. +/* Audited by: green(2002.06.14) */
  52708. +int mergeable_tail(const coord_t *p1, const coord_t *p2)
  52709. +{
  52710. + reiser4_key key1, key2;
  52711. +
  52712. + assert("vs-535", plugin_of_group(item_plugin_by_coord(p1),
  52713. + UNIX_FILE_METADATA_ITEM_TYPE));
  52714. + assert("vs-365", item_id_by_coord(p1) == FORMATTING_ID);
  52715. +
  52716. + if (item_id_by_coord(p2) != FORMATTING_ID) {
  52717. + /* second item is of another type */
  52718. + return 0;
  52719. + }
  52720. +
  52721. + item_key_by_coord(p1, &key1);
  52722. + item_key_by_coord(p2, &key2);
  52723. + if (get_key_locality(&key1) != get_key_locality(&key2) ||
  52724. + get_key_objectid(&key1) != get_key_objectid(&key2)
  52725. + || get_key_type(&key1) != get_key_type(&key2)) {
  52726. + /* items of different objects */
  52727. + return 0;
  52728. + }
  52729. + if (get_key_offset(&key1) + nr_units_tail(p1) != get_key_offset(&key2)) {
  52730. + /* not adjacent items */
  52731. + return 0;
  52732. + }
  52733. + return 1;
  52734. +}
  52735. +
  52736. +/* plugin->u.item.b.print
  52737. + plugin->u.item.b.check */
  52738. +
  52739. +/* plugin->u.item.b.nr_units */
  52740. +pos_in_node_t nr_units_tail(const coord_t * coord)
  52741. +{
  52742. + return item_length_by_coord(coord);
  52743. +}
  52744. +
  52745. +/* plugin->u.item.b.lookup */
  52746. +lookup_result
  52747. +lookup_tail(const reiser4_key * key, lookup_bias bias, coord_t * coord)
  52748. +{
  52749. + reiser4_key item_key;
  52750. + __u64 lookuped, offset;
  52751. + unsigned nr_units;
  52752. +
  52753. + item_key_by_coord(coord, &item_key);
  52754. + offset = get_key_offset(item_key_by_coord(coord, &item_key));
  52755. + nr_units = nr_units_tail(coord);
  52756. +
  52757. + /* key we are looking for must be greater than key of item @coord */
  52758. + assert("vs-416", keygt(key, &item_key));
  52759. +
  52760. + /* offset we are looking for */
  52761. + lookuped = get_key_offset(key);
  52762. +
  52763. + if (lookuped >= offset && lookuped < offset + nr_units) {
  52764. + /* byte we are looking for is in this item */
  52765. + coord->unit_pos = lookuped - offset;
  52766. + coord->between = AT_UNIT;
  52767. + return CBK_COORD_FOUND;
  52768. + }
  52769. +
  52770. + /* set coord after last unit */
  52771. + coord->unit_pos = nr_units - 1;
  52772. + coord->between = AFTER_UNIT;
  52773. + return bias ==
  52774. + FIND_MAX_NOT_MORE_THAN ? CBK_COORD_FOUND : CBK_COORD_NOTFOUND;
  52775. +}
  52776. +
  52777. +/* plugin->u.item.b.paste */
  52778. +int
  52779. +paste_tail(coord_t *coord, reiser4_item_data *data,
  52780. + carry_plugin_info *info UNUSED_ARG)
  52781. +{
  52782. + unsigned old_item_length;
  52783. + char *item;
  52784. +
  52785. + /* length the item had before resizing has been performed */
  52786. + old_item_length = item_length_by_coord(coord) - data->length;
  52787. +
  52788. + /* tail items never get pasted in the middle */
  52789. + assert("vs-363",
  52790. + (coord->unit_pos == 0 && coord->between == BEFORE_UNIT) ||
  52791. + (coord->unit_pos == old_item_length - 1 &&
  52792. + coord->between == AFTER_UNIT) ||
  52793. + (coord->unit_pos == 0 && old_item_length == 0
  52794. + && coord->between == AT_UNIT));
  52795. +
  52796. + item = item_body_by_coord(coord);
  52797. + if (coord->unit_pos == 0)
  52798. + /* make space for pasted data when pasting at the beginning of
  52799. + the item */
  52800. + memmove(item + data->length, item, old_item_length);
  52801. +
  52802. + if (coord->between == AFTER_UNIT)
  52803. + coord->unit_pos++;
  52804. +
  52805. + if (data->data) {
  52806. + assert("vs-554", data->user == 0 || data->user == 1);
  52807. + if (data->user) {
  52808. + assert("nikita-3035", reiser4_schedulable());
  52809. + /* copy from user space */
  52810. + if (__copy_from_user(item + coord->unit_pos,
  52811. + (const char __user *)data->data,
  52812. + (unsigned)data->length))
  52813. + return RETERR(-EFAULT);
  52814. + } else
  52815. + /* copy from kernel space */
  52816. + memcpy(item + coord->unit_pos, data->data,
  52817. + (unsigned)data->length);
  52818. + } else {
  52819. + memset(item + coord->unit_pos, 0, (unsigned)data->length);
  52820. + }
  52821. + return 0;
  52822. +}
  52823. +
  52824. +/* plugin->u.item.b.fast_paste */
  52825. +
  52826. +/* plugin->u.item.b.can_shift
  52827. + number of units is returned via return value, number of bytes via @size. For
  52828. + tail items they coincide */
  52829. +int
  52830. +can_shift_tail(unsigned free_space, coord_t * source UNUSED_ARG,
  52831. + znode * target UNUSED_ARG, shift_direction direction UNUSED_ARG,
  52832. + unsigned *size, unsigned want)
  52833. +{
  52834. + /* make sure that we do not want to shift more than we have */
  52835. + assert("vs-364", want > 0
  52836. + && want <= (unsigned)item_length_by_coord(source));
  52837. +
  52838. + *size = min(want, free_space);
  52839. + return *size;
  52840. +}
  52841. +
  52842. +/* plugin->u.item.b.copy_units */
  52843. +void
  52844. +copy_units_tail(coord_t * target, coord_t * source,
  52845. + unsigned from, unsigned count,
  52846. + shift_direction where_is_free_space,
  52847. + unsigned free_space UNUSED_ARG)
  52848. +{
  52849. + /* make sure that item @target is expanded already */
  52850. + assert("vs-366", (unsigned)item_length_by_coord(target) >= count);
  52851. + assert("vs-370", free_space >= count);
  52852. +
  52853. + if (where_is_free_space == SHIFT_LEFT) {
  52854. + /* append item @target with @count first bytes of @source */
  52855. + assert("vs-365", from == 0);
  52856. +
  52857. + memcpy((char *)item_body_by_coord(target) +
  52858. + item_length_by_coord(target) - count,
  52859. + (char *)item_body_by_coord(source), count);
  52860. + } else {
  52861. + /* target item is moved to right already */
  52862. + reiser4_key key;
  52863. +
  52864. + assert("vs-367",
  52865. + (unsigned)item_length_by_coord(source) == from + count);
  52866. +
  52867. + memcpy((char *)item_body_by_coord(target),
  52868. + (char *)item_body_by_coord(source) + from, count);
  52869. +
  52870. + /* new units are inserted before first unit in an item,
  52871. + therefore, we have to update item key */
  52872. + item_key_by_coord(source, &key);
  52873. + set_key_offset(&key, get_key_offset(&key) + from);
  52874. +
  52875. + node_plugin_by_node(target->node)->update_item_key(target, &key,
  52876. + NULL /*info */);
  52877. + }
  52878. +}
  52879. +
  52880. +/* plugin->u.item.b.create_hook */
  52881. +
  52882. +/* item_plugin->b.kill_hook
  52883. + this is called when @count units starting from @from-th one are going to be removed
  52884. + */
  52885. +int
  52886. +kill_hook_tail(const coord_t * coord, pos_in_node_t from,
  52887. + pos_in_node_t count, struct carry_kill_data *kdata)
  52888. +{
  52889. + reiser4_key key;
  52890. + loff_t start, end;
  52891. +
  52892. + assert("vs-1577", kdata);
  52893. + assert("vs-1579", kdata->inode);
  52894. +
  52895. + item_key_by_coord(coord, &key);
  52896. + start = get_key_offset(&key) + from;
  52897. + end = start + count;
  52898. + fake_kill_hook_tail(kdata->inode, start, end, kdata->params.truncate);
  52899. + return 0;
  52900. +}
  52901. +
  52902. +/* plugin->u.item.b.shift_hook */
  52903. +
  52904. +/* helper for kill_units_tail and cut_units_tail */
  52905. +static int
  52906. +do_cut_or_kill(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  52907. + reiser4_key * smallest_removed, reiser4_key * new_first)
  52908. +{
  52909. + pos_in_node_t count;
  52910. +
  52911. + /* this method is only called to remove part of item */
  52912. + assert("vs-374", (to - from + 1) < item_length_by_coord(coord));
  52913. + /* tail items are never cut from the middle of an item */
  52914. + assert("vs-396", ergo(from != 0, to == coord_last_unit_pos(coord)));
  52915. + assert("vs-1558", ergo(from == 0, to < coord_last_unit_pos(coord)));
  52916. +
  52917. + count = to - from + 1;
  52918. +
  52919. + if (smallest_removed) {
  52920. + /* store smallest key removed */
  52921. + item_key_by_coord(coord, smallest_removed);
  52922. + set_key_offset(smallest_removed,
  52923. + get_key_offset(smallest_removed) + from);
  52924. + }
  52925. + if (new_first) {
  52926. + /* head of item is cut */
  52927. + assert("vs-1529", from == 0);
  52928. +
  52929. + item_key_by_coord(coord, new_first);
  52930. + set_key_offset(new_first,
  52931. + get_key_offset(new_first) + from + count);
  52932. + }
  52933. +
  52934. + if (REISER4_DEBUG)
  52935. + memset((char *)item_body_by_coord(coord) + from, 0, count);
  52936. + return count;
  52937. +}
  52938. +
  52939. +/* plugin->u.item.b.cut_units */
  52940. +int
  52941. +cut_units_tail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  52942. + struct carry_cut_data *cdata UNUSED_ARG,
  52943. + reiser4_key * smallest_removed, reiser4_key * new_first)
  52944. +{
  52945. + return do_cut_or_kill(coord, from, to, smallest_removed, new_first);
  52946. +}
  52947. +
  52948. +/* plugin->u.item.b.kill_units */
  52949. +int
  52950. +kill_units_tail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  52951. + struct carry_kill_data *kdata, reiser4_key * smallest_removed,
  52952. + reiser4_key * new_first)
  52953. +{
  52954. + kill_hook_tail(coord, from, to - from + 1, kdata);
  52955. + return do_cut_or_kill(coord, from, to, smallest_removed, new_first);
  52956. +}
  52957. +
  52958. +/* plugin->u.item.b.unit_key */
  52959. +reiser4_key *unit_key_tail(const coord_t * coord, reiser4_key * key)
  52960. +{
  52961. + assert("vs-375", coord_is_existing_unit(coord));
  52962. +
  52963. + item_key_by_coord(coord, key);
  52964. + set_key_offset(key, (get_key_offset(key) + coord->unit_pos));
  52965. +
  52966. + return key;
  52967. +}
  52968. +
  52969. +/* plugin->u.item.b.estimate
  52970. + plugin->u.item.b.item_data_by_flow */
  52971. +
  52972. +/* tail readpage function. It is called from readpage_tail(). */
  52973. +static int do_readpage_tail(uf_coord_t *uf_coord, struct page *page)
  52974. +{
  52975. + tap_t tap;
  52976. + int result;
  52977. + coord_t coord;
  52978. + lock_handle lh;
  52979. + int count, mapped;
  52980. + struct inode *inode;
  52981. + char *pagedata;
  52982. +
  52983. + /* saving passed coord in order not to move it by tap. */
  52984. + init_lh(&lh);
  52985. + copy_lh(&lh, uf_coord->lh);
  52986. + inode = page->mapping->host;
  52987. + coord_dup(&coord, &uf_coord->coord);
  52988. +
  52989. + reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK);
  52990. +
  52991. + if ((result = reiser4_tap_load(&tap)))
  52992. + goto out_tap_done;
  52993. +
  52994. + /* lookup until page is filled up. */
  52995. + for (mapped = 0; mapped < PAGE_SIZE; ) {
  52996. + /* number of bytes to be copied to page */
  52997. + count = item_length_by_coord(&coord) - coord.unit_pos;
  52998. + if (count > PAGE_SIZE - mapped)
  52999. + count = PAGE_SIZE - mapped;
  53000. +
  53001. + /* attach @page to address space and get data address */
  53002. + pagedata = kmap_atomic(page);
  53003. +
  53004. + /* copy tail item to page */
  53005. + memcpy(pagedata + mapped,
  53006. + ((char *)item_body_by_coord(&coord) + coord.unit_pos),
  53007. + count);
  53008. + mapped += count;
  53009. +
  53010. + flush_dcache_page(page);
  53011. +
  53012. + /* detach page from address space */
  53013. + kunmap_atomic(pagedata);
  53014. +
  53015. + /* Getting next tail item. */
  53016. + if (mapped < PAGE_SIZE) {
  53017. + /*
  53018. + * unlock page in order to avoid keep it locked
  53019. + * during tree lookup, which takes long term locks
  53020. + */
  53021. + unlock_page(page);
  53022. +
  53023. + /* getting right neighbour. */
  53024. + result = go_dir_el(&tap, RIGHT_SIDE, 0);
  53025. +
  53026. + /* lock page back */
  53027. + lock_page(page);
  53028. + if (PageUptodate(page)) {
  53029. + /*
  53030. + * another thread read the page, we have
  53031. + * nothing to do
  53032. + */
  53033. + result = 0;
  53034. + goto out_unlock_page;
  53035. + }
  53036. +
  53037. + if (result) {
  53038. + if (result == -E_NO_NEIGHBOR) {
  53039. + /*
  53040. + * right neighbor is not a formatted
  53041. + * node
  53042. + */
  53043. + result = 0;
  53044. + goto done;
  53045. + } else {
  53046. + goto out_tap_relse;
  53047. + }
  53048. + } else {
  53049. + if (!inode_file_plugin(inode)->
  53050. + owns_item(inode, &coord)) {
  53051. + /* item of another file is found */
  53052. + result = 0;
  53053. + goto done;
  53054. + }
  53055. + }
  53056. + }
  53057. + }
  53058. +
  53059. + done:
  53060. + if (mapped != PAGE_SIZE)
  53061. + zero_user_segment(page, mapped, PAGE_SIZE);
  53062. + SetPageUptodate(page);
  53063. + out_unlock_page:
  53064. + unlock_page(page);
  53065. + out_tap_relse:
  53066. + reiser4_tap_relse(&tap);
  53067. + out_tap_done:
  53068. + reiser4_tap_done(&tap);
  53069. + return result;
  53070. +}
  53071. +
  53072. +/*
  53073. + * plugin->s.file.readpage
  53074. + *
  53075. + * reiser4_read_dispatch->read_unix_file->page_cache_readahead->
  53076. + * ->reiser4_readpage_dispatch->readpage_unix_file->readpage_tail
  53077. + * or
  53078. + * filemap_fault->reiser4_readpage_dispatch->readpage_unix_file->readpage_tail
  53079. + *
  53080. + * At the beginning: coord->node is read locked, zloaded, page is locked,
  53081. + * coord is set to existing unit inside of tail item.
  53082. + */
  53083. +int readpage_tail(void *vp, struct page *page)
  53084. +{
  53085. + uf_coord_t *uf_coord = vp;
  53086. + ON_DEBUG(coord_t * coord = &uf_coord->coord);
  53087. + ON_DEBUG(reiser4_key key);
  53088. +
  53089. + assert("umka-2515", PageLocked(page));
  53090. + assert("umka-2516", !PageUptodate(page));
  53091. + assert("umka-2517", !jprivate(page) && !PagePrivate(page));
  53092. + assert("umka-2518", page->mapping && page->mapping->host);
  53093. +
  53094. + assert("umka-2519", znode_is_loaded(coord->node));
  53095. + assert("umka-2520", item_is_tail(coord));
  53096. + assert("umka-2521", coord_is_existing_unit(coord));
  53097. + assert("umka-2522", znode_is_rlocked(coord->node));
  53098. + assert("umka-2523",
  53099. + page->mapping->host->i_ino ==
  53100. + get_key_objectid(item_key_by_coord(coord, &key)));
  53101. +
  53102. + return do_readpage_tail(uf_coord, page);
  53103. +}
  53104. +
  53105. +/**
  53106. + * overwrite_tail
  53107. + * @flow:
  53108. + * @coord:
  53109. + *
  53110. + * Overwrites tail item or its part by user data. Returns number of bytes
  53111. + * written or error code.
  53112. + */
  53113. +static int overwrite_tail(flow_t *flow, coord_t *coord)
  53114. +{
  53115. + unsigned count;
  53116. +
  53117. + assert("vs-570", flow->user == 1);
  53118. + assert("vs-946", flow->data);
  53119. + assert("vs-947", coord_is_existing_unit(coord));
  53120. + assert("vs-948", znode_is_write_locked(coord->node));
  53121. + assert("nikita-3036", reiser4_schedulable());
  53122. +
  53123. + count = item_length_by_coord(coord) - coord->unit_pos;
  53124. + if (count > flow->length)
  53125. + count = flow->length;
  53126. +
  53127. + if (__copy_from_user((char *)item_body_by_coord(coord) + coord->unit_pos,
  53128. + (const char __user *)flow->data, count))
  53129. + return RETERR(-EFAULT);
  53130. +
  53131. + znode_make_dirty(coord->node);
  53132. + return count;
  53133. +}
  53134. +
  53135. +/**
  53136. + * insert_first_tail
  53137. + * @inode:
  53138. + * @flow:
  53139. + * @coord:
  53140. + * @lh:
  53141. + *
  53142. + * Returns number of bytes written or error code.
  53143. + */
  53144. +static ssize_t insert_first_tail(struct inode *inode, flow_t *flow,
  53145. + coord_t *coord, lock_handle *lh)
  53146. +{
  53147. + int result;
  53148. + loff_t to_write;
  53149. + struct unix_file_info *uf_info;
  53150. +
  53151. + if (get_key_offset(&flow->key) != 0) {
  53152. + /*
  53153. + * file is empty and we have to write not to the beginning of
  53154. + * file. Create a hole at the beginning of file. On success
  53155. + * insert_flow returns 0 as number of written bytes which is
  53156. + * what we have to return on padding a file with holes
  53157. + */
  53158. + flow->data = NULL;
  53159. + flow->length = get_key_offset(&flow->key);
  53160. + set_key_offset(&flow->key, 0);
  53161. + /*
  53162. + * holes in files built of tails are stored just like if there
  53163. + * were real data which are all zeros.
  53164. + */
  53165. + inode_add_bytes(inode, flow->length);
  53166. + result = reiser4_insert_flow(coord, lh, flow);
  53167. + if (flow->length)
  53168. + inode_sub_bytes(inode, flow->length);
  53169. +
  53170. + uf_info = unix_file_inode_data(inode);
  53171. +
  53172. + /*
  53173. + * first item insertion is only possible when writing to empty
  53174. + * file or performing tail conversion
  53175. + */
  53176. + assert("", (uf_info->container == UF_CONTAINER_EMPTY ||
  53177. + (reiser4_inode_get_flag(inode,
  53178. + REISER4_PART_MIXED) &&
  53179. + reiser4_inode_get_flag(inode,
  53180. + REISER4_PART_IN_CONV))));
  53181. + /* if file was empty - update its state */
  53182. + if (result == 0 && uf_info->container == UF_CONTAINER_EMPTY)
  53183. + uf_info->container = UF_CONTAINER_TAILS;
  53184. + return result;
  53185. + }
  53186. +
  53187. + inode_add_bytes(inode, flow->length);
  53188. +
  53189. + to_write = flow->length;
  53190. + result = reiser4_insert_flow(coord, lh, flow);
  53191. + if (flow->length)
  53192. + inode_sub_bytes(inode, flow->length);
  53193. + return (to_write - flow->length) ? (to_write - flow->length) : result;
  53194. +}
  53195. +
  53196. +/**
  53197. + * append_tail
  53198. + * @inode:
  53199. + * @flow:
  53200. + * @coord:
  53201. + * @lh:
  53202. + *
  53203. + * Returns number of bytes written or error code.
  53204. + */
  53205. +static ssize_t append_tail(struct inode *inode,
  53206. + flow_t *flow, coord_t *coord, lock_handle *lh)
  53207. +{
  53208. + int result;
  53209. + reiser4_key append_key;
  53210. + loff_t to_write;
  53211. +
  53212. + if (!keyeq(&flow->key, append_key_tail(coord, &append_key))) {
  53213. + flow->data = NULL;
  53214. + flow->length = get_key_offset(&flow->key) - get_key_offset(&append_key);
  53215. + set_key_offset(&flow->key, get_key_offset(&append_key));
  53216. + /*
  53217. + * holes in files built of tails are stored just like if there
  53218. + * were real data which are all zeros.
  53219. + */
  53220. + inode_add_bytes(inode, flow->length);
  53221. + result = reiser4_insert_flow(coord, lh, flow);
  53222. + if (flow->length)
  53223. + inode_sub_bytes(inode, flow->length);
  53224. + return result;
  53225. + }
  53226. +
  53227. + inode_add_bytes(inode, flow->length);
  53228. +
  53229. + to_write = flow->length;
  53230. + result = reiser4_insert_flow(coord, lh, flow);
  53231. + if (flow->length)
  53232. + inode_sub_bytes(inode, flow->length);
  53233. + return (to_write - flow->length) ? (to_write - flow->length) : result;
  53234. +}
  53235. +
  53236. +/**
  53237. + * write_extent_reserve_space - reserve space for tail write operation
  53238. + * @inode:
  53239. + *
  53240. + * Estimates and reserves space which may be required for writing one flow to a
  53241. + * file
  53242. + */
  53243. +static int write_extent_reserve_space(struct inode *inode)
  53244. +{
  53245. + __u64 count;
  53246. + reiser4_tree *tree;
  53247. +
  53248. + /*
  53249. + * to write one flow to a file by tails we have to reserve disk space for:
  53250. +
  53251. + * 1. find_file_item may have to insert empty node to the tree (empty
  53252. + * leaf node between two extent items). This requires 1 block and
  53253. + * number of blocks which are necessary to perform insertion of an
  53254. + * internal item into twig level.
  53255. + *
  53256. + * 2. flow insertion
  53257. + *
  53258. + * 3. stat data update
  53259. + */
  53260. + tree = reiser4_tree_by_inode(inode);
  53261. + count = estimate_one_insert_item(tree) +
  53262. + estimate_insert_flow(tree->height) +
  53263. + estimate_one_insert_item(tree);
  53264. + grab_space_enable();
  53265. + return reiser4_grab_space(count, 0 /* flags */);
  53266. +}
  53267. +
  53268. +#define PAGE_PER_FLOW 4
  53269. +
+static loff_t faultin_user_pages(const char __user *buf, size_t count)
+{
+ loff_t faulted; /* total bytes faulted in so far */
+ int to_fault; /* chunk size for one iteration, at most one page */
+
+ if (count > PAGE_PER_FLOW * PAGE_SIZE) /* cap one pass at PAGE_PER_FLOW pages */
+ count = PAGE_PER_FLOW * PAGE_SIZE;
+ faulted = 0;
+ while (count > 0) {
+ to_fault = PAGE_SIZE;
+ if (count < to_fault)
+ to_fault = count;
+ fault_in_readable(buf + faulted, to_fault); /* NOTE(review): return value (bytes NOT faulted) is ignored - confirm intentional */
+ count -= to_fault;
+ faulted += to_fault;
+ }
+ return faulted;
+}
  53288. +
+ssize_t reiser4_write_tail_noreserve(struct file *file,
+ struct inode * inode,
+ const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct hint hint;
+ int result;
+ flow_t flow;
+ coord_t *coord;
+ lock_handle *lh;
+ znode *loaded;
+
+ assert("edward-1548", inode != NULL);
+
+ result = load_file_hint(file, &hint);
+ BUG_ON(result != 0);
+
+ flow.length = faultin_user_pages(buf, count); /* write only as much as was faulted in */
+ flow.user = 1;
+ memcpy(&flow.data, &buf, sizeof(buf)); /* store the user-space pointer itself in flow.data */
+ flow.op = WRITE_OP;
+ key_by_inode_and_offset_common(inode, *pos, &flow.key);
+
+ result = find_file_item(&hint, &flow.key, ZNODE_WRITE_LOCK, inode);
+ if (IS_CBKERR(result))
+ return result;
+
+ coord = &hint.ext_coord.coord;
+ lh = hint.ext_coord.lh;
+
+ result = zload(coord->node);
+ BUG_ON(result != 0);
+ loaded = coord->node;
+
+ if (coord->between == AFTER_UNIT) {
+ /* append with data or hole */
+ result = append_tail(inode, &flow, coord, lh);
+ } else if (coord->between == AT_UNIT) {
+ /* overwrite */
+ result = overwrite_tail(&flow, coord);
+ } else {
+ /* no items of this file yet. insert data or hole */
+ result = insert_first_tail(inode, &flow, coord, lh);
+ }
+ zrelse(loaded);
+ if (result < 0) {
+ done_lh(lh);
+ return result;
+ }
+
+ /* seal and unlock znode */
+ hint.ext_coord.valid = 0; /* NOTE(review): cleared unconditionally, so the set_hint branch below is dead - confirm intentional */
+ if (hint.ext_coord.valid)
+ reiser4_set_hint(&hint, &flow.key, ZNODE_WRITE_LOCK);
+ else
+ reiser4_unset_hint(&hint);
+
+ save_file_hint(file, &hint);
+ return result;
+}
  53349. +
  53350. +/**
  53351. + * reiser4_write_tail - write method of tail item plugin
  53352. + * @file: file to write to
  53353. + * @buf: address of user-space buffer
  53354. + * @count: number of bytes to write
  53355. + * @pos: position in file to write to
  53356. + *
  53357. + * Returns number of written bytes or error code.
  53358. + */
+ssize_t reiser4_write_tail(struct file *file,
+ struct inode * inode,
+ const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ if (write_extent_reserve_space(inode)) /* reserve disk space up front; fail early */
+ return RETERR(-ENOSPC);
+ return reiser4_write_tail_noreserve(file, inode, buf, count, pos);
+}
  53368. +
  53369. +#if REISER4_DEBUG
  53370. +
+static int
+coord_matches_key_tail(const coord_t * coord, const reiser4_key * key)
+{
+ reiser4_key item_key;
+
+ assert("vs-1356", coord_is_existing_unit(coord));
+ assert("vs-1354", keylt(key, append_key_tail(coord, &item_key)));
+ assert("vs-1355", keyge(key, item_key_by_coord(coord, &item_key)));
+ return get_key_offset(key) == /* key offset must equal item start plus byte position within the tail */
+ get_key_offset(&item_key) + coord->unit_pos;
+
+}
  53383. +
  53384. +#endif
  53385. +
  53386. +/**
  53387. + * plugin->u.item.s.file.read
  53388. + */
+int reiser4_read_tail(flow_t *f, hint_t *hint,
+ struct kiocb *iocb, struct iov_iter *iter)
+{
+ size_t from_item, copied;
+ uf_coord_t *uf_coord;
+ int item_length;
+ coord_t *coord;
+
+ uf_coord = &hint->ext_coord;
+ coord = &uf_coord->coord;
+
+ assert("vs-571", f->user == 1);
+ assert("vs-967", coord && coord->node);
+ assert("vs-1117", znode_is_rlocked(coord->node));
+ assert("vs-1118", znode_is_loaded(coord->node));
+
+ assert("nikita-3037", reiser4_schedulable());
+ assert("vs-1357", coord_matches_key_tail(coord, &f->key));
+
+ /* how many bytes can we copy out from this item */
+ item_length = item_length_by_coord(coord);
+ from_item = item_length_by_coord(coord) - coord->unit_pos; /* bytes left from unit_pos to item end */
+ if (from_item > iov_iter_count(iter))
+ from_item = iov_iter_count(iter);
+
+ copied = copy_to_iter((char *)item_body_by_coord(coord) +
+ coord->unit_pos, from_item, iter);
+ iocb->ki_pos += copied; /* NOTE(review): copy_to_iter may return less than from_item; short copy is not treated as error - confirm */
+
+ /* probably mark_page_accessed() should only be called if
+ * coord->unit_pos is zero. */
+ mark_page_accessed(znode_page(coord->node));
+ move_flow_forward(f, copied);
+
+ coord->unit_pos += copied;
+ if (item_length == coord->unit_pos) { /* whole item consumed: park coord after its last unit */
+ coord->unit_pos--;
+ coord->between = AFTER_UNIT;
+ }
+ reiser4_set_hint(hint, &f->key, ZNODE_READ_LOCK);
+ return 0;
+}
  53431. +
  53432. +/*
  53433. + plugin->u.item.s.file.append_key
+ key of the first byte right after the last byte addressed by this item
  53435. +*/
+reiser4_key *append_key_tail(const coord_t * coord, reiser4_key * key)
+{
+ item_key_by_coord(coord, key); /* start from the key of the item's first byte */
+ set_key_offset(key, get_key_offset(key) + item_length_by_coord(coord)); /* advance past the last byte */
+ return key;
+}
  53442. +
  53443. +/* plugin->u.item.s.file.init_coord_extension */
+void init_coord_extension_tail(uf_coord_t * uf_coord, loff_t lookuped)
+{
+ uf_coord->valid = 1; /* tail items carry no extra extension state (see tail_coord_extension) */
+}
  53448. +
  53449. +/*
  53450. + plugin->u.item.s.file.get_block
  53451. +*/
+int
+get_block_address_tail(const coord_t * coord, sector_t lblock, sector_t * block)
+{
+ assert("nikita-3252", znode_get_level(coord->node) == LEAF_LEVEL);
+
+ if (reiser4_blocknr_is_fake(znode_get_block(coord->node)))
+ /* if node hasn't obtained its block number yet, return 0.
+ * Let's avoid upsetting users with some cosmic numbers beyond
+ * the device capacity.*/
+ *block = 0;
+ else
+ *block = *znode_get_block(coord->node);
+ return 0;
+}
  53466. +
  53467. +/*
  53468. + * Local variables:
  53469. + * c-indentation-style: "K&R"
  53470. + * mode-name: "LC"
  53471. + * c-basic-offset: 8
  53472. + * tab-width: 8
  53473. + * fill-column: 79
  53474. + * scroll-step: 1
  53475. + * End:
  53476. + */
  53477. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/item/tail.h linux-5.16.14/fs/reiser4/plugin/item/tail.h
  53478. --- linux-5.16.14.orig/fs/reiser4/plugin/item/tail.h 1970-01-01 01:00:00.000000000 +0100
  53479. +++ linux-5.16.14/fs/reiser4/plugin/item/tail.h 2022-03-12 13:26:19.681892799 +0100
  53480. @@ -0,0 +1,59 @@
  53481. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  53482. +
  53483. +#if !defined( __REISER4_TAIL_H__ )
  53484. +#define __REISER4_TAIL_H__
  53485. +
  53486. +struct tail_coord_extension {
  53487. + int not_used;
  53488. +};
  53489. +
  53490. +struct cut_list;
  53491. +
  53492. +/* plugin->u.item.b.* */
  53493. +reiser4_key *max_key_inside_tail(const coord_t *, reiser4_key *);
  53494. +int can_contain_key_tail(const coord_t * coord, const reiser4_key * key,
  53495. + const reiser4_item_data *);
  53496. +int mergeable_tail(const coord_t * p1, const coord_t * p2);
  53497. +pos_in_node_t nr_units_tail(const coord_t *);
  53498. +lookup_result lookup_tail(const reiser4_key *, lookup_bias, coord_t *);
  53499. +int paste_tail(coord_t *, reiser4_item_data *, carry_plugin_info *);
  53500. +int can_shift_tail(unsigned free_space, coord_t * source,
  53501. + znode * target, shift_direction, unsigned *size,
  53502. + unsigned want);
  53503. +void copy_units_tail(coord_t * target, coord_t * source, unsigned from,
  53504. + unsigned count, shift_direction, unsigned free_space);
  53505. +int kill_hook_tail(const coord_t *, pos_in_node_t from, pos_in_node_t count,
  53506. + struct carry_kill_data *);
  53507. +int cut_units_tail(coord_t *, pos_in_node_t from, pos_in_node_t to,
  53508. + struct carry_cut_data *, reiser4_key * smallest_removed,
  53509. + reiser4_key * new_first);
  53510. +int kill_units_tail(coord_t *, pos_in_node_t from, pos_in_node_t to,
  53511. + struct carry_kill_data *, reiser4_key * smallest_removed,
  53512. + reiser4_key * new_first);
  53513. +reiser4_key *unit_key_tail(const coord_t *, reiser4_key *);
  53514. +
  53515. +/* plugin->u.item.s.* */
  53516. +ssize_t reiser4_write_tail_noreserve(struct file *file, struct inode * inode,
  53517. + const char __user *buf, size_t count,
  53518. + loff_t *pos);
  53519. +ssize_t reiser4_write_tail(struct file *file, struct inode * inode,
  53520. + const char __user *buf, size_t count, loff_t *pos);
  53521. +int reiser4_read_tail(flow_t *, hint_t *, struct kiocb *, struct iov_iter *);
  53522. +int readpage_tail(void *vp, struct page *page);
  53523. +reiser4_key *append_key_tail(const coord_t *, reiser4_key *);
  53524. +void init_coord_extension_tail(uf_coord_t *, loff_t offset);
  53525. +int get_block_address_tail(const coord_t *, sector_t, sector_t *);
  53526. +
  53527. +/* __REISER4_TAIL_H__ */
  53528. +#endif
  53529. +
  53530. +/* Make Linus happy.
  53531. + Local variables:
  53532. + c-indentation-style: "K&R"
  53533. + mode-name: "LC"
  53534. + c-basic-offset: 8
  53535. + tab-width: 8
  53536. + fill-column: 120
  53537. + scroll-step: 1
  53538. + End:
  53539. +*/
  53540. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/Makefile linux-5.16.14/fs/reiser4/plugin/Makefile
  53541. --- linux-5.16.14.orig/fs/reiser4/plugin/Makefile 1970-01-01 01:00:00.000000000 +0100
  53542. +++ linux-5.16.14/fs/reiser4/plugin/Makefile 2022-03-12 13:26:19.662892753 +0100
  53543. @@ -0,0 +1,29 @@
  53544. +
  53545. +MODULE := plugins
  53546. +
  53547. +obj-$(CONFIG_REISER4_FS) := $(MODULE).o
  53548. +
  53549. +$(MODULE)-objs += \
  53550. + plugin.o \
  53551. + plugin_set.o \
  53552. + object.o \
  53553. + inode_ops.o \
  53554. + inode_ops_rename.o \
  53555. + file_ops.o \
  53556. + file_ops_readdir.o \
  53557. + file_plugin_common.o \
  53558. + dir_plugin_common.o \
  53559. + digest.o \
  53560. + hash.o \
  53561. + fibration.o \
  53562. + tail_policy.o \
  53563. + regular.o
  53564. +
  53565. +obj-$(CONFIG_REISER4_FS) += item/
  53566. +obj-$(CONFIG_REISER4_FS) += file/
  53567. +obj-$(CONFIG_REISER4_FS) += dir/
  53568. +obj-$(CONFIG_REISER4_FS) += node/
  53569. +obj-$(CONFIG_REISER4_FS) += compress/
  53570. +obj-$(CONFIG_REISER4_FS) += space/
  53571. +obj-$(CONFIG_REISER4_FS) += disk_format/
  53572. +obj-$(CONFIG_REISER4_FS) += security/
  53573. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/node/Makefile linux-5.16.14/fs/reiser4/plugin/node/Makefile
  53574. --- linux-5.16.14.orig/fs/reiser4/plugin/node/Makefile 1970-01-01 01:00:00.000000000 +0100
  53575. +++ linux-5.16.14/fs/reiser4/plugin/node/Makefile 2022-03-12 13:26:19.681892799 +0100
  53576. @@ -0,0 +1,9 @@
  53577. +
  53578. +MODULE := node_plugins
  53579. +
  53580. +obj-$(CONFIG_REISER4_FS) := $(MODULE).o
  53581. +
  53582. +$(MODULE)-objs += \
  53583. + node.o \
  53584. + node40.o \
  53585. + node41.o
  53586. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/node/node40.c linux-5.16.14/fs/reiser4/plugin/node/node40.c
  53587. --- linux-5.16.14.orig/fs/reiser4/plugin/node/node40.c 1970-01-01 01:00:00.000000000 +0100
  53588. +++ linux-5.16.14/fs/reiser4/plugin/node/node40.c 2022-03-12 13:26:19.683892804 +0100
  53589. @@ -0,0 +1,3073 @@
  53590. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  53591. +
  53592. +#include "../../debug.h"
  53593. +#include "../../key.h"
  53594. +#include "../../coord.h"
  53595. +#include "../plugin_header.h"
  53596. +#include "../item/item.h"
  53597. +#include "node.h"
  53598. +#include "node40.h"
  53599. +#include "../plugin.h"
  53600. +#include "../../jnode.h"
  53601. +#include "../../znode.h"
  53602. +#include "../../pool.h"
  53603. +#include "../../carry.h"
  53604. +#include "../../tap.h"
  53605. +#include "../../tree.h"
  53606. +#include "../../super.h"
  53607. +#include "../../reiser4.h"
  53608. +
  53609. +#include <asm/uaccess.h>
  53610. +#include <linux/types.h>
  53611. +#include <linux/prefetch.h>
  53612. +
  53613. +/* leaf 40 format:
  53614. +
  53615. + [node header | item 0, item 1, .., item N-1 | free space | item_head N-1, .. item_head 1, item head 0 ]
  53616. + plugin_id (16) key
  53617. + free_space (16) pluginid (16)
  53618. + free_space_start (16) offset (16)
  53619. + level (8)
  53620. + num_items (16)
  53621. + magic (32)
  53622. + flush_time (32)
  53623. +*/
  53624. +/* NIKITA-FIXME-HANS: I told you guys not less than 10 times to not call it r4fs. Change to "ReIs". */
  53625. +/* magic number that is stored in ->magic field of node header */
  53626. +static const __u32 REISER4_NODE40_MAGIC = 0x52344653; /* (*(__u32 *)"R4FS"); */
  53627. +
  53628. +static int prepare_for_update(znode * left, znode * right,
  53629. + carry_plugin_info * info);
  53630. +
  53631. +/* header of node of reiser40 format is at the beginning of node */
+static inline node40_header *node40_node_header(const znode * node /* node to
+ * query */ )
+{
+ assert("nikita-567", node != NULL);
+ assert("nikita-568", znode_page(node) != NULL);
+ assert("nikita-569", zdata(node) != NULL);
+ return (node40_header *) zdata(node); /* header sits at the very beginning of node data */
+}
  53640. +
  53641. +/* functions to get/set fields of node40_header */
  53642. +#define nh40_get_magic(nh) le32_to_cpu(get_unaligned(&(nh)->magic))
  53643. +#define nh40_get_free_space(nh) le16_to_cpu(get_unaligned(&(nh)->free_space))
  53644. +#define nh40_get_free_space_start(nh) le16_to_cpu(get_unaligned(&(nh)->free_space_start))
  53645. +#define nh40_get_level(nh) get_unaligned(&(nh)->level)
  53646. +#define nh40_get_num_items(nh) le16_to_cpu(get_unaligned(&(nh)->nr_items))
  53647. +#define nh40_get_flush_id(nh) le64_to_cpu(get_unaligned(&(nh)->flush_id))
  53648. +
  53649. +#define nh40_set_magic(nh, value) put_unaligned(cpu_to_le32(value), &(nh)->magic)
  53650. +#define nh40_set_free_space(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->free_space)
  53651. +#define nh40_set_free_space_start(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->free_space_start)
  53652. +#define nh40_set_level(nh, value) put_unaligned(value, &(nh)->level)
  53653. +#define nh40_set_num_items(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->nr_items)
  53654. +#define nh40_set_mkfs_id(nh, value) put_unaligned(cpu_to_le32(value), &(nh)->mkfs_id)
  53655. +
  53656. +/* plugin field of node header should be read/set by
  53657. + plugin_by_disk_id/save_disk_plugin */
  53658. +
  53659. +/* array of item headers is at the end of node */
+static inline item_header40 *node40_ih_at(const znode * node, unsigned pos)
+{
+ return (item_header40 *) (zdata(node) + znode_size(node)) - pos - 1; /* headers are stacked from the node's end towards the front */
+}
  53664. +
  53665. +/* ( page_address( node -> pg ) + PAGE_CACHE_SIZE ) - pos - 1
  53666. + */
+static inline item_header40 *node40_ih_at_coord(const coord_t * coord)
+{
+ return (item_header40 *) (zdata(coord->node) + /* same layout as node40_ih_at(), indexed by coord->item_pos */
+ znode_size(coord->node)) - (coord->item_pos) -
+ 1;
+}
  53673. +
  53674. +/* functions to get/set fields of item_header40 */
  53675. +#define ih40_get_offset(ih) le16_to_cpu(get_unaligned(&(ih)->offset))
  53676. +
  53677. +#define ih40_set_offset(ih, value) put_unaligned(cpu_to_le16(value), &(ih)->offset)
  53678. +
  53679. +/* plugin field of item header should be read/set by
  53680. + plugin_by_disk_id/save_disk_plugin */
  53681. +
  53682. +/* plugin methods */
  53683. +
  53684. +/* plugin->u.node.item_overhead
  53685. + look for description of this method in plugin/node/node.h */
+size_t
+item_overhead_node40(const znode * node UNUSED_ARG, flow_t * f UNUSED_ARG)
+{
+ return sizeof(item_header40); /* every item costs exactly one item header of node space */
+}
  53691. +
  53692. +/* plugin->u.node.free_space
  53693. + look for description of this method in plugin/node/node.h */
+size_t free_space_node40(znode * node)
+{
+ assert("nikita-577", node != NULL);
+ assert("nikita-578", znode_is_loaded(node));
+ assert("nikita-579", zdata(node) != NULL);
+
+ return nh40_get_free_space(node40_node_header(node)); /* free_space field of the on-disk node header */
+}
  53702. +
  53703. +/* private inline version of node40_num_of_items() for use in this file. This
  53704. + is necessary, because address of node40_num_of_items() is taken and it is
  53705. + never inlined as a result. */
+static inline short node40_num_of_items_internal(const znode * node)
+{
+ return nh40_get_num_items(node40_node_header(node)); /* nr_items field of the node header */
+}
  53710. +
  53711. +#if REISER4_DEBUG
  53712. +static inline void check_num_items(const znode * node)
  53713. +{
  53714. + assert("nikita-2749",
  53715. + node40_num_of_items_internal(node) == node->nr_items);
  53716. + assert("nikita-2746", znode_is_write_locked(node));
  53717. +}
  53718. +#else
  53719. +#define check_num_items(node) noop
  53720. +#endif
  53721. +
  53722. +/* plugin->u.node.num_of_items
  53723. + look for description of this method in plugin/node/node.h */
+int num_of_items_node40(const znode * node)
+{
+ return node40_num_of_items_internal(node); /* exported wrapper around the inline helper */
+}
  53728. +
+static void
+node40_set_num_items(znode * node, node40_header * nh, unsigned value)
+{
+ assert("nikita-2751", node != NULL);
+ assert("nikita-2750", nh == node40_node_header(node));
+
+ check_num_items(node);
+ nh40_set_num_items(nh, value); /* keep the on-disk header ... */
+ node->nr_items = value; /* ... and the in-memory cache in sync */
+ check_num_items(node);
+}
  53740. +
  53741. +/* plugin->u.node.item_by_coord
  53742. + look for description of this method in plugin/node/node.h */
+char *item_by_coord_node40(const coord_t * coord)
+{
+ item_header40 *ih;
+ char *p;
+
+ /* @coord is set to existing item */
+ assert("nikita-596", coord != NULL);
+ assert("vs-255", coord_is_existing_item(coord));
+
+ ih = node40_ih_at_coord(coord);
+ p = zdata(coord->node) + ih40_get_offset(ih); /* item body starts at the offset recorded in its header */
+ return p;
+}
  53756. +
  53757. +/* plugin->u.node.length_by_coord
  53758. + look for description of this method in plugin/node/node.h */
+int length_by_coord_node40(const coord_t * coord)
+{
+ item_header40 *ih;
+ int result;
+
+ /* @coord is set to existing item */
+ assert("vs-256", coord != NULL);
+ assert("vs-257", coord_is_existing_item(coord));
+
+ ih = node40_ih_at_coord(coord);
+ if ((int)coord->item_pos ==
+ node40_num_of_items_internal(coord->node) - 1)
+ result = /* last item extends up to the start of free space */
+ nh40_get_free_space_start(node40_node_header(coord->node)) -
+ ih40_get_offset(ih);
+ else
+ result = ih40_get_offset(ih - 1) - ih40_get_offset(ih); /* length is the gap to the next item's offset */
+
+ return result;
+}
  53779. +
+static pos_in_node_t
+node40_item_length(const znode * node, pos_in_node_t item_pos)
+{
+ item_header40 *ih;
+ pos_in_node_t result;
+
+ /* @item_pos must address an existing item of @node */
+ assert("vs-256", node != NULL);
+ assert("vs-257", node40_num_of_items_internal(node) > item_pos);
+
+ ih = node40_ih_at(node, item_pos);
+ if (item_pos == node40_num_of_items_internal(node) - 1)
+ result = /* last item extends up to the start of free space */
+ nh40_get_free_space_start(node40_node_header(node)) -
+ ih40_get_offset(ih);
+ else
+ result = ih40_get_offset(ih - 1) - ih40_get_offset(ih); /* gap to the next item's offset */
+
+ return result;
+}
  53800. +
  53801. +/* plugin->u.node.plugin_by_coord
  53802. + look for description of this method in plugin/node/node.h */
+item_plugin *plugin_by_coord_node40(const coord_t * coord)
+{
+ item_header40 *ih;
+ item_plugin *result;
+
+ /* @coord is set to existing item */
+ assert("vs-258", coord != NULL);
+ assert("vs-259", coord_is_existing_item(coord));
+
+ ih = node40_ih_at_coord(coord);
+ /* pass NULL instead of current tree. This is time critical call. */
+ result = item_plugin_by_disk_id(NULL, &ih->plugin_id);
+ return result;
+}
  53817. +
  53818. +/* plugin->u.node.key_at
  53819. + look for description of this method in plugin/node/node.h */
+reiser4_key *key_at_node40(const coord_t * coord, reiser4_key * key)
+{
+ item_header40 *ih;
+
+ assert("nikita-1765", coord_is_existing_item(coord));
+
+ /* @coord is set to existing item */
+ ih = node40_ih_at_coord(coord);
+ memcpy(key, &ih->key, sizeof(reiser4_key)); /* key is stored verbatim in the item header */
+ return key;
+}
  53831. +
  53832. +/* VS-FIXME-HANS: please review whether the below are properly disabled when debugging is disabled */
  53833. +
  53834. +#define NODE_INCSTAT(n, counter) \
  53835. + reiser4_stat_inc_at_level(znode_get_level(n), node.lookup.counter)
  53836. +
  53837. +#define NODE_ADDSTAT(n, counter, val) \
  53838. + reiser4_stat_add_at_level(znode_get_level(n), node.lookup.counter, val)
  53839. +
  53840. +/* plugin->u.node.lookup
  53841. + look for description of this method in plugin/node/node.h */
  53842. +node_search_result lookup_node40(znode * node /* node to query */ ,
  53843. + const reiser4_key * key /* key to look for */ ,
  53844. + lookup_bias bias /* search bias */ ,
  53845. + coord_t * coord /* resulting coord */ )
  53846. +{
  53847. + int left;
  53848. + int right;
  53849. + int found;
  53850. + int items;
  53851. +
  53852. + item_header40 *lefth;
  53853. + item_header40 *righth;
  53854. +
  53855. + item_plugin *iplug;
  53856. + item_header40 *bstop;
  53857. + item_header40 *ih;
  53858. + cmp_t order;
  53859. +
  53860. + assert("nikita-583", node != NULL);
  53861. + assert("nikita-584", key != NULL);
  53862. + assert("nikita-585", coord != NULL);
  53863. + assert("nikita-2693", znode_is_any_locked(node));
  53864. +
  53865. + items = node_num_items(node);
  53866. +
  53867. + if (unlikely(items == 0)) {
  53868. + coord_init_first_unit(coord, node);
  53869. + return NS_NOT_FOUND;
  53870. + }
  53871. +
  53872. + /* binary search for item that can contain given key */
  53873. + left = 0;
  53874. + right = items - 1;
  53875. + coord->node = node;
  53876. + coord_clear_iplug(coord);
  53877. + found = 0;
  53878. +
  53879. + lefth = node40_ih_at(node, left);
  53880. + righth = node40_ih_at(node, right);
  53881. +
  53882. + /* It is known that for small arrays sequential search is on average
  53883. + more efficient than binary. This is because sequential search is
  53884. + coded as tight loop that can be better optimized by compilers and
  53885. + for small array size gain from this optimization makes sequential
  53886. + search the winner. Another, maybe more important, reason for this,
  53887. + is that sequential array is more CPU cache friendly, whereas binary
  53888. + search effectively destroys CPU caching.
  53889. +
  53890. + Critical here is the notion of "smallness". Reasonable value of
  53891. + REISER4_SEQ_SEARCH_BREAK can be found by playing with code in
  53892. + fs/reiser4/ulevel/ulevel.c:test_search().
  53893. +
  53894. + Don't try to further optimize sequential search by scanning from
  53895. + right to left in attempt to use more efficient loop termination
  53896. + condition (comparison with 0). This doesn't work.
  53897. +
  53898. + */
  53899. +
  53900. + while (right - left >= REISER4_SEQ_SEARCH_BREAK) {
  53901. + int median;
  53902. + item_header40 *medianh;
  53903. +
  53904. + median = (left + right) / 2;
  53905. + medianh = node40_ih_at(node, median);
  53906. +
  53907. + assert("nikita-1084", median >= 0);
  53908. + assert("nikita-1085", median < items);
  53909. + switch (keycmp(key, &medianh->key)) {
  53910. + case LESS_THAN:
  53911. + right = median;
  53912. + righth = medianh;
  53913. + break;
  53914. + default:
  53915. + wrong_return_value("nikita-586", "keycmp");
  53916. + case GREATER_THAN:
  53917. + left = median;
  53918. + lefth = medianh;
  53919. + break;
  53920. + case EQUAL_TO:
  53921. + do {
  53922. + --median;
  53923. + /* headers are ordered from right to left */
  53924. + ++medianh;
  53925. + } while (median >= 0 && keyeq(key, &medianh->key));
  53926. + right = left = median + 1;
  53927. + ih = lefth = righth = medianh - 1;
  53928. + found = 1;
  53929. + break;
  53930. + }
  53931. + }
  53932. + /* sequential scan. Item headers, and, therefore, keys are stored at
  53933. + the rightmost part of a node from right to left. We are trying to
  53934. + access memory from left to right, and hence, scan in _descending_
  53935. + order of item numbers.
  53936. + */
  53937. + if (!found) {
  53938. + for (left = right, ih = righth; left >= 0; ++ih, --left) {
  53939. + cmp_t comparison;
  53940. +
  53941. + prefetchkey(&(ih + 1)->key);
  53942. + comparison = keycmp(&ih->key, key);
  53943. + if (comparison == GREATER_THAN)
  53944. + continue;
  53945. + if (comparison == EQUAL_TO) {
  53946. + found = 1;
  53947. + do {
  53948. + --left;
  53949. + ++ih;
  53950. + } while (left >= 0 && keyeq(&ih->key, key));
  53951. + ++left;
  53952. + --ih;
  53953. + } else {
  53954. + assert("nikita-1256", comparison == LESS_THAN);
  53955. + }
  53956. + break;
  53957. + }
  53958. + if (unlikely(left < 0))
  53959. + left = 0;
  53960. + }
  53961. +
  53962. + assert("nikita-3212", right >= left);
  53963. + assert("nikita-3214",
  53964. + equi(found, keyeq(&node40_ih_at(node, left)->key, key)));
  53965. +
  53966. + coord_set_item_pos(coord, left);
  53967. + coord->unit_pos = 0;
  53968. + coord->between = AT_UNIT;
  53969. +
+ /* key < leftmost key in a node, or node is corrupted and keys
+ are not sorted */
  53972. + bstop = node40_ih_at(node, (unsigned)left);
  53973. + order = keycmp(&bstop->key, key);
  53974. + if (unlikely(order == GREATER_THAN)) {
  53975. + if (unlikely(left != 0)) {
  53976. + /* screw up */
  53977. + warning("nikita-587", "Key less than %i key in a node",
  53978. + left);
  53979. + reiser4_print_key("key", key);
  53980. + reiser4_print_key("min", &bstop->key);
  53981. + print_coord_content("coord", coord);
  53982. + return RETERR(-EIO);
  53983. + } else {
  53984. + coord->between = BEFORE_UNIT;
  53985. + return NS_NOT_FOUND;
  53986. + }
  53987. + }
  53988. + /* left <= key, ok */
  53989. + iplug = item_plugin_by_disk_id(znode_get_tree(node), &bstop->plugin_id);
  53990. +
  53991. + if (unlikely(iplug == NULL)) {
  53992. + warning("nikita-588", "Unknown plugin %i",
  53993. + le16_to_cpu(get_unaligned(&bstop->plugin_id)));
  53994. + reiser4_print_key("key", key);
  53995. + print_coord_content("coord", coord);
  53996. + return RETERR(-EIO);
  53997. + }
  53998. +
  53999. + coord_set_iplug(coord, iplug);
  54000. +
  54001. + /* if exact key from item header was found by binary search, no
  54002. + further checks are necessary. */
  54003. + if (found) {
  54004. + assert("nikita-1259", order == EQUAL_TO);
  54005. + return NS_FOUND;
  54006. + }
  54007. + if (iplug->b.max_key_inside != NULL) {
  54008. + reiser4_key max_item_key;
  54009. +
  54010. + /* key > max_item_key --- outside of an item */
  54011. + if (keygt(key, iplug->b.max_key_inside(coord, &max_item_key))) {
  54012. + coord->unit_pos = 0;
  54013. + coord->between = AFTER_ITEM;
  54014. + /* FIXME-VS: key we are looking for does not fit into
  54015. + found item. Return NS_NOT_FOUND then. Without that
  54016. + the following case does not work: there is extent of
  54017. + file 10000, 10001. File 10000, 10002 has been just
  54018. + created. When writing to position 0 in that file -
  54019. + traverse_tree will stop here on twig level. When we
  54020. + want it to go down to leaf level
  54021. + */
  54022. + return NS_NOT_FOUND;
  54023. + }
  54024. + }
  54025. +
  54026. + if (iplug->b.lookup != NULL) {
  54027. + return (node_search_result)iplug->b.lookup(key, bias, coord);
  54028. + } else {
  54029. + assert("nikita-1260", order == LESS_THAN);
  54030. + coord->between = AFTER_UNIT;
  54031. + return (bias == FIND_EXACT) ? NS_NOT_FOUND : NS_FOUND;
  54032. + }
  54033. +}
  54034. +
  54035. +#undef NODE_ADDSTAT
  54036. +#undef NODE_INCSTAT
  54037. +
  54038. +/* plugin->u.node.estimate
  54039. + look for description of this method in plugin/node/node.h */
  54040. +size_t estimate_node40(znode * node)
  54041. +{
  54042. + size_t result;
  54043. +
  54044. + assert("nikita-597", node != NULL);
  54045. +
  54046. + result = free_space_node40(node) - sizeof(item_header40);
  54047. +
  54048. + return (result > 0) ? result : 0;
  54049. +}
  54050. +
  54051. +/* plugin->u.node.check
  54052. + look for description of this method in plugin/node/node.h */
  54053. +int check_node40(const znode * node /* node to check */ ,
  54054. + __u32 flags /* check flags */ ,
  54055. + const char **error /* where to store error message */ )
  54056. +{
  54057. + int nr_items;
  54058. + int i;
  54059. + reiser4_key prev;
  54060. + unsigned old_offset;
  54061. + tree_level level;
  54062. + coord_t coord;
  54063. + int result;
  54064. +
  54065. + assert("nikita-580", node != NULL);
  54066. + assert("nikita-581", error != NULL);
  54067. + assert("nikita-2948", znode_is_loaded(node));
  54068. +
  54069. + if (ZF_ISSET(node, JNODE_HEARD_BANSHEE))
  54070. + return 0;
  54071. +
  54072. + assert("nikita-582", zdata(node) != NULL);
  54073. +
  54074. + nr_items = node40_num_of_items_internal(node);
  54075. + if (nr_items < 0) {
  54076. + *error = "Negative number of items";
  54077. + return -1;
  54078. + }
  54079. +
  54080. + if (flags & REISER4_NODE_DKEYS)
  54081. + prev = *znode_get_ld_key((znode *) node);
  54082. + else
  54083. + prev = *reiser4_min_key();
  54084. +
  54085. + old_offset = 0;
  54086. + coord_init_zero(&coord);
  54087. + coord.node = (znode *) node;
  54088. + coord.unit_pos = 0;
  54089. + coord.between = AT_UNIT;
  54090. + level = znode_get_level(node);
  54091. + for (i = 0; i < nr_items; i++) {
  54092. + item_header40 *ih;
  54093. + reiser4_key unit_key;
  54094. + unsigned j;
  54095. +
  54096. + ih = node40_ih_at(node, (unsigned)i);
  54097. + coord_set_item_pos(&coord, i);
  54098. + if ((ih40_get_offset(ih) >=
  54099. + znode_size(node) - nr_items * sizeof(item_header40)) ||
  54100. + (ih40_get_offset(ih) < sizeof(node40_header))) {
  54101. + *error = "Offset is out of bounds";
  54102. + return -1;
  54103. + }
  54104. + if (ih40_get_offset(ih) <= old_offset) {
  54105. + *error = "Offsets are in wrong order";
  54106. + return -1;
  54107. + }
  54108. + if ((i == 0) && (ih40_get_offset(ih) != sizeof(node40_header))) {
  54109. + *error = "Wrong offset of first item";
  54110. + return -1;
  54111. + }
  54112. + old_offset = ih40_get_offset(ih);
  54113. +
  54114. + if (keygt(&prev, &ih->key)) {
  54115. + *error = "Keys are in wrong order";
  54116. + return -1;
  54117. + }
  54118. + if (!keyeq(&ih->key, unit_key_by_coord(&coord, &unit_key))) {
  54119. + *error = "Wrong key of first unit";
  54120. + return -1;
  54121. + }
  54122. + prev = ih->key;
  54123. + for (j = 0; j < coord_num_units(&coord); ++j) {
  54124. + coord.unit_pos = j;
  54125. + unit_key_by_coord(&coord, &unit_key);
  54126. + if (keygt(&prev, &unit_key)) {
  54127. + *error = "Unit keys are in wrong order";
  54128. + return -1;
  54129. + }
  54130. + prev = unit_key;
  54131. + }
  54132. + coord.unit_pos = 0;
  54133. + if (level != TWIG_LEVEL && item_is_extent(&coord)) {
  54134. + *error = "extent on the wrong level";
  54135. + return -1;
  54136. + }
  54137. + if (level == LEAF_LEVEL && item_is_internal(&coord)) {
  54138. + *error = "internal item on the wrong level";
  54139. + return -1;
  54140. + }
  54141. + if (level != LEAF_LEVEL &&
  54142. + !item_is_internal(&coord) && !item_is_extent(&coord)) {
  54143. + *error = "wrong item on the internal level";
  54144. + return -1;
  54145. + }
  54146. + if (level > TWIG_LEVEL && !item_is_internal(&coord)) {
  54147. + *error = "non-internal item on the internal level";
  54148. + return -1;
  54149. + }
  54150. +#if REISER4_DEBUG
  54151. + if (item_plugin_by_coord(&coord)->b.check
  54152. + && item_plugin_by_coord(&coord)->b.check(&coord, error))
  54153. + return -1;
  54154. +#endif
  54155. + if (i) {
  54156. + coord_t prev_coord;
  54157. + /* two neighboring items can not be mergeable */
  54158. + coord_dup(&prev_coord, &coord);
  54159. + coord_prev_item(&prev_coord);
  54160. + if (are_items_mergeable(&prev_coord, &coord)) {
  54161. + *error = "mergeable items in one node";
  54162. + return -1;
  54163. + }
  54164. +
  54165. + }
  54166. + }
  54167. +
  54168. + if ((flags & REISER4_NODE_DKEYS) && !node_is_empty(node)) {
  54169. + coord_t coord;
  54170. + item_plugin *iplug;
  54171. +
  54172. + coord_init_last_unit(&coord, node);
  54173. + iplug = item_plugin_by_coord(&coord);
  54174. + if ((item_is_extent(&coord) || item_is_tail(&coord)) &&
  54175. + iplug->s.file.append_key != NULL) {
  54176. + reiser4_key mkey;
  54177. +
  54178. + iplug->s.file.append_key(&coord, &mkey);
  54179. + set_key_offset(&mkey, get_key_offset(&mkey) - 1);
  54180. + read_lock_dk(current_tree);
  54181. + result = keygt(&mkey, znode_get_rd_key((znode *) node));
  54182. + read_unlock_dk(current_tree);
  54183. + if (result) {
  54184. + *error = "key of rightmost item is too large";
  54185. + return -1;
  54186. + }
  54187. + }
  54188. + }
  54189. + if (flags & REISER4_NODE_DKEYS) {
  54190. + read_lock_tree(current_tree);
  54191. + read_lock_dk(current_tree);
  54192. +
  54193. + flags |= REISER4_NODE_TREE_STABLE;
  54194. +
  54195. + if (keygt(&prev, znode_get_rd_key((znode *) node))) {
  54196. + if (flags & REISER4_NODE_TREE_STABLE) {
  54197. + *error = "Last key is greater than rdkey";
  54198. + read_unlock_dk(current_tree);
  54199. + read_unlock_tree(current_tree);
  54200. + return -1;
  54201. + }
  54202. + }
  54203. + if (keygt
  54204. + (znode_get_ld_key((znode *) node),
  54205. + znode_get_rd_key((znode *) node))) {
  54206. + *error = "ldkey is greater than rdkey";
  54207. + read_unlock_dk(current_tree);
  54208. + read_unlock_tree(current_tree);
  54209. + return -1;
  54210. + }
  54211. + if (ZF_ISSET(node, JNODE_LEFT_CONNECTED) &&
  54212. + (node->left != NULL) &&
  54213. + !ZF_ISSET(node->left, JNODE_HEARD_BANSHEE) &&
  54214. + ergo(flags & REISER4_NODE_TREE_STABLE,
  54215. + !keyeq(znode_get_rd_key(node->left),
  54216. + znode_get_ld_key((znode *) node)))
  54217. + && ergo(!(flags & REISER4_NODE_TREE_STABLE),
  54218. + keygt(znode_get_rd_key(node->left),
  54219. + znode_get_ld_key((znode *) node)))) {
  54220. + *error = "left rdkey or ldkey is wrong";
  54221. + read_unlock_dk(current_tree);
  54222. + read_unlock_tree(current_tree);
  54223. + return -1;
  54224. + }
  54225. + if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
  54226. + (node->right != NULL) &&
  54227. + !ZF_ISSET(node->right, JNODE_HEARD_BANSHEE) &&
  54228. + ergo(flags & REISER4_NODE_TREE_STABLE,
  54229. + !keyeq(znode_get_rd_key((znode *) node),
  54230. + znode_get_ld_key(node->right)))
  54231. + && ergo(!(flags & REISER4_NODE_TREE_STABLE),
  54232. + keygt(znode_get_rd_key((znode *) node),
  54233. + znode_get_ld_key(node->right)))) {
  54234. + *error = "rdkey or right ldkey is wrong";
  54235. + read_unlock_dk(current_tree);
  54236. + read_unlock_tree(current_tree);
  54237. + return -1;
  54238. + }
  54239. +
  54240. + read_unlock_dk(current_tree);
  54241. + read_unlock_tree(current_tree);
  54242. + }
  54243. +
  54244. + return 0;
  54245. +}
  54246. +
  54247. +int parse_node40_common(znode *node, const __u32 magic)
  54248. +{
  54249. + node40_header *header;
  54250. + int result;
  54251. + d8 level;
  54252. +
  54253. + header = node40_node_header((znode *) node);
  54254. + result = -EIO;
  54255. + level = nh40_get_level(header);
  54256. + if (unlikely(((__u8) znode_get_level(node)) != level))
  54257. + warning("nikita-494", "Wrong level found in node: %i != %i",
  54258. + znode_get_level(node), level);
  54259. + else if (unlikely(nh40_get_magic(header) != magic))
  54260. + warning("nikita-495",
  54261. + "Wrong magic in tree node: want %x, got %x",
  54262. + magic, nh40_get_magic(header));
  54263. + else {
  54264. + node->nr_items = node40_num_of_items_internal(node);
  54265. + result = 0;
  54266. + }
  54267. + return RETERR(result);
  54268. +}
  54269. +
/*
 * plugin->u.node.parse
 * look for description of this method in plugin/node/node.h
 *
 * Thin wrapper: validate @node against the classic node40 magic.
 */
int parse_node40(znode *node /* node to parse */)
{
	return parse_node40_common(node, REISER4_NODE40_MAGIC);
}
  54278. +
  54279. +/*
  54280. + * common part of ->init_node() for all nodes,
  54281. + * which contain node40_header at the beginning
  54282. + */
  54283. +int init_node40_common(znode *node, node_plugin *nplug,
  54284. + size_t node_header_size, const __u32 magic)
  54285. +{
  54286. + node40_header *header40;
  54287. +
  54288. + assert("nikita-570", node != NULL);
  54289. + assert("nikita-572", zdata(node) != NULL);
  54290. +
  54291. + header40 = node40_node_header(node);
  54292. + memset(header40, 0, sizeof(node40_header));
  54293. +
  54294. + nh40_set_free_space(header40, znode_size(node) - node_header_size);
  54295. + nh40_set_free_space_start(header40, node_header_size);
  54296. + /*
  54297. + * sane hypothesis: 0 in CPU format is 0 in disk format
  54298. + */
  54299. + save_plugin_id(node_plugin_to_plugin(nplug),
  54300. + &header40->common_header.plugin_id);
  54301. + nh40_set_level(header40, znode_get_level(node));
  54302. + nh40_set_magic(header40, magic);
  54303. + nh40_set_mkfs_id(header40, reiser4_mkfs_id(reiser4_get_current_sb()));
  54304. + /*
  54305. + * nr_items: 0
  54306. + * flags: 0
  54307. + */
  54308. + return 0;
  54309. +}
  54310. +
/*
 * plugin->u.node.init
 * look for description of this method in plugin/node/node.h
 *
 * Thin wrapper: format @node as an empty classic node40 node.
 */
int init_node40(znode *node /* node to initialise */)
{
	return init_node40_common(node, node_plugin_by_id(NODE40_ID),
				  sizeof(node40_header), REISER4_NODE40_MAGIC);
}
  54320. +
#ifdef GUESS_EXISTS
/*
 * Check whether the node data looks like a node of the given format:
 * the on-disk magic must equal @magic and the plugin id stored in the
 * common header must resolve to the node plugin @id.
 * Returns non-zero on a match.
 */
int guess_node40_common(const znode *node, reiser4_node_id id,
			const __u32 magic)
{
	node40_header *header;

	assert("nikita-1058", node != NULL);
	header = node40_node_header(node);
	return (nh40_get_magic(header) == magic) &&
		(id == plugin_by_disk_id(znode_get_tree(node),
					 REISER4_NODE_PLUGIN_TYPE,
					 &header->common_header.plugin_id)->h.id);
}

/* guess: does @node carry the classic node40 format? */
int guess_node40(const znode *node /* node to guess plugin of */)
{
	return guess_node40_common(node, NODE40_ID, REISER4_NODE40_MAGIC);
}
#endif
  54340. +
  54341. +/* plugin->u.node.chage_item_size
  54342. + look for description of this method in plugin/node/node.h */
  54343. +void change_item_size_node40(coord_t * coord, int by)
  54344. +{
  54345. + node40_header *nh;
  54346. + item_header40 *ih;
  54347. + char *item_data;
  54348. + int item_length;
  54349. + unsigned i;
  54350. +
  54351. + /* make sure that @item is coord of existing item */
  54352. + assert("vs-210", coord_is_existing_item(coord));
  54353. +
  54354. + nh = node40_node_header(coord->node);
  54355. +
  54356. + item_data = item_by_coord_node40(coord);
  54357. + item_length = length_by_coord_node40(coord);
  54358. +
  54359. + /* move item bodies */
  54360. + ih = node40_ih_at_coord(coord);
  54361. + memmove(item_data + item_length + by, item_data + item_length,
  54362. + nh40_get_free_space_start(node40_node_header(coord->node)) -
  54363. + (ih40_get_offset(ih) + item_length));
  54364. +
  54365. + /* update offsets of moved items */
  54366. + for (i = coord->item_pos + 1; i < nh40_get_num_items(nh); i++) {
  54367. + ih = node40_ih_at(coord->node, i);
  54368. + ih40_set_offset(ih, ih40_get_offset(ih) + by);
  54369. + }
  54370. +
  54371. + /* update node header */
  54372. + nh40_set_free_space(nh, nh40_get_free_space(nh) - by);
  54373. + nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) + by);
  54374. +}
  54375. +
/*
 * Return non-zero when @node's block address differs from the tree's root
 * block, i.e. when there is a parent that may need its delimiting keys
 * updated.
 */
static int should_notify_parent(const znode * node)
{
	/* FIXME_JMACD This looks equivalent to znode_is_root(), right? -josh */
	/* NOTE(review): strictly this is the _negation_ of a root-block
	   check -- confirm against znode_is_root() before simplifying */
	return !disk_addr_eq(znode_get_block(node),
			     &znode_get_tree(node)->root_block);
}
  54382. +
/* plugin->u.node.create_item
   look for description of this method in plugin/node/node.h

   Insert a new item with key @key, described by @data, at position
   @target.  Item bodies grow forward from the node header while item
   headers grow backward from the node end; both arrays are shifted to
   open a slot, then the body is filled either via the item plugin's
   ->paste() or by a raw copy from user/kernel memory. */
int
create_item_node40(coord_t *target, const reiser4_key *key,
		   reiser4_item_data *data, carry_plugin_info *info)
{
	node40_header *nh;
	item_header40 *ih;
	unsigned offset;
	unsigned i;

	nh = node40_node_header(target->node);

	assert("vs-212", coord_is_between_items(target));
	/* node must have enough free space */
	assert("vs-254",
	       free_space_node40(target->node) >=
	       data->length + sizeof(item_header40));
	assert("vs-1410", data->length >= 0);

	if (coord_set_to_right(target))
		/* there are not items to the right of @target, so, new item
		   will be inserted after last one */
		coord_set_item_pos(target, nh40_get_num_items(nh));

	if (target->item_pos < nh40_get_num_items(nh)) {
		/* there are items to be moved to prepare space for new
		   item */
		ih = node40_ih_at_coord(target);
		/* new item will start at this offset */
		offset = ih40_get_offset(ih);

		/* slide bodies of all items at/after @target to the right */
		memmove(zdata(target->node) + offset + data->length,
			zdata(target->node) + offset,
			nh40_get_free_space_start(nh) - offset);
		/* update headers of moved items */
		for (i = target->item_pos; i < nh40_get_num_items(nh); i++) {
			ih = node40_ih_at(target->node, i);
			ih40_set_offset(ih, ih40_get_offset(ih) + data->length);
		}

		/* @ih is set to item header of the last item, move item headers */
		memmove(ih - 1, ih,
			sizeof(item_header40) * (nh40_get_num_items(nh) -
						 target->item_pos));
	} else {
		/* appending past the last item: new item will start at this
		   offset */
		offset = nh40_get_free_space_start(nh);
	}

	/* make item header for the new item */
	ih = node40_ih_at_coord(target);
	memcpy(&ih->key, key, sizeof(reiser4_key));
	ih40_set_offset(ih, offset);
	save_plugin_id(item_plugin_to_plugin(data->iplug), &ih->plugin_id);

	/* update node header: body space plus one item header consumed */
	nh40_set_free_space(nh,
			    nh40_get_free_space(nh) - data->length -
			    sizeof(item_header40));
	nh40_set_free_space_start(nh,
				  nh40_get_free_space_start(nh) + data->length);
	node40_set_num_items(target->node, nh, nh40_get_num_items(nh) + 1);

	/* FIXME: check how does create_item work when between is set to BEFORE_UNIT */
	target->unit_pos = 0;
	target->between = AT_UNIT;
	coord_clear_iplug(target);

	/* initialize item */
	if (data->iplug->b.init != NULL) {
		data->iplug->b.init(target, NULL, data);
	}
	/* copy item body */
	if (data->iplug->b.paste != NULL) {
		data->iplug->b.paste(target, data, info);
	} else if (data->data != NULL) {
		if (data->user) {
			/* AUDIT: Are we really should not check that pointer
			   from userspace was valid and data bytes were
			   available? How will we return -EFAULT of some kind
			   without this check? */
			assert("nikita-3038", reiser4_schedulable());
			/* copy data from user space */
			if (__copy_from_user(zdata(target->node) + offset,
					     (const char __user *)data->data,
					     (unsigned)data->length))
				return RETERR(-EFAULT);
		} else
			/* copy from kernel space */
			memcpy(zdata(target->node) + offset, data->data,
			       (unsigned)data->length);
	}

	if (target->item_pos == 0) {
		/* left delimiting key has to be updated */
		prepare_for_update(NULL, target->node, info);
	}

	if (item_plugin_by_coord(target)->b.create_hook != NULL) {
		item_plugin_by_coord(target)->b.create_hook(target, data->arg);
	}

	return 0;
}
  54488. +
  54489. +/* plugin->u.node.update_item_key
  54490. + look for description of this method in plugin/node/node.h */
  54491. +void
  54492. +update_item_key_node40(coord_t * target, const reiser4_key * key,
  54493. + carry_plugin_info * info)
  54494. +{
  54495. + item_header40 *ih;
  54496. +
  54497. + ih = node40_ih_at_coord(target);
  54498. + memcpy(&ih->key, key, sizeof(reiser4_key));
  54499. +
  54500. + if (target->item_pos == 0) {
  54501. + prepare_for_update(NULL, target->node, info);
  54502. + }
  54503. +}
  54504. +
/* these bits encode cut mode: any combination of a partially cut first
   item (tail), fully removed middle items (whole) and a partially cut
   last item (head) */
#define CMODE_TAIL 1
#define CMODE_WHOLE 2
#define CMODE_HEAD 4

/* description of one cut/kill operation within a single node; positions
   left at MAX_POS_IN_NODE mean "not applicable to this cut" */
struct cut40_info {
	int mode;		/* bitmask of CMODE_* flags */
	pos_in_node_t tail_removed; /* position of item which gets tail removed */
	pos_in_node_t first_removed; /* position of first the leftmost item among items removed completely */
	pos_in_node_t removed_count; /* number of items removed completely */
	pos_in_node_t head_removed; /* position of item which gets head removed */

	pos_in_node_t freed_space_start; /* byte range freed inside the node... */
	pos_in_node_t freed_space_end;	/* ...to be squeezed out by compact() */
	pos_in_node_t first_moved;	/* first item whose body must move left */
	pos_in_node_t head_removed_location; /* new offset for the head-cut item */
};
  54522. +
  54523. +static void init_cinfo(struct cut40_info *cinfo)
  54524. +{
  54525. + cinfo->mode = 0;
  54526. + cinfo->tail_removed = MAX_POS_IN_NODE;
  54527. + cinfo->first_removed = MAX_POS_IN_NODE;
  54528. + cinfo->removed_count = MAX_POS_IN_NODE;
  54529. + cinfo->head_removed = MAX_POS_IN_NODE;
  54530. + cinfo->freed_space_start = MAX_POS_IN_NODE;
  54531. + cinfo->freed_space_end = MAX_POS_IN_NODE;
  54532. + cinfo->first_moved = MAX_POS_IN_NODE;
  54533. + cinfo->head_removed_location = MAX_POS_IN_NODE;
  54534. +}
  54535. +
/* complete cut_node40/kill_node40 content by removing the gap created by
   the removal

   @cinfo describes the freed byte range [freed_space_start,
   freed_space_end) and the first item whose body moved: item bodies after
   the gap are slid left, their header offsets corrected, headers of fully
   removed items discarded, and free space accounting updated. */
static void compact(znode * node, struct cut40_info *cinfo)
{
	node40_header *nh;
	item_header40 *ih;
	pos_in_node_t freed;
	pos_in_node_t pos, nr_items;

	assert("vs-1526", (cinfo->freed_space_start != MAX_POS_IN_NODE &&
			   cinfo->freed_space_end != MAX_POS_IN_NODE &&
			   cinfo->first_moved != MAX_POS_IN_NODE));
	assert("vs-1523", cinfo->freed_space_end >= cinfo->freed_space_start);

	nh = node40_node_header(node);
	nr_items = nh40_get_num_items(nh);

	/* remove gap made up by removal */
	memmove(zdata(node) + cinfo->freed_space_start,
		zdata(node) + cinfo->freed_space_end,
		nh40_get_free_space_start(nh) - cinfo->freed_space_end);

	/* update item headers of moved items - change their locations */
	pos = cinfo->first_moved;
	ih = node40_ih_at(node, pos);
	if (cinfo->head_removed_location != MAX_POS_IN_NODE) {
		/* the head-cut item now starts where its old head began */
		assert("vs-1580", pos == cinfo->head_removed);
		ih40_set_offset(ih, cinfo->head_removed_location);
		pos++;
		ih--;
	}

	freed = cinfo->freed_space_end - cinfo->freed_space_start;
	for (; pos < nr_items; pos++, ih--) {
		assert("vs-1581", ih == node40_ih_at(node, pos));
		ih40_set_offset(ih, ih40_get_offset(ih) - freed);
	}

	/* free space start moved to the left by @freed bytes */
	nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) - freed);

	if (cinfo->removed_count != MAX_POS_IN_NODE) {
		/* number of items changed. Remove item headers of those items */
		ih = node40_ih_at(node, nr_items - 1);
		memmove(ih + cinfo->removed_count, ih,
			sizeof(item_header40) * (nr_items -
						 cinfo->removed_count -
						 cinfo->first_removed));
		freed += sizeof(item_header40) * cinfo->removed_count;
		node40_set_num_items(node, nh, nr_items - cinfo->removed_count);
	}

	/* total amount of free space increased */
	nh40_set_free_space(nh, nh40_get_free_space(nh) + freed);
}
  54590. +
  54591. +int shrink_item_node40(coord_t * coord, int delta)
  54592. +{
  54593. + node40_header *nh;
  54594. + item_header40 *ih;
  54595. + pos_in_node_t pos;
  54596. + pos_in_node_t nr_items;
  54597. + char *end;
  54598. + znode *node;
  54599. + int off;
  54600. +
  54601. + assert("nikita-3487", coord != NULL);
  54602. + assert("nikita-3488", delta >= 0);
  54603. +
  54604. + node = coord->node;
  54605. + nh = node40_node_header(node);
  54606. + nr_items = nh40_get_num_items(nh);
  54607. +
  54608. + ih = node40_ih_at_coord(coord);
  54609. + assert("nikita-3489", delta <= length_by_coord_node40(coord));
  54610. + off = ih40_get_offset(ih) + length_by_coord_node40(coord);
  54611. + end = zdata(node) + off;
  54612. +
  54613. + /* remove gap made up by removal */
  54614. + memmove(end - delta, end, nh40_get_free_space_start(nh) - off);
  54615. +
  54616. + /* update item headers of moved items - change their locations */
  54617. + pos = coord->item_pos + 1;
  54618. + ih = node40_ih_at(node, pos);
  54619. + for (; pos < nr_items; pos++, ih--) {
  54620. + assert("nikita-3490", ih == node40_ih_at(node, pos));
  54621. + ih40_set_offset(ih, ih40_get_offset(ih) - delta);
  54622. + }
  54623. +
  54624. + /* free space start moved to left */
  54625. + nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) - delta);
  54626. + /* total amount of free space increased */
  54627. + nh40_set_free_space(nh, nh40_get_free_space(nh) + delta);
  54628. + /*
  54629. + * This method does _not_ changes number of items. Hence, it cannot
  54630. + * make node empty. Also it doesn't remove items at all, which means
  54631. + * that no keys have to be updated either.
  54632. + */
  54633. + return 0;
  54634. +}
  54635. +
/*
 * Evaluate cut mode, if key range has been specified.
 *
 * This is for the case when units are not minimal objects
 * addressed by keys.
 *
 * This doesn't work when range contains objects with
 * non-unique keys (e.g. directory items).
 *
 * Returns 1 when the range lies strictly inside one item (caller must use
 * the cut-from-the-middle path), 0 otherwise with @cinfo filled in.
 */
static int parse_cut_by_key_range(struct cut40_info *cinfo,
				  const struct cut_kill_params *params)
{
	reiser4_key min_from_key, max_to_key;
	const reiser4_key *from_key = params->from_key;
	const reiser4_key *to_key = params->to_key;
	/*
	 * calculate minimal key stored in first item
	 * of items to be cut (params->from)
	 */
	item_key_by_coord(params->from, &min_from_key);
	/*
	 * calculate maximal key stored in last item
	 * of items to be cut (params->to)
	 */
	max_item_key_by_coord(params->to, &max_to_key);

	if (params->from->item_pos == params->to->item_pos) {
		/* both ends fall into the same item */
		if (keylt(&min_from_key, from_key)
		    && keylt(to_key, &max_to_key))
			return 1;

		if (keygt(from_key, &min_from_key)) {
			/* tail of item is to be cut */
			cinfo->tail_removed = params->from->item_pos;
			cinfo->mode |= CMODE_TAIL;
		} else if (keylt(to_key, &max_to_key)) {
			/* head of item is to be cut */
			cinfo->head_removed = params->from->item_pos;
			cinfo->mode |= CMODE_HEAD;
		} else {
			/* item is removed completely */
			cinfo->first_removed = params->from->item_pos;
			cinfo->removed_count = 1;
			cinfo->mode |= CMODE_WHOLE;
		}
	} else {
		/* provisionally: only the items strictly between @from and
		   @to are removed completely; the boundary items are then
		   either included or marked as partial cuts below */
		cinfo->first_removed = params->from->item_pos + 1;
		cinfo->removed_count =
			params->to->item_pos - params->from->item_pos - 1;

		if (keygt(from_key, &min_from_key)) {
			/* first item is not cut completely */
			cinfo->tail_removed = params->from->item_pos;
			cinfo->mode |= CMODE_TAIL;
		} else {
			cinfo->first_removed--;
			cinfo->removed_count++;
		}
		if (keylt(to_key, &max_to_key)) {
			/* last item is not cut completely */
			cinfo->head_removed = params->to->item_pos;
			cinfo->mode |= CMODE_HEAD;
		} else {
			cinfo->removed_count++;
		}
		if (cinfo->removed_count)
			cinfo->mode |= CMODE_WHOLE;
	}
	return 0;
}
  54706. +
/*
 * Evaluate cut mode, if the key range hasn't been specified.
 * In this case the range can include objects with non-unique
 * keys (e.g. directory entries).
 *
 * This doesn't work when units are not the minimal objects
 * addressed by keys (e.g. bytes in file's body stored in
 * unformatted nodes).
 *
 * Returns 1 when the range lies strictly inside one item (caller must use
 * the cut-from-the-middle path), 0 otherwise with @cinfo filled in.
 */
static int parse_cut_by_coord_range(struct cut40_info *cinfo,
				    const struct cut_kill_params *params)
{
	coord_t *from = params->from;
	coord_t *to = params->to;

	if (from->item_pos == to->item_pos) {
		/*
		 * cut is performed on only one item
		 */
		if (from->unit_pos > 0 &&
		    to->unit_pos < coord_last_unit_pos(to))
			/*
			 * cut from the middle of item
			 */
			return 1;
		if (from->unit_pos > 0) {
			/*
			 * tail of item is to be cut
			 */
			cinfo->tail_removed = params->from->item_pos;
			cinfo->mode |= CMODE_TAIL;
		} else if (to->unit_pos < coord_last_unit_pos(to)) {
			/*
			 * head of item is to be cut
			 * (from->item_pos == to->item_pos in this branch)
			 */
			cinfo->head_removed = params->from->item_pos;
			cinfo->mode |= CMODE_HEAD;
		} else {
			/*
			 * item is removed completely
			 */
			assert("edward-1631",
			       from->unit_pos == 0 &&
			       to->unit_pos == coord_last_unit_pos(to));

			cinfo->first_removed = params->from->item_pos;
			cinfo->removed_count = 1;
			cinfo->mode |= CMODE_WHOLE;
		}
	} else {
		/* provisionally: only the items strictly between @from and
		   @to are removed completely; boundary items are included
		   or marked as partial cuts below */
		cinfo->first_removed = from->item_pos + 1;
		cinfo->removed_count =
			to->item_pos - from->item_pos - 1;

		if (from->unit_pos > 0) {
			/*
			 * first item is not cut completely
			 */
			cinfo->tail_removed = from->item_pos;
			cinfo->mode |= CMODE_TAIL;
		} else {
			cinfo->first_removed--;
			cinfo->removed_count++;
		}
		if (to->unit_pos < coord_last_unit_pos(to)) {
			/*
			 * last item is not cut completely
			 */
			cinfo->head_removed = to->item_pos;
			cinfo->mode |= CMODE_HEAD;
		} else {
			cinfo->removed_count++;
		}
		if (cinfo->removed_count)
			cinfo->mode |= CMODE_WHOLE;
	}
	return 0;
}
  54785. +
  54786. +/*
  54787. + * this is used by cut_node40 and kill_node40. It analyses input parameters
  54788. + * and calculates cut mode. There are 2 types of cut. First is when a unit is
  54789. + * removed from the middle of an item. In this case this function returns 1.
  54790. + * All the rest fits into second case: 0 or 1 of items getting tail cut, 0 or
  54791. + * more items removed completely and 0 or 1 item getting head cut. Function
  54792. + * returns 0 in this case
  54793. + */
  54794. +static int parse_cut(struct cut40_info *cinfo,
  54795. + const struct cut_kill_params *params)
  54796. +{
  54797. + init_cinfo(cinfo);
  54798. + if (params->from_key == NULL) {
  54799. + /*
  54800. + * cut key range is not defined in input parameters
  54801. + */
  54802. + assert("vs-1513", params->to_key == NULL);
  54803. + return parse_cut_by_coord_range(cinfo, params);
  54804. + } else
  54805. + return parse_cut_by_key_range(cinfo, params);
  54806. +}
  54807. +
  54808. +static void
  54809. +call_kill_hooks(znode * node, pos_in_node_t from, pos_in_node_t count,
  54810. + carry_kill_data * kdata)
  54811. +{
  54812. + coord_t coord;
  54813. + item_plugin *iplug;
  54814. + pos_in_node_t pos;
  54815. +
  54816. + coord.node = node;
  54817. + coord.unit_pos = 0;
  54818. + coord.between = AT_UNIT;
  54819. + for (pos = 0; pos < count; pos++) {
  54820. + coord_set_item_pos(&coord, from + pos);
  54821. + coord.unit_pos = 0;
  54822. + coord.between = AT_UNIT;
  54823. + iplug = item_plugin_by_coord(&coord);
  54824. + if (iplug->b.kill_hook) {
  54825. + iplug->b.kill_hook(&coord, 0, coord_num_units(&coord),
  54826. + kdata);
  54827. + }
  54828. + }
  54829. +}
  54830. +
  54831. +/* this is used to kill item partially */
  54832. +static pos_in_node_t
  54833. +kill_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to, void *data,
  54834. + reiser4_key * smallest_removed, reiser4_key * new_first_key)
  54835. +{
  54836. + struct carry_kill_data *kdata;
  54837. + item_plugin *iplug;
  54838. +
  54839. + kdata = data;
  54840. + iplug = item_plugin_by_coord(coord);
  54841. +
  54842. + assert("vs-1524", iplug->b.kill_units);
  54843. + return iplug->b.kill_units(coord, from, to, kdata, smallest_removed,
  54844. + new_first_key);
  54845. +}
  54846. +
  54847. +/* call item plugin to cut tail of file */
  54848. +static pos_in_node_t
  54849. +kill_tail(coord_t * coord, void *data, reiser4_key * smallest_removed)
  54850. +{
  54851. + struct carry_kill_data *kdata;
  54852. + pos_in_node_t to;
  54853. +
  54854. + kdata = data;
  54855. + to = coord_last_unit_pos(coord);
  54856. + return kill_units(coord, coord->unit_pos, to, kdata, smallest_removed,
  54857. + NULL);
  54858. +}
  54859. +
/* call item plugin to cut head of item: kills the leading units (from 0 up
   to @coord->unit_pos); @new_first_key receives the key the item starts
   with afterwards */
static pos_in_node_t
kill_head(coord_t * coord, void *data, reiser4_key * smallest_removed,
	  reiser4_key * new_first_key)
{
	return kill_units(coord, 0, coord->unit_pos, data, smallest_removed,
			  new_first_key);
}
  54868. +
  54869. +/* this is used to cut item partially */
  54870. +static pos_in_node_t
  54871. +cut_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to, void *data,
  54872. + reiser4_key * smallest_removed, reiser4_key * new_first_key)
  54873. +{
  54874. + carry_cut_data *cdata;
  54875. + item_plugin *iplug;
  54876. +
  54877. + cdata = data;
  54878. + iplug = item_plugin_by_coord(coord);
  54879. + assert("vs-302", iplug->b.cut_units);
  54880. + return iplug->b.cut_units(coord, from, to, cdata, smallest_removed,
  54881. + new_first_key);
  54882. +}
  54883. +
/* call item plugin to cut tail of file */
static pos_in_node_t
cut_tail(coord_t * coord, void *data, reiser4_key * smallest_removed)
{
	carry_cut_data *cdata;
	pos_in_node_t to;

	cdata = data;
	/* NOTE(review): unlike kill_tail() this takes the last unit position
	   from cdata->params.from rather than from @coord -- presumably the
	   two coincide at the call site; confirm */
	to = coord_last_unit_pos(cdata->params.from);
	return cut_units(coord, coord->unit_pos, to, data, smallest_removed, NULL);
}
  54895. +
/* call item plugin to cut head of item: cuts the leading units (from 0 up
   to @coord->unit_pos); @new_first_key receives the key of what remains */
static pos_in_node_t
cut_head(coord_t * coord, void *data, reiser4_key * smallest_removed,
	 reiser4_key * new_first_key)
{
	return cut_units(coord, 0, coord->unit_pos, data, smallest_removed,
			 new_first_key);
}
  54904. +
  54905. +/* this returns 1 of key of first item changed, 0 - if it did not */
  54906. +static int
  54907. +prepare_for_compact(struct cut40_info *cinfo,
  54908. + const struct cut_kill_params *params, int is_cut,
  54909. + void *data, carry_plugin_info * info)
  54910. +{
  54911. + znode *node;
  54912. + item_header40 *ih;
  54913. + pos_in_node_t freed;
  54914. + pos_in_node_t item_pos;
  54915. + coord_t coord;
  54916. + reiser4_key new_first_key;
  54917. + pos_in_node_t(*kill_units_f) (coord_t *, pos_in_node_t, pos_in_node_t,
  54918. + void *, reiser4_key *, reiser4_key *);
  54919. + pos_in_node_t(*kill_tail_f) (coord_t *, void *, reiser4_key *);
  54920. + pos_in_node_t(*kill_head_f) (coord_t *, void *, reiser4_key *,
  54921. + reiser4_key *);
  54922. + int retval;
  54923. +
  54924. + retval = 0;
  54925. +
  54926. + node = params->from->node;
  54927. +
  54928. + assert("vs-184", node == params->to->node);
  54929. + assert("vs-312", !node_is_empty(node));
  54930. + assert("vs-297",
  54931. + coord_compare(params->from, params->to) != COORD_CMP_ON_RIGHT);
  54932. +
  54933. + if (is_cut) {
  54934. + kill_units_f = cut_units;
  54935. + kill_tail_f = cut_tail;
  54936. + kill_head_f = cut_head;
  54937. + } else {
  54938. + kill_units_f = kill_units;
  54939. + kill_tail_f = kill_tail;
  54940. + kill_head_f = kill_head;
  54941. + }
  54942. +
  54943. + if (parse_cut(cinfo, params) == 1) {
  54944. + /* cut from the middle of item */
  54945. + freed =
  54946. + kill_units_f(params->from, params->from->unit_pos,
  54947. + params->to->unit_pos, data,
  54948. + params->smallest_removed, NULL);
  54949. +
  54950. + item_pos = params->from->item_pos;
  54951. + ih = node40_ih_at(node, item_pos);
  54952. + cinfo->freed_space_start =
  54953. + ih40_get_offset(ih) + node40_item_length(node,
  54954. + item_pos) - freed;
  54955. + cinfo->freed_space_end = cinfo->freed_space_start + freed;
  54956. + cinfo->first_moved = item_pos + 1;
  54957. + } else {
  54958. + assert("vs-1521", (cinfo->tail_removed != MAX_POS_IN_NODE ||
  54959. + cinfo->first_removed != MAX_POS_IN_NODE ||
  54960. + cinfo->head_removed != MAX_POS_IN_NODE));
  54961. +
  54962. + switch (cinfo->mode) {
  54963. + case CMODE_TAIL:
  54964. + /* one item gets cut partially from its end */
  54965. + assert("vs-1562",
  54966. + cinfo->tail_removed == params->from->item_pos);
  54967. +
  54968. + freed =
  54969. + kill_tail_f(params->from, data,
  54970. + params->smallest_removed);
  54971. +
  54972. + item_pos = cinfo->tail_removed;
  54973. + ih = node40_ih_at(node, item_pos);
  54974. + cinfo->freed_space_start =
  54975. + ih40_get_offset(ih) + node40_item_length(node,
  54976. + item_pos) -
  54977. + freed;
  54978. + cinfo->freed_space_end =
  54979. + cinfo->freed_space_start + freed;
  54980. + cinfo->first_moved = cinfo->tail_removed + 1;
  54981. + break;
  54982. +
  54983. + case CMODE_WHOLE:
  54984. + /* one or more items get removed completely */
  54985. + assert("vs-1563",
  54986. + cinfo->first_removed == params->from->item_pos);
  54987. + assert("vs-1564", cinfo->removed_count > 0
  54988. + && cinfo->removed_count != MAX_POS_IN_NODE);
  54989. +
  54990. + /* call kill hook for all items removed completely */
  54991. + if (is_cut == 0)
  54992. + call_kill_hooks(node, cinfo->first_removed,
  54993. + cinfo->removed_count, data);
  54994. +
  54995. + item_pos = cinfo->first_removed;
  54996. + ih = node40_ih_at(node, item_pos);
  54997. +
  54998. + if (params->smallest_removed)
  54999. + memcpy(params->smallest_removed, &ih->key,
  55000. + sizeof(reiser4_key));
  55001. +
  55002. + cinfo->freed_space_start = ih40_get_offset(ih);
  55003. +
  55004. + item_pos += (cinfo->removed_count - 1);
  55005. + ih -= (cinfo->removed_count - 1);
  55006. + cinfo->freed_space_end =
  55007. + ih40_get_offset(ih) + node40_item_length(node,
  55008. + item_pos);
  55009. + cinfo->first_moved = item_pos + 1;
  55010. + if (cinfo->first_removed == 0)
  55011. + /* key of first item of the node changes */
  55012. + retval = 1;
  55013. + break;
  55014. +
  55015. + case CMODE_HEAD:
  55016. + /* one item gets cut partially from its head */
  55017. + assert("vs-1565",
  55018. + cinfo->head_removed == params->from->item_pos);
  55019. +
  55020. + freed =
  55021. + kill_head_f(params->to, data,
  55022. + params->smallest_removed,
  55023. + &new_first_key);
  55024. +
  55025. + item_pos = cinfo->head_removed;
  55026. + ih = node40_ih_at(node, item_pos);
  55027. + cinfo->freed_space_start = ih40_get_offset(ih);
  55028. + cinfo->freed_space_end = ih40_get_offset(ih) + freed;
  55029. + cinfo->first_moved = cinfo->head_removed + 1;
  55030. +
  55031. + /* item head is removed, therefore, item key changed */
  55032. + coord.node = node;
  55033. + coord_set_item_pos(&coord, item_pos);
  55034. + coord.unit_pos = 0;
  55035. + coord.between = AT_UNIT;
  55036. + update_item_key_node40(&coord, &new_first_key, NULL);
  55037. + if (item_pos == 0)
  55038. + /* key of first item of the node changes */
  55039. + retval = 1;
  55040. + break;
  55041. +
  55042. + case CMODE_TAIL | CMODE_WHOLE:
  55043. + /* one item gets cut from its end and one or more items get removed completely */
  55044. + assert("vs-1566",
  55045. + cinfo->tail_removed == params->from->item_pos);
  55046. + assert("vs-1567",
  55047. + cinfo->first_removed == cinfo->tail_removed + 1);
  55048. + assert("vs-1564", cinfo->removed_count > 0
  55049. + && cinfo->removed_count != MAX_POS_IN_NODE);
  55050. +
  55051. + freed =
  55052. + kill_tail_f(params->from, data,
  55053. + params->smallest_removed);
  55054. +
  55055. + item_pos = cinfo->tail_removed;
  55056. + ih = node40_ih_at(node, item_pos);
  55057. + cinfo->freed_space_start =
  55058. + ih40_get_offset(ih) + node40_item_length(node,
  55059. + item_pos) -
  55060. + freed;
  55061. +
  55062. + /* call kill hook for all items removed completely */
  55063. + if (is_cut == 0)
  55064. + call_kill_hooks(node, cinfo->first_removed,
  55065. + cinfo->removed_count, data);
  55066. +
  55067. + item_pos += cinfo->removed_count;
  55068. + ih -= cinfo->removed_count;
  55069. + cinfo->freed_space_end =
  55070. + ih40_get_offset(ih) + node40_item_length(node,
  55071. + item_pos);
  55072. + cinfo->first_moved = item_pos + 1;
  55073. + break;
  55074. +
  55075. + case CMODE_WHOLE | CMODE_HEAD:
  55076. + /* one or more items get removed completely and one item gets cut partially from its head */
  55077. + assert("vs-1568",
  55078. + cinfo->first_removed == params->from->item_pos);
  55079. + assert("vs-1564", cinfo->removed_count > 0
  55080. + && cinfo->removed_count != MAX_POS_IN_NODE);
  55081. + assert("vs-1569",
  55082. + cinfo->head_removed ==
  55083. + cinfo->first_removed + cinfo->removed_count);
  55084. +
  55085. + /* call kill hook for all items removed completely */
  55086. + if (is_cut == 0)
  55087. + call_kill_hooks(node, cinfo->first_removed,
  55088. + cinfo->removed_count, data);
  55089. +
  55090. + item_pos = cinfo->first_removed;
  55091. + ih = node40_ih_at(node, item_pos);
  55092. +
  55093. + if (params->smallest_removed)
  55094. + memcpy(params->smallest_removed, &ih->key,
  55095. + sizeof(reiser4_key));
  55096. +
  55097. + freed =
  55098. + kill_head_f(params->to, data, NULL, &new_first_key);
  55099. +
  55100. + cinfo->freed_space_start = ih40_get_offset(ih);
  55101. +
  55102. + ih = node40_ih_at(node, cinfo->head_removed);
  55103. + /* this is the most complex case. Item which got head removed and items which are to be moved
  55104. + intact change their location differently. */
  55105. + cinfo->freed_space_end = ih40_get_offset(ih) + freed;
  55106. + cinfo->first_moved = cinfo->head_removed;
  55107. + cinfo->head_removed_location = cinfo->freed_space_start;
  55108. +
  55109. + /* item head is removed, therefore, item key changed */
  55110. + coord.node = node;
  55111. + coord_set_item_pos(&coord, cinfo->head_removed);
  55112. + coord.unit_pos = 0;
  55113. + coord.between = AT_UNIT;
  55114. + update_item_key_node40(&coord, &new_first_key, NULL);
  55115. +
  55116. + assert("vs-1579", cinfo->first_removed == 0);
  55117. + /* key of first item of the node changes */
  55118. + retval = 1;
  55119. + break;
  55120. +
  55121. + case CMODE_TAIL | CMODE_HEAD:
  55122. + /* one item gets cut from its end and its neighbor gets cut from its head */
  55123. + impossible("vs-1576", "this can not happen currently");
  55124. + break;
  55125. +
  55126. + case CMODE_TAIL | CMODE_WHOLE | CMODE_HEAD:
  55127. + impossible("vs-1577", "this can not happen currently");
  55128. + break;
  55129. + default:
  55130. + impossible("vs-1578", "unexpected cut mode");
  55131. + break;
  55132. + }
  55133. + }
  55134. + return retval;
  55135. +}
  55136. +
  55137. +/* plugin->u.node.kill
  55138. + return value is number of items removed completely */
  55139. +int kill_node40(struct carry_kill_data *kdata, carry_plugin_info * info)
  55140. +{
  55141. + znode *node;
  55142. + struct cut40_info cinfo;
  55143. + int first_key_changed;
  55144. +
  55145. + node = kdata->params.from->node;
  55146. +
  55147. + first_key_changed =
  55148. + prepare_for_compact(&cinfo, &kdata->params, 0 /* not cut */ , kdata,
  55149. + info);
  55150. + compact(node, &cinfo);
  55151. +
  55152. + if (info) {
  55153. + /* it is not called by node40_shift, so we have to take care
  55154. + of changes on upper levels */
  55155. + if (node_is_empty(node)
  55156. + && !(kdata->flags & DELETE_RETAIN_EMPTY))
  55157. + /* all contents of node is deleted */
  55158. + prepare_removal_node40(node, info);
  55159. + else if (first_key_changed) {
  55160. + prepare_for_update(NULL, node, info);
  55161. + }
  55162. + }
  55163. +
  55164. + coord_clear_iplug(kdata->params.from);
  55165. + coord_clear_iplug(kdata->params.to);
  55166. +
  55167. + znode_make_dirty(node);
  55168. + return cinfo.removed_count == MAX_POS_IN_NODE ? 0 : cinfo.removed_count;
  55169. +}
  55170. +
  55171. +/* plugin->u.node.cut
  55172. + return value is number of items removed completely */
  55173. +int cut_node40(struct carry_cut_data *cdata, carry_plugin_info * info)
  55174. +{
  55175. + znode *node;
  55176. + struct cut40_info cinfo;
  55177. + int first_key_changed;
  55178. +
  55179. + node = cdata->params.from->node;
  55180. +
  55181. + first_key_changed =
  55182. + prepare_for_compact(&cinfo, &cdata->params, 1 /* cut */ , cdata,
  55183. + info);
  55184. + compact(node, &cinfo);
  55185. +
  55186. + if (info) {
  55187. + /* it is not called by node40_shift, so we have to take care
  55188. + of changes on upper levels */
  55189. + if (node_is_empty(node))
  55190. + /* all contents of node is deleted */
  55191. + prepare_removal_node40(node, info);
  55192. + else if (first_key_changed) {
  55193. + prepare_for_update(NULL, node, info);
  55194. + }
  55195. + }
  55196. +
  55197. + coord_clear_iplug(cdata->params.from);
  55198. + coord_clear_iplug(cdata->params.to);
  55199. +
  55200. + znode_make_dirty(node);
  55201. + return cinfo.removed_count == MAX_POS_IN_NODE ? 0 : cinfo.removed_count;
  55202. +}
  55203. +
  55204. +/* this structure is used by shift method of node40 plugin */
  55205. +struct shift_params {
  55206. + shift_direction pend; /* when @pend == append - we are shifting to
  55207. + left, when @pend == prepend - to right */
  55208. + coord_t wish_stop; /* when shifting to left this is last unit we
  55209. + want shifted, when shifting to right - this
  55210. + is set to unit we want to start shifting
  55211. + from */
  55212. + znode *target;
  55213. + int everything; /* it is set to 1 if everything we have to shift is
  55214. + shifted, 0 - otherwise */
  55215. +
  55216. + /* FIXME-VS: get rid of read_stop */
  55217. +
  55218. + /* these are set by estimate_shift */
  55219. + coord_t real_stop; /* this will be set to last unit which will be
  55220. + really shifted */
  55221. +
  55222. + /* coordinate in source node before operation of unit which becomes
  55223. + first after shift to left of last after shift to right */
  55224. + union {
  55225. + coord_t future_first;
  55226. + coord_t future_last;
  55227. + } u;
  55228. +
  55229. + unsigned merging_units; /* number of units of first item which have to
  55230. + be merged with last item of target node */
  55231. + unsigned merging_bytes; /* number of bytes in those units */
  55232. +
  55233. + unsigned entire; /* items shifted in their entirety */
  55234. + unsigned entire_bytes; /* number of bytes in those items */
  55235. +
  55236. + unsigned part_units; /* number of units of partially copied item */
  55237. + unsigned part_bytes; /* number of bytes in those units */
  55238. +
  55239. + unsigned shift_bytes; /* total number of bytes in items shifted (item
  55240. + headers not included) */
  55241. +
  55242. +};
  55243. +
  55244. +static int item_creation_overhead(coord_t *item)
  55245. +{
  55246. + return node_plugin_by_coord(item)->item_overhead(item->node, NULL);
  55247. +}
  55248. +
  55249. +/* how many units are there in @source starting from source->unit_pos
  55250. + but not further than @stop_coord */
  55251. +static int
  55252. +wanted_units(coord_t *source, coord_t *stop_coord, shift_direction pend)
  55253. +{
  55254. + if (pend == SHIFT_LEFT) {
  55255. + assert("vs-181", source->unit_pos == 0);
  55256. + } else {
  55257. + assert("vs-182",
  55258. + source->unit_pos == coord_last_unit_pos(source));
  55259. + }
  55260. +
  55261. + if (source->item_pos != stop_coord->item_pos) {
  55262. + /* @source and @stop_coord are different items */
  55263. + return coord_last_unit_pos(source) + 1;
  55264. + }
  55265. +
  55266. + if (pend == SHIFT_LEFT) {
  55267. + return stop_coord->unit_pos + 1;
  55268. + } else {
  55269. + return source->unit_pos - stop_coord->unit_pos + 1;
  55270. + }
  55271. +}
  55272. +
  55273. +/* this calculates what can be copied from @shift->wish_stop.node to
  55274. + @shift->target */
  55275. +static void
  55276. +estimate_shift(struct shift_params *shift, const reiser4_context * ctx)
  55277. +{
  55278. + unsigned target_free_space, size;
  55279. + pos_in_node_t stop_item; /* item which estimating should not consider */
  55280. + unsigned want; /* number of units of item we want shifted */
  55281. + coord_t source; /* item being estimated */
  55282. + item_plugin *iplug;
  55283. +
  55284. + /* shifting to left/right starts from first/last units of
  55285. + @shift->wish_stop.node */
  55286. + if (shift->pend == SHIFT_LEFT) {
  55287. + coord_init_first_unit(&source, shift->wish_stop.node);
  55288. + } else {
  55289. + coord_init_last_unit(&source, shift->wish_stop.node);
  55290. + }
  55291. + shift->real_stop = source;
  55292. +
  55293. + /* free space in target node and number of items in source */
  55294. + target_free_space = znode_free_space(shift->target);
  55295. +
  55296. + shift->everything = 0;
  55297. + if (!node_is_empty(shift->target)) {
  55298. + /* target node is not empty, check for boundary items
  55299. + mergeability */
  55300. + coord_t to;
  55301. +
  55302. + /* item we try to merge @source with */
  55303. + if (shift->pend == SHIFT_LEFT) {
  55304. + coord_init_last_unit(&to, shift->target);
  55305. + } else {
  55306. + coord_init_first_unit(&to, shift->target);
  55307. + }
  55308. +
  55309. + if ((shift->pend == SHIFT_LEFT) ? are_items_mergeable(&to,
  55310. + &source) :
  55311. + are_items_mergeable(&source, &to)) {
  55312. + /* how many units of @source do we want to merge to
  55313. + item @to */
  55314. + want =
  55315. + wanted_units(&source, &shift->wish_stop,
  55316. + shift->pend);
  55317. +
  55318. + /* how many units of @source we can merge to item
  55319. + @to */
  55320. + iplug = item_plugin_by_coord(&source);
  55321. + if (iplug->b.can_shift != NULL)
  55322. + shift->merging_units =
  55323. + iplug->b.can_shift(target_free_space,
  55324. + &source, shift->target,
  55325. + shift->pend, &size,
  55326. + want);
  55327. + else {
  55328. + shift->merging_units = 0;
  55329. + size = 0;
  55330. + }
  55331. + shift->merging_bytes = size;
  55332. + shift->shift_bytes += size;
  55333. + /* update stop coord to be set to last unit of @source
  55334. + we can merge to @target */
  55335. + if (shift->merging_units)
  55336. + /* at least one unit can be shifted */
  55337. + shift->real_stop.unit_pos =
  55338. + (shift->merging_units - source.unit_pos -
  55339. + 1) * shift->pend;
  55340. + else {
  55341. + /* nothing can be shifted */
  55342. + if (shift->pend == SHIFT_LEFT)
  55343. + coord_init_before_first_item(&shift->
  55344. + real_stop,
  55345. + source.
  55346. + node);
  55347. + else
  55348. + coord_init_after_last_item(&shift->
  55349. + real_stop,
  55350. + source.node);
  55351. + }
  55352. + assert("nikita-2081", shift->real_stop.unit_pos + 1);
  55353. +
  55354. + if (shift->merging_units != want) {
  55355. + /* we could not copy as many as we want, so,
  55356. + there is no reason for estimating any
  55357. + longer */
  55358. + return;
  55359. + }
  55360. +
  55361. + target_free_space -= size;
  55362. + coord_add_item_pos(&source, shift->pend);
  55363. + }
  55364. + }
  55365. +
  55366. + /* number of the item, no part of which we want to shift */
  55367. + stop_item = shift->wish_stop.item_pos + shift->pend;
  55368. +
  55369. + /* calculate how many items can be copied into given free
  55370. + space as whole */
  55371. + for (; source.item_pos != stop_item;
  55372. + coord_add_item_pos(&source, shift->pend)) {
  55373. + if (shift->pend == SHIFT_RIGHT)
  55374. + source.unit_pos = coord_last_unit_pos(&source);
  55375. +
  55376. + /* how many units of @source do we want to copy */
  55377. + want = wanted_units(&source, &shift->wish_stop, shift->pend);
  55378. +
  55379. + if (want == coord_last_unit_pos(&source) + 1) {
  55380. + /* we want this item to be copied entirely */
  55381. + size =
  55382. + item_length_by_coord(&source) +
  55383. + item_creation_overhead(&source);
  55384. + if (size <= target_free_space) {
  55385. + /* item fits into target node as whole */
  55386. + target_free_space -= size;
  55387. + shift->shift_bytes +=
  55388. + size - item_creation_overhead(&source);
  55389. + shift->entire_bytes +=
  55390. + size - item_creation_overhead(&source);
  55391. + shift->entire++;
  55392. +
  55393. + /* update shift->real_stop coord to be set to
  55394. + last unit of @source we can merge to
  55395. + @target */
  55396. + shift->real_stop = source;
  55397. + if (shift->pend == SHIFT_LEFT)
  55398. + shift->real_stop.unit_pos =
  55399. + coord_last_unit_pos(&shift->
  55400. + real_stop);
  55401. + else
  55402. + shift->real_stop.unit_pos = 0;
  55403. + continue;
  55404. + }
  55405. + }
  55406. +
  55407. + /* we reach here only for an item which does not fit into
  55408. + target node in its entirety. This item may be either
  55409. + partially shifted, or not shifted at all. We will have to
  55410. + create new item in target node, so decrease amount of free
  55411. + space by an item creation overhead. We can reach here also
  55412. + if stop coord is in this item */
  55413. + if (target_free_space >=
  55414. + (unsigned)item_creation_overhead(&source)) {
  55415. + target_free_space -= item_creation_overhead(&source);
  55416. + iplug = item_plugin_by_coord(&source);
  55417. + if (iplug->b.can_shift) {
  55418. + shift->part_units = iplug->b.can_shift(target_free_space,
  55419. + &source,
  55420. + NULL, /* target */
  55421. + shift->pend,
  55422. + &size,
  55423. + want);
  55424. + } else {
  55425. + target_free_space = 0;
  55426. + shift->part_units = 0;
  55427. + size = 0;
  55428. + }
  55429. + } else {
  55430. + target_free_space = 0;
  55431. + shift->part_units = 0;
  55432. + size = 0;
  55433. + }
  55434. + shift->part_bytes = size;
  55435. + shift->shift_bytes += size;
  55436. +
  55437. + /* set @shift->real_stop to last unit of @source we can merge
  55438. + to @shift->target */
  55439. + if (shift->part_units) {
  55440. + shift->real_stop = source;
  55441. + shift->real_stop.unit_pos =
  55442. + (shift->part_units - source.unit_pos -
  55443. + 1) * shift->pend;
  55444. + assert("nikita-2082", shift->real_stop.unit_pos + 1);
  55445. + }
  55446. +
  55447. + if (want != shift->part_units)
  55448. + /* not everything wanted were shifted */
  55449. + return;
  55450. + break;
  55451. + }
  55452. +
  55453. + shift->everything = 1;
  55454. +}
  55455. +
  55456. +static void
  55457. +copy_units(coord_t * target, coord_t * source, unsigned from, unsigned count,
  55458. + shift_direction dir, unsigned free_space)
  55459. +{
  55460. + item_plugin *iplug;
  55461. +
  55462. + assert("nikita-1463", target != NULL);
  55463. + assert("nikita-1464", source != NULL);
  55464. + assert("nikita-1465", from + count <= coord_num_units(source));
  55465. +
  55466. + iplug = item_plugin_by_coord(source);
  55467. + assert("nikita-1468", iplug == item_plugin_by_coord(target));
  55468. + iplug->b.copy_units(target, source, from, count, dir, free_space);
  55469. +
  55470. + if (dir == SHIFT_RIGHT) {
  55471. + /* FIXME-VS: this looks not necessary. update_item_key was
  55472. + called already by copy_units method */
  55473. + reiser4_key split_key;
  55474. +
  55475. + assert("nikita-1469", target->unit_pos == 0);
  55476. +
  55477. + unit_key_by_coord(target, &split_key);
  55478. + node_plugin_by_coord(target)->update_item_key(target,
  55479. + &split_key, NULL);
  55480. + }
  55481. +}
  55482. +
  55483. +/* copy part of @shift->real_stop.node starting either from its beginning or
  55484. + from its end and ending at @shift->real_stop to either the end or the
  55485. + beginning of @shift->target */
  55486. +static void copy(struct shift_params *shift, size_t node_header_size)
  55487. +{
  55488. + node40_header *nh;
  55489. + coord_t from;
  55490. + coord_t to;
  55491. + item_header40 *from_ih, *to_ih;
  55492. + int free_space_start;
  55493. + int new_items;
  55494. + unsigned old_items;
  55495. + int old_offset;
  55496. + unsigned i;
  55497. +
  55498. + nh = node40_node_header(shift->target);
  55499. + free_space_start = nh40_get_free_space_start(nh);
  55500. + old_items = nh40_get_num_items(nh);
  55501. + new_items = shift->entire + (shift->part_units ? 1 : 0);
  55502. + assert("vs-185",
  55503. + shift->shift_bytes ==
  55504. + shift->merging_bytes + shift->entire_bytes + shift->part_bytes);
  55505. +
  55506. + from = shift->wish_stop;
  55507. +
  55508. + coord_init_first_unit(&to, shift->target);
  55509. +
  55510. + /* NOTE:NIKITA->VS not sure what I am doing: shift->target is empty,
  55511. + hence to.between is set to EMPTY_NODE above. Looks like we want it
  55512. + to be AT_UNIT.
  55513. +
  55514. + Oh, wonders of ->betweeness...
  55515. +
  55516. + */
  55517. + to.between = AT_UNIT;
  55518. +
  55519. + if (shift->pend == SHIFT_LEFT) {
  55520. + /* copying to left */
  55521. +
  55522. + coord_set_item_pos(&from, 0);
  55523. + from_ih = node40_ih_at(from.node, 0);
  55524. +
  55525. + coord_set_item_pos(&to,
  55526. + node40_num_of_items_internal(to.node) - 1);
  55527. + if (shift->merging_units) {
  55528. + /* expand last item, so that plugin methods will see
  55529. + correct data */
  55530. + free_space_start += shift->merging_bytes;
  55531. + nh40_set_free_space_start(nh,
  55532. + (unsigned)free_space_start);
  55533. + nh40_set_free_space(nh,
  55534. + nh40_get_free_space(nh) -
  55535. + shift->merging_bytes);
  55536. +
  55537. + /* appending last item of @target */
  55538. + copy_units(&to, &from, 0, /* starting from 0-th unit */
  55539. + shift->merging_units, SHIFT_LEFT,
  55540. + shift->merging_bytes);
  55541. + coord_inc_item_pos(&from);
  55542. + from_ih--;
  55543. + coord_inc_item_pos(&to);
  55544. + }
  55545. +
  55546. + to_ih = node40_ih_at(shift->target, old_items);
  55547. + if (shift->entire) {
  55548. + /* copy @entire items entirely */
  55549. +
  55550. + /* copy item headers */
  55551. + memcpy(to_ih - shift->entire + 1,
  55552. + from_ih - shift->entire + 1,
  55553. + shift->entire * sizeof(item_header40));
  55554. + /* update item header offset */
  55555. + old_offset = ih40_get_offset(from_ih);
  55556. + /* AUDIT: Looks like if we calculate old_offset + free_space_start here instead of just old_offset, we can perform one "add" operation less per each iteration */
  55557. + for (i = 0; i < shift->entire; i++, to_ih--, from_ih--)
  55558. + ih40_set_offset(to_ih,
  55559. + ih40_get_offset(from_ih) -
  55560. + old_offset + free_space_start);
  55561. +
  55562. + /* copy item bodies */
  55563. + memcpy(zdata(shift->target) + free_space_start, zdata(from.node) + old_offset, /*ih40_get_offset (from_ih), */
  55564. + shift->entire_bytes);
  55565. +
  55566. + coord_add_item_pos(&from, (int)shift->entire);
  55567. + coord_add_item_pos(&to, (int)shift->entire);
  55568. + }
  55569. +
  55570. + nh40_set_free_space_start(nh,
  55571. + free_space_start +
  55572. + shift->shift_bytes -
  55573. + shift->merging_bytes);
  55574. + nh40_set_free_space(nh,
  55575. + nh40_get_free_space(nh) -
  55576. + (shift->shift_bytes - shift->merging_bytes +
  55577. + sizeof(item_header40) * new_items));
  55578. +
  55579. + /* update node header */
  55580. + node40_set_num_items(shift->target, nh, old_items + new_items);
  55581. + assert("vs-170",
  55582. + nh40_get_free_space(nh) < znode_size(shift->target));
  55583. +
  55584. + if (shift->part_units) {
  55585. + /* copy heading part (@part units) of @source item as
  55586. + a new item into @target->node */
  55587. +
  55588. + /* copy item header of partially copied item */
  55589. + coord_set_item_pos(&to,
  55590. + node40_num_of_items_internal(to.node)
  55591. + - 1);
  55592. + memcpy(to_ih, from_ih, sizeof(item_header40));
  55593. + ih40_set_offset(to_ih,
  55594. + nh40_get_free_space_start(nh) -
  55595. + shift->part_bytes);
  55596. + if (item_plugin_by_coord(&to)->b.init)
  55597. + item_plugin_by_coord(&to)->b.init(&to, &from,
  55598. + NULL);
  55599. + copy_units(&to, &from, 0, shift->part_units, SHIFT_LEFT,
  55600. + shift->part_bytes);
  55601. + }
  55602. +
  55603. + } else {
  55604. + /* copying to right */
  55605. +
  55606. + coord_set_item_pos(&from,
  55607. + node40_num_of_items_internal(from.node) - 1);
  55608. + from_ih = node40_ih_at_coord(&from);
  55609. +
  55610. + coord_set_item_pos(&to, 0);
  55611. +
  55612. + /* prepare space for new items */
  55613. + memmove(zdata(to.node) + node_header_size +
  55614. + shift->shift_bytes,
  55615. + zdata(to.node) + node_header_size,
  55616. + free_space_start - node_header_size);
  55617. + /* update item headers of moved items */
  55618. + to_ih = node40_ih_at(to.node, 0);
  55619. + /* first item gets @merging_bytes longer. free space appears
  55620. + at its beginning */
  55621. + if (!node_is_empty(to.node))
  55622. + ih40_set_offset(to_ih,
  55623. + ih40_get_offset(to_ih) +
  55624. + shift->shift_bytes -
  55625. + shift->merging_bytes);
  55626. +
  55627. + for (i = 1; i < old_items; i++)
  55628. + ih40_set_offset(to_ih - i,
  55629. + ih40_get_offset(to_ih - i) +
  55630. + shift->shift_bytes);
  55631. +
  55632. + /* move item headers to make space for new items */
  55633. + memmove(to_ih - old_items + 1 - new_items,
  55634. + to_ih - old_items + 1,
  55635. + sizeof(item_header40) * old_items);
  55636. + to_ih -= (new_items - 1);
  55637. +
  55638. + nh40_set_free_space_start(nh,
  55639. + free_space_start +
  55640. + shift->shift_bytes);
  55641. + nh40_set_free_space(nh,
  55642. + nh40_get_free_space(nh) -
  55643. + (shift->shift_bytes +
  55644. + sizeof(item_header40) * new_items));
  55645. +
  55646. + /* update node header */
  55647. + node40_set_num_items(shift->target, nh, old_items + new_items);
  55648. + assert("vs-170",
  55649. + nh40_get_free_space(nh) < znode_size(shift->target));
  55650. +
  55651. + if (shift->merging_units) {
  55652. + coord_add_item_pos(&to, new_items);
  55653. + to.unit_pos = 0;
  55654. + to.between = AT_UNIT;
  55655. + /* prepend first item of @to */
  55656. + copy_units(&to, &from,
  55657. + coord_last_unit_pos(&from) -
  55658. + shift->merging_units + 1,
  55659. + shift->merging_units, SHIFT_RIGHT,
  55660. + shift->merging_bytes);
  55661. + coord_dec_item_pos(&from);
  55662. + from_ih++;
  55663. + }
  55664. +
  55665. + if (shift->entire) {
  55666. + /* copy @entire items entirely */
  55667. +
  55668. + /* copy item headers */
  55669. + memcpy(to_ih, from_ih,
  55670. + shift->entire * sizeof(item_header40));
  55671. +
  55672. + /* update item header offset */
  55673. + old_offset =
  55674. + ih40_get_offset(from_ih + shift->entire - 1);
  55675. + /* AUDIT: old_offset + sizeof (node40_header) + shift->part_bytes calculation can be taken off the loop. */
  55676. + for (i = 0; i < shift->entire; i++, to_ih++, from_ih++)
  55677. + ih40_set_offset(to_ih,
  55678. + ih40_get_offset(from_ih) -
  55679. + old_offset +
  55680. + node_header_size +
  55681. + shift->part_bytes);
  55682. + /* copy item bodies */
  55683. + coord_add_item_pos(&from, -(int)(shift->entire - 1));
  55684. + memcpy(zdata(to.node) + node_header_size +
  55685. + shift->part_bytes, item_by_coord_node40(&from),
  55686. + shift->entire_bytes);
  55687. + coord_dec_item_pos(&from);
  55688. + }
  55689. +
  55690. + if (shift->part_units) {
  55691. + coord_set_item_pos(&to, 0);
  55692. + to.unit_pos = 0;
  55693. + to.between = AT_UNIT;
  55694. + /* copy heading part (@part units) of @source item as
  55695. + a new item into @target->node */
  55696. +
  55697. + /* copy item header of partially copied item */
  55698. + memcpy(to_ih, from_ih, sizeof(item_header40));
  55699. + ih40_set_offset(to_ih, node_header_size);
  55700. + if (item_plugin_by_coord(&to)->b.init)
  55701. + item_plugin_by_coord(&to)->b.init(&to, &from,
  55702. + NULL);
  55703. + copy_units(&to, &from,
  55704. + coord_last_unit_pos(&from) -
  55705. + shift->part_units + 1, shift->part_units,
  55706. + SHIFT_RIGHT, shift->part_bytes);
  55707. + }
  55708. + }
  55709. +}
  55710. +
  55711. +/* remove everything either before or after @shift->real_stop. Number of items
  55712. + removed completely is returned */
  55713. +static int delete_copied(struct shift_params *shift)
  55714. +{
  55715. + coord_t from;
  55716. + coord_t to;
  55717. + struct carry_cut_data cdata;
  55718. +
  55719. + if (shift->pend == SHIFT_LEFT) {
  55720. + /* we were shifting to left, remove everything from the
  55721. + beginning of @shift->real_stop.node up to
  55722. + @shift->real_stop */
  55723. + coord_init_first_unit(&from, shift->real_stop.node);
  55724. + to = shift->real_stop;
  55725. +
  55726. + /* store old coordinate of unit which will be first after
  55727. + shift to left */
  55728. + shift->u.future_first = to;
  55729. + coord_next_unit(&shift->u.future_first);
  55730. + } else {
  55731. + /* we were shifting to right, remove everything from
  55732. + @shift->real_stop up to the end of
  55733. + @shift->real_stop.node */
  55734. + from = shift->real_stop;
  55735. + coord_init_last_unit(&to, from.node);
  55736. +
  55737. + /* store old coordinate of unit which will be last after
  55738. + shift to right */
  55739. + shift->u.future_last = from;
  55740. + coord_prev_unit(&shift->u.future_last);
  55741. + }
  55742. +
  55743. + cdata.params.from = &from;
  55744. + cdata.params.to = &to;
  55745. + cdata.params.from_key = NULL;
  55746. + cdata.params.to_key = NULL;
  55747. + cdata.params.smallest_removed = NULL;
  55748. + return cut_node40(&cdata, NULL);
  55749. +}
  55750. +
  55751. +/* something was moved between @left and @right. Add carry operation to @info
  55752. + list to have carry to update delimiting key between them */
  55753. +static int
  55754. +prepare_for_update(znode * left, znode * right, carry_plugin_info * info)
  55755. +{
  55756. + carry_op *op;
  55757. + carry_node *cn;
  55758. +
  55759. + if (info == NULL)
  55760. + /* nowhere to send operation to. */
  55761. + return 0;
  55762. +
  55763. + if (!should_notify_parent(right))
  55764. + return 0;
  55765. +
  55766. + op = node_post_carry(info, COP_UPDATE, right, 1);
  55767. + if (IS_ERR(op) || op == NULL)
  55768. + return op ? PTR_ERR(op) : -EIO;
  55769. +
  55770. + if (left != NULL) {
  55771. + carry_node *reference;
  55772. +
  55773. + if (info->doing)
  55774. + reference = insert_carry_node(info->doing,
  55775. + info->todo, left);
  55776. + else
  55777. + reference = op->node;
  55778. + assert("nikita-2992", reference != NULL);
  55779. + cn = reiser4_add_carry(info->todo, POOLO_BEFORE, reference);
  55780. + if (IS_ERR(cn))
  55781. + return PTR_ERR(cn);
  55782. + cn->parent = 1;
  55783. + cn->node = left;
  55784. + if (ZF_ISSET(left, JNODE_ORPHAN))
  55785. + cn->left_before = 1;
  55786. + op->u.update.left = cn;
  55787. + } else
  55788. + op->u.update.left = NULL;
  55789. + return 0;
  55790. +}
  55791. +
  55792. +/* plugin->u.node.prepare_removal
  55793. + to delete a pointer to @empty from the tree add corresponding carry
  55794. + operation (delete) to @info list */
  55795. +int prepare_removal_node40(znode * empty, carry_plugin_info * info)
  55796. +{
  55797. + carry_op *op;
  55798. + reiser4_tree *tree;
  55799. +
  55800. + if (!should_notify_parent(empty))
  55801. + return 0;
  55802. + /* already on a road to Styx */
  55803. + if (ZF_ISSET(empty, JNODE_HEARD_BANSHEE))
  55804. + return 0;
  55805. + op = node_post_carry(info, COP_DELETE, empty, 1);
  55806. + if (IS_ERR(op) || op == NULL)
  55807. + return RETERR(op ? PTR_ERR(op) : -EIO);
  55808. +
  55809. + op->u.delete.child = NULL;
  55810. + op->u.delete.flags = 0;
  55811. +
  55812. + /* fare thee well */
  55813. + tree = znode_get_tree(empty);
  55814. + read_lock_tree(tree);
  55815. + write_lock_dk(tree);
  55816. + znode_set_ld_key(empty, znode_get_rd_key(empty));
  55817. + if (znode_is_left_connected(empty) && empty->left)
  55818. + znode_set_rd_key(empty->left, znode_get_rd_key(empty));
  55819. + write_unlock_dk(tree);
  55820. + read_unlock_tree(tree);
  55821. +
  55822. + ZF_SET(empty, JNODE_HEARD_BANSHEE);
  55823. + return 0;
  55824. +}
  55825. +
  55826. +/* something was shifted from @insert_coord->node to @shift->target, update
  55827. + @insert_coord correspondingly */
  55828. +static void
  55829. +adjust_coord(coord_t * insert_coord, struct shift_params *shift, int removed,
  55830. + int including_insert_coord)
  55831. +{
  55832. + /* item plugin was invalidated by shifting */
  55833. + coord_clear_iplug(insert_coord);
  55834. +
  55835. + if (node_is_empty(shift->wish_stop.node)) {
  55836. + assert("vs-242", shift->everything);
  55837. + if (including_insert_coord) {
  55838. + if (shift->pend == SHIFT_RIGHT) {
  55839. + /* set @insert_coord before first unit of
  55840. + @shift->target node */
  55841. + coord_init_before_first_item(insert_coord,
  55842. + shift->target);
  55843. + } else {
  55844. + /* set @insert_coord after last in target node */
  55845. + coord_init_after_last_item(insert_coord,
  55846. + shift->target);
  55847. + }
  55848. + } else {
  55849. + /* set @insert_coord inside of empty node. There is
  55850. + only one possible coord within an empty
  55851. + node. init_first_unit will set that coord */
  55852. + coord_init_first_unit(insert_coord,
  55853. + shift->wish_stop.node);
  55854. + }
  55855. + return;
  55856. + }
  55857. +
  55858. + if (shift->pend == SHIFT_RIGHT) {
  55859. + /* there was shifting to right */
  55860. + if (shift->everything) {
  55861. + /* everything wanted was shifted */
  55862. + if (including_insert_coord) {
  55863. + /* @insert_coord is set before first unit of
  55864. + @to node */
  55865. + coord_init_before_first_item(insert_coord,
  55866. + shift->target);
  55867. + insert_coord->between = BEFORE_UNIT;
  55868. + } else {
  55869. + /* @insert_coord is set after last unit of
  55870. + @insert->node */
  55871. + coord_init_last_unit(insert_coord,
  55872. + shift->wish_stop.node);
  55873. + insert_coord->between = AFTER_UNIT;
  55874. + }
  55875. + }
  55876. + return;
  55877. + }
  55878. +
  55879. + /* there was shifting to left */
  55880. + if (shift->everything) {
  55881. + /* everything wanted was shifted */
  55882. + if (including_insert_coord) {
  55883. + /* @insert_coord is set after last unit in @to node */
  55884. + coord_init_after_last_item(insert_coord, shift->target);
  55885. + } else {
  55886. + /* @insert_coord is set before first unit in the same
  55887. + node */
  55888. + coord_init_before_first_item(insert_coord,
  55889. + shift->wish_stop.node);
  55890. + }
  55891. + return;
  55892. + }
  55893. +
  55894. + /* FIXME-VS: the code below is complicated because with between ==
  55895. + AFTER_ITEM unit_pos is set to 0 */
  55896. +
  55897. + if (!removed) {
  55898. + /* no items were shifted entirely */
  55899. + assert("vs-195", shift->merging_units == 0
  55900. + || shift->part_units == 0);
  55901. +
  55902. + if (shift->real_stop.item_pos == insert_coord->item_pos) {
  55903. + if (shift->merging_units) {
  55904. + if (insert_coord->between == AFTER_UNIT) {
  55905. + assert("nikita-1441",
  55906. + insert_coord->unit_pos >=
  55907. + shift->merging_units);
  55908. + insert_coord->unit_pos -=
  55909. + shift->merging_units;
  55910. + } else if (insert_coord->between == BEFORE_UNIT) {
  55911. + assert("nikita-2090",
  55912. + insert_coord->unit_pos >
  55913. + shift->merging_units);
  55914. + insert_coord->unit_pos -=
  55915. + shift->merging_units;
  55916. + }
  55917. +
  55918. + assert("nikita-2083",
  55919. + insert_coord->unit_pos + 1);
  55920. + } else {
  55921. + if (insert_coord->between == AFTER_UNIT) {
  55922. + assert("nikita-1442",
  55923. + insert_coord->unit_pos >=
  55924. + shift->part_units);
  55925. + insert_coord->unit_pos -=
  55926. + shift->part_units;
  55927. + } else if (insert_coord->between == BEFORE_UNIT) {
  55928. + assert("nikita-2089",
  55929. + insert_coord->unit_pos >
  55930. + shift->part_units);
  55931. + insert_coord->unit_pos -=
  55932. + shift->part_units;
  55933. + }
  55934. +
  55935. + assert("nikita-2084",
  55936. + insert_coord->unit_pos + 1);
  55937. + }
  55938. + }
  55939. + return;
  55940. + }
  55941. +
  55942. + /* we shifted to left and there was not enough space for everything */
  55943. + switch (insert_coord->between) {
  55944. + case AFTER_UNIT:
  55945. + case BEFORE_UNIT:
  55946. + if (shift->real_stop.item_pos == insert_coord->item_pos)
  55947. + insert_coord->unit_pos -= shift->part_units;
  55948. + fallthrough;
  55949. + case AFTER_ITEM:
  55950. + coord_add_item_pos(insert_coord, -removed);
  55951. + break;
  55952. + default:
  55953. + impossible("nikita-2087", "not ready");
  55954. + }
  55955. + assert("nikita-2085", insert_coord->unit_pos + 1);
  55956. +}
  55957. +
  55958. +static int call_shift_hooks(struct shift_params *shift)
  55959. +{
  55960. + unsigned i, shifted;
  55961. + coord_t coord;
  55962. + item_plugin *iplug;
  55963. +
  55964. + assert("vs-275", !node_is_empty(shift->target));
  55965. +
  55966. + /* number of items shift touches */
  55967. + shifted =
  55968. + shift->entire + (shift->merging_units ? 1 : 0) +
  55969. + (shift->part_units ? 1 : 0);
  55970. +
  55971. + if (shift->pend == SHIFT_LEFT) {
  55972. + /* moved items are at the end */
  55973. + coord_init_last_unit(&coord, shift->target);
  55974. + coord.unit_pos = 0;
  55975. +
  55976. + assert("vs-279", shift->pend == 1);
  55977. + for (i = 0; i < shifted; i++) {
  55978. + unsigned from, count;
  55979. +
  55980. + iplug = item_plugin_by_coord(&coord);
  55981. + if (i == 0 && shift->part_units) {
  55982. + assert("vs-277",
  55983. + coord_num_units(&coord) ==
  55984. + shift->part_units);
  55985. + count = shift->part_units;
  55986. + from = 0;
  55987. + } else if (i == shifted - 1 && shift->merging_units) {
  55988. + count = shift->merging_units;
  55989. + from = coord_num_units(&coord) - count;
  55990. + } else {
  55991. + count = coord_num_units(&coord);
  55992. + from = 0;
  55993. + }
  55994. +
  55995. + if (iplug->b.shift_hook) {
  55996. + iplug->b.shift_hook(&coord, from, count,
  55997. + shift->wish_stop.node);
  55998. + }
  55999. + coord_add_item_pos(&coord, -shift->pend);
  56000. + }
  56001. + } else {
  56002. + /* moved items are at the beginning */
  56003. + coord_init_first_unit(&coord, shift->target);
  56004. +
  56005. + assert("vs-278", shift->pend == -1);
  56006. + for (i = 0; i < shifted; i++) {
  56007. + unsigned from, count;
  56008. +
  56009. + iplug = item_plugin_by_coord(&coord);
  56010. + if (i == 0 && shift->part_units) {
  56011. + assert("vs-277",
  56012. + coord_num_units(&coord) ==
  56013. + shift->part_units);
  56014. + count = coord_num_units(&coord);
  56015. + from = 0;
  56016. + } else if (i == shifted - 1 && shift->merging_units) {
  56017. + count = shift->merging_units;
  56018. + from = 0;
  56019. + } else {
  56020. + count = coord_num_units(&coord);
  56021. + from = 0;
  56022. + }
  56023. +
  56024. + if (iplug->b.shift_hook) {
  56025. + iplug->b.shift_hook(&coord, from, count,
  56026. + shift->wish_stop.node);
  56027. + }
  56028. + coord_add_item_pos(&coord, -shift->pend);
  56029. + }
  56030. + }
  56031. +
  56032. + return 0;
  56033. +}
  56034. +
  56035. +/* shift to left is completed. Return 1 if unit @old was moved to left neighbor */
  56036. +static int
  56037. +unit_moved_left(const struct shift_params *shift, const coord_t * old)
  56038. +{
  56039. + assert("vs-944", shift->real_stop.node == old->node);
  56040. +
  56041. + if (shift->real_stop.item_pos < old->item_pos)
  56042. + return 0;
  56043. + if (shift->real_stop.item_pos == old->item_pos) {
  56044. + if (shift->real_stop.unit_pos < old->unit_pos)
  56045. + return 0;
  56046. + }
  56047. + return 1;
  56048. +}
  56049. +
  56050. +/* shift to right is completed. Return 1 if unit @old was moved to right
  56051. + neighbor */
  56052. +static int
  56053. +unit_moved_right(const struct shift_params *shift, const coord_t * old)
  56054. +{
  56055. + assert("vs-944", shift->real_stop.node == old->node);
  56056. +
  56057. + if (shift->real_stop.item_pos > old->item_pos)
  56058. + return 0;
  56059. + if (shift->real_stop.item_pos == old->item_pos) {
  56060. + if (shift->real_stop.unit_pos > old->unit_pos)
  56061. + return 0;
  56062. + }
  56063. + return 1;
  56064. +}
  56065. +
  56066. +/* coord @old was set in node from which shift was performed. What was shifted
  56067. + is stored in @shift. Update @old correspondingly to performed shift */
  56068. +static coord_t *adjust_coord2(const struct shift_params *shift,
  56069. + const coord_t * old, coord_t * new)
  56070. +{
  56071. + coord_clear_iplug(new);
  56072. + new->between = old->between;
  56073. +
  56074. + coord_clear_iplug(new);
  56075. + if (old->node == shift->target) {
  56076. + if (shift->pend == SHIFT_LEFT) {
  56077. + /* coord which is set inside of left neighbor does not
  56078. + change during shift to left */
  56079. + coord_dup(new, old);
  56080. + return new;
  56081. + }
  56082. + new->node = old->node;
  56083. + coord_set_item_pos(new,
  56084. + old->item_pos + shift->entire +
  56085. + (shift->part_units ? 1 : 0));
  56086. + new->unit_pos = old->unit_pos;
  56087. + if (old->item_pos == 0 && shift->merging_units)
  56088. + new->unit_pos += shift->merging_units;
  56089. + return new;
  56090. + }
  56091. +
  56092. + assert("vs-977", old->node == shift->wish_stop.node);
  56093. + if (shift->pend == SHIFT_LEFT) {
  56094. + if (unit_moved_left(shift, old)) {
  56095. + /* unit @old moved to left neighbor. Calculate its
  56096. + coordinate there */
  56097. + new->node = shift->target;
  56098. + coord_set_item_pos(new,
  56099. + node_num_items(shift->target) -
  56100. + shift->entire -
  56101. + (shift->part_units ? 1 : 0) +
  56102. + old->item_pos);
  56103. +
  56104. + new->unit_pos = old->unit_pos;
  56105. + if (shift->merging_units) {
  56106. + coord_dec_item_pos(new);
  56107. + if (old->item_pos == 0) {
  56108. + /* unit_pos only changes if item got
  56109. + merged */
  56110. + new->unit_pos =
  56111. + coord_num_units(new) -
  56112. + (shift->merging_units -
  56113. + old->unit_pos);
  56114. + }
  56115. + }
  56116. + } else {
  56117. + /* unit @old did not move to left neighbor.
  56118. +
  56119. + Use _nocheck, because @old is outside of its node.
  56120. + */
  56121. + coord_dup_nocheck(new, old);
  56122. + coord_add_item_pos(new,
  56123. + -shift->u.future_first.item_pos);
  56124. + if (new->item_pos == 0)
  56125. + new->unit_pos -= shift->u.future_first.unit_pos;
  56126. + }
  56127. + } else {
  56128. + if (unit_moved_right(shift, old)) {
  56129. + /* unit @old moved to right neighbor */
  56130. + new->node = shift->target;
  56131. + coord_set_item_pos(new,
  56132. + old->item_pos -
  56133. + shift->real_stop.item_pos);
  56134. + if (new->item_pos == 0) {
  56135. + /* unit @old might change unit pos */
  56136. + coord_set_item_pos(new,
  56137. + old->unit_pos -
  56138. + shift->real_stop.unit_pos);
  56139. + }
  56140. + } else {
  56141. + /* unit @old did not move to right neighbor, therefore
  56142. + it did not change */
  56143. + coord_dup(new, old);
  56144. + }
  56145. + }
  56146. + coord_set_iplug(new, item_plugin_by_coord(new));
  56147. + return new;
  56148. +}
  56149. +
  56150. +/* this is called when shift is completed (something of source node is copied
  56151. + to target and deleted in source) to update all taps set in current
  56152. + context */
  56153. +static void update_taps(const struct shift_params *shift)
  56154. +{
  56155. + tap_t *tap;
  56156. + coord_t new;
  56157. +
  56158. + for_all_taps(tap) {
  56159. + /* update only taps set to nodes participating in shift */
  56160. + if (tap->coord->node == shift->wish_stop.node
  56161. + || tap->coord->node == shift->target)
  56162. + tap_to_coord(tap,
  56163. + adjust_coord2(shift, tap->coord, &new));
  56164. + }
  56165. +}
  56166. +
  56167. +#if REISER4_DEBUG
  56168. +
  56169. +struct shift_check {
  56170. + reiser4_key key;
  56171. + __u16 plugin_id;
  56172. + union {
  56173. + __u64 bytes;
  56174. + __u64 entries;
  56175. + void *unused;
  56176. + } u;
  56177. +};
  56178. +
  56179. +void *shift_check_prepare(const znode * left, const znode * right)
  56180. +{
  56181. + pos_in_node_t i, nr_items;
  56182. + int mergeable;
  56183. + struct shift_check *data;
  56184. + item_header40 *ih;
  56185. +
  56186. + if (node_is_empty(left) || node_is_empty(right))
  56187. + mergeable = 0;
  56188. + else {
  56189. + coord_t l, r;
  56190. +
  56191. + coord_init_last_unit(&l, left);
  56192. + coord_init_first_unit(&r, right);
  56193. + mergeable = are_items_mergeable(&l, &r);
  56194. + }
  56195. + nr_items =
  56196. + node40_num_of_items_internal(left) +
  56197. + node40_num_of_items_internal(right) - (mergeable ? 1 : 0);
  56198. + data =
  56199. + kmalloc(sizeof(struct shift_check) * nr_items,
  56200. + reiser4_ctx_gfp_mask_get());
  56201. + if (data != NULL) {
  56202. + coord_t coord;
  56203. + pos_in_node_t item_pos;
  56204. +
  56205. + coord_init_first_unit(&coord, left);
  56206. + i = 0;
  56207. +
  56208. + for (item_pos = 0;
  56209. + item_pos < node40_num_of_items_internal(left);
  56210. + item_pos++) {
  56211. +
  56212. + coord_set_item_pos(&coord, item_pos);
  56213. + ih = node40_ih_at_coord(&coord);
  56214. +
  56215. + data[i].key = ih->key;
  56216. + data[i].plugin_id = le16_to_cpu(get_unaligned(&ih->plugin_id));
  56217. + switch (data[i].plugin_id) {
  56218. + case CTAIL_ID:
  56219. + case FORMATTING_ID:
  56220. + data[i].u.bytes = coord_num_units(&coord);
  56221. + break;
  56222. + case EXTENT_POINTER_ID:
  56223. + data[i].u.bytes =
  56224. + reiser4_extent_size(&coord,
  56225. + coord_num_units(&coord));
  56226. + break;
  56227. + case COMPOUND_DIR_ID:
  56228. + data[i].u.entries = coord_num_units(&coord);
  56229. + break;
  56230. + default:
  56231. + data[i].u.unused = NULL;
  56232. + break;
  56233. + }
  56234. + i++;
  56235. + }
  56236. +
  56237. + coord_init_first_unit(&coord, right);
  56238. +
  56239. + if (mergeable) {
  56240. + assert("vs-1609", i != 0);
  56241. +
  56242. + ih = node40_ih_at_coord(&coord);
  56243. +
  56244. + assert("vs-1589",
  56245. + data[i - 1].plugin_id ==
  56246. + le16_to_cpu(get_unaligned(&ih->plugin_id)));
  56247. + switch (data[i - 1].plugin_id) {
  56248. + case CTAIL_ID:
  56249. + case FORMATTING_ID:
  56250. + data[i - 1].u.bytes += coord_num_units(&coord);
  56251. + break;
  56252. + case EXTENT_POINTER_ID:
  56253. + data[i - 1].u.bytes +=
  56254. + reiser4_extent_size(&coord,
  56255. + coord_num_units(&coord));
  56256. + break;
  56257. + case COMPOUND_DIR_ID:
  56258. + data[i - 1].u.entries +=
  56259. + coord_num_units(&coord);
  56260. + break;
  56261. + default:
  56262. + impossible("vs-1605", "wrong mergeable item");
  56263. + break;
  56264. + }
  56265. + item_pos = 1;
  56266. + } else
  56267. + item_pos = 0;
  56268. + for (; item_pos < node40_num_of_items_internal(right);
  56269. + item_pos++) {
  56270. +
  56271. + assert("vs-1604", i < nr_items);
  56272. + coord_set_item_pos(&coord, item_pos);
  56273. + ih = node40_ih_at_coord(&coord);
  56274. +
  56275. + data[i].key = ih->key;
  56276. + data[i].plugin_id = le16_to_cpu(get_unaligned(&ih->plugin_id));
  56277. + switch (data[i].plugin_id) {
  56278. + case CTAIL_ID:
  56279. + case FORMATTING_ID:
  56280. + data[i].u.bytes = coord_num_units(&coord);
  56281. + break;
  56282. + case EXTENT_POINTER_ID:
  56283. + data[i].u.bytes =
  56284. + reiser4_extent_size(&coord,
  56285. + coord_num_units(&coord));
  56286. + break;
  56287. + case COMPOUND_DIR_ID:
  56288. + data[i].u.entries = coord_num_units(&coord);
  56289. + break;
  56290. + default:
  56291. + data[i].u.unused = NULL;
  56292. + break;
  56293. + }
  56294. + i++;
  56295. + }
  56296. + assert("vs-1606", i == nr_items);
  56297. + }
  56298. + return data;
  56299. +}
  56300. +
  56301. +void shift_check(void *vp, const znode * left, const znode * right)
  56302. +{
  56303. + pos_in_node_t i, nr_items;
  56304. + coord_t coord;
  56305. + __u64 last_bytes;
  56306. + int mergeable;
  56307. + item_header40 *ih;
  56308. + pos_in_node_t item_pos;
  56309. + struct shift_check *data;
  56310. +
  56311. + data = (struct shift_check *)vp;
  56312. +
  56313. + if (data == NULL)
  56314. + return;
  56315. +
  56316. + if (node_is_empty(left) || node_is_empty(right))
  56317. + mergeable = 0;
  56318. + else {
  56319. + coord_t l, r;
  56320. +
  56321. + coord_init_last_unit(&l, left);
  56322. + coord_init_first_unit(&r, right);
  56323. + mergeable = are_items_mergeable(&l, &r);
  56324. + }
  56325. +
  56326. + nr_items =
  56327. + node40_num_of_items_internal(left) +
  56328. + node40_num_of_items_internal(right) - (mergeable ? 1 : 0);
  56329. +
  56330. + i = 0;
  56331. + last_bytes = 0;
  56332. +
  56333. + coord_init_first_unit(&coord, left);
  56334. +
  56335. + for (item_pos = 0; item_pos < node40_num_of_items_internal(left);
  56336. + item_pos++) {
  56337. +
  56338. + coord_set_item_pos(&coord, item_pos);
  56339. + ih = node40_ih_at_coord(&coord);
  56340. +
  56341. + assert("vs-1611", i == item_pos);
  56342. + assert("vs-1590", keyeq(&ih->key, &data[i].key));
  56343. + assert("vs-1591",
  56344. + le16_to_cpu(get_unaligned(&ih->plugin_id)) == data[i].plugin_id);
  56345. + if ((i < (node40_num_of_items_internal(left) - 1))
  56346. + || !mergeable) {
  56347. + switch (data[i].plugin_id) {
  56348. + case CTAIL_ID:
  56349. + case FORMATTING_ID:
  56350. + assert("vs-1592",
  56351. + data[i].u.bytes ==
  56352. + coord_num_units(&coord));
  56353. + break;
  56354. + case EXTENT_POINTER_ID:
  56355. + assert("vs-1593",
  56356. + data[i].u.bytes ==
  56357. + reiser4_extent_size(&coord,
  56358. + coord_num_units
  56359. + (&coord)));
  56360. + break;
  56361. + case COMPOUND_DIR_ID:
  56362. + assert("vs-1594",
  56363. + data[i].u.entries ==
  56364. + coord_num_units(&coord));
  56365. + break;
  56366. + default:
  56367. + break;
  56368. + }
  56369. + }
  56370. + if (item_pos == (node40_num_of_items_internal(left) - 1)
  56371. + && mergeable) {
  56372. + switch (data[i].plugin_id) {
  56373. + case CTAIL_ID:
  56374. + case FORMATTING_ID:
  56375. + last_bytes = coord_num_units(&coord);
  56376. + break;
  56377. + case EXTENT_POINTER_ID:
  56378. + last_bytes =
  56379. + reiser4_extent_size(&coord,
  56380. + coord_num_units(&coord));
  56381. + break;
  56382. + case COMPOUND_DIR_ID:
  56383. + last_bytes = coord_num_units(&coord);
  56384. + break;
  56385. + default:
  56386. + impossible("vs-1595", "wrong mergeable item");
  56387. + break;
  56388. + }
  56389. + }
  56390. + i++;
  56391. + }
  56392. +
  56393. + coord_init_first_unit(&coord, right);
  56394. + if (mergeable) {
  56395. + ih = node40_ih_at_coord(&coord);
  56396. +
  56397. + assert("vs-1589",
  56398. + data[i - 1].plugin_id == le16_to_cpu(get_unaligned(&ih->plugin_id)));
  56399. + assert("vs-1608", last_bytes != 0);
  56400. + switch (data[i - 1].plugin_id) {
  56401. + case CTAIL_ID:
  56402. + case FORMATTING_ID:
  56403. + assert("vs-1596",
  56404. + data[i - 1].u.bytes ==
  56405. + last_bytes + coord_num_units(&coord));
  56406. + break;
  56407. +
  56408. + case EXTENT_POINTER_ID:
  56409. + assert("vs-1597",
  56410. + data[i - 1].u.bytes ==
  56411. + last_bytes + reiser4_extent_size(&coord,
  56412. + coord_num_units
  56413. + (&coord)));
  56414. + break;
  56415. +
  56416. + case COMPOUND_DIR_ID:
  56417. + assert("vs-1598",
  56418. + data[i - 1].u.bytes ==
  56419. + last_bytes + coord_num_units(&coord));
  56420. + break;
  56421. + default:
  56422. + impossible("vs-1599", "wrong mergeable item");
  56423. + break;
  56424. + }
  56425. + item_pos = 1;
  56426. + } else
  56427. + item_pos = 0;
  56428. +
  56429. + for (; item_pos < node40_num_of_items_internal(right); item_pos++) {
  56430. +
  56431. + coord_set_item_pos(&coord, item_pos);
  56432. + ih = node40_ih_at_coord(&coord);
  56433. +
  56434. + assert("vs-1612", keyeq(&ih->key, &data[i].key));
  56435. + assert("vs-1613",
  56436. + le16_to_cpu(get_unaligned(&ih->plugin_id)) == data[i].plugin_id);
  56437. + switch (data[i].plugin_id) {
  56438. + case CTAIL_ID:
  56439. + case FORMATTING_ID:
  56440. + assert("vs-1600",
  56441. + data[i].u.bytes == coord_num_units(&coord));
  56442. + break;
  56443. + case EXTENT_POINTER_ID:
  56444. + assert("vs-1601",
  56445. + data[i].u.bytes ==
  56446. + reiser4_extent_size(&coord,
  56447. + coord_num_units
  56448. + (&coord)));
  56449. + break;
  56450. + case COMPOUND_DIR_ID:
  56451. + assert("vs-1602",
  56452. + data[i].u.entries == coord_num_units(&coord));
  56453. + break;
  56454. + default:
  56455. + break;
  56456. + }
  56457. + i++;
  56458. + }
  56459. +
  56460. + assert("vs-1603", i == nr_items);
  56461. + kfree(data);
  56462. +}
  56463. +
  56464. +#endif
  56465. +
  56466. +/*
  56467. + * common part of ->shift() for all nodes,
  56468. + * which contain node40_header at the beginning and
  56469. + * the table of item headers at the end
  56470. + */
  56471. +int shift_node40_common(coord_t *from, znode *to,
  56472. + shift_direction pend,
  56473. + int delete_child, /* if @from->node becomes empty,
  56474. + * it will be deleted from the
  56475. + * tree if this is set to 1 */
  56476. + int including_stop_coord,
  56477. + carry_plugin_info *info,
  56478. + size_t node_header_size)
  56479. +{
  56480. + struct shift_params shift;
  56481. + int result;
  56482. + znode *left, *right;
  56483. + znode *source;
  56484. + int target_empty;
  56485. +
  56486. + assert("nikita-2161", coord_check(from));
  56487. +
  56488. + memset(&shift, 0, sizeof(shift));
  56489. + shift.pend = pend;
  56490. + shift.wish_stop = *from;
  56491. + shift.target = to;
  56492. +
  56493. + assert("nikita-1473", znode_is_write_locked(from->node));
  56494. + assert("nikita-1474", znode_is_write_locked(to));
  56495. +
  56496. + source = from->node;
  56497. +
  56498. + /* set @shift.wish_stop to rightmost/leftmost unit among units we want
  56499. + shifted */
  56500. + if (pend == SHIFT_LEFT) {
  56501. + result = coord_set_to_left(&shift.wish_stop);
  56502. + left = to;
  56503. + right = from->node;
  56504. + } else {
  56505. + result = coord_set_to_right(&shift.wish_stop);
  56506. + left = from->node;
  56507. + right = to;
  56508. + }
  56509. +
  56510. + if (result) {
  56511. + /* move insertion coord even if there is nothing to move */
  56512. + if (including_stop_coord) {
  56513. + /* move insertion coord (@from) */
  56514. + if (pend == SHIFT_LEFT) {
  56515. + /* after last item in target node */
  56516. + coord_init_after_last_item(from, to);
  56517. + } else {
  56518. + /* before first item in target node */
  56519. + coord_init_before_first_item(from, to);
  56520. + }
  56521. + }
  56522. +
  56523. + if (delete_child && node_is_empty(shift.wish_stop.node))
  56524. + result =
  56525. + prepare_removal_node40(shift.wish_stop.node, info);
  56526. + else
  56527. + result = 0;
  56528. + /* there is nothing to shift */
  56529. + assert("nikita-2078", coord_check(from));
  56530. + return result;
  56531. + }
  56532. +
  56533. + target_empty = node_is_empty(to);
  56534. +
  56535. + /* when first node plugin with item body compression is implemented,
  56536. + this must be changed to call node specific plugin */
  56537. +
  56538. + /* shift->stop_coord is updated to last unit which really will be
  56539. + shifted */
  56540. + estimate_shift(&shift, get_current_context());
  56541. + if (!shift.shift_bytes) {
  56542. + /* we could not shift anything */
  56543. + assert("nikita-2079", coord_check(from));
  56544. + return 0;
  56545. + }
  56546. +
  56547. + copy(&shift, node_header_size);
  56548. +
  56549. + /* result value of this is important. It is used by adjust_coord below */
  56550. + result = delete_copied(&shift);
  56551. +
  56552. + assert("vs-1610", result >= 0);
  56553. + assert("vs-1471",
  56554. + ((reiser4_context *) current->journal_info)->magic ==
  56555. + context_magic);
  56556. +
  56557. + /* item which has been moved from one node to another might want to do
  56558. + something on that event. This can be done by item's shift_hook
  56559. + method, which will be now called for every moved items */
  56560. + call_shift_hooks(&shift);
  56561. +
  56562. + assert("vs-1472",
  56563. + ((reiser4_context *) current->journal_info)->magic ==
  56564. + context_magic);
  56565. +
  56566. + update_taps(&shift);
  56567. +
  56568. + assert("vs-1473",
  56569. + ((reiser4_context *) current->journal_info)->magic ==
  56570. + context_magic);
  56571. +
  56572. + /* adjust @from pointer in accordance with @including_stop_coord flag
  56573. + and amount of data which was really shifted */
  56574. + adjust_coord(from, &shift, result, including_stop_coord);
  56575. +
  56576. + if (target_empty)
  56577. + /*
  56578. + * items were shifted into empty node. Update delimiting key.
  56579. + */
  56580. + result = prepare_for_update(NULL, left, info);
  56581. +
  56582. + /* add update operation to @info, which is the list of operations to
  56583. + be performed on a higher level */
  56584. + result = prepare_for_update(left, right, info);
  56585. + if (!result && node_is_empty(source) && delete_child) {
  56586. + /* all contents of @from->node is moved to @to and @from->node
  56587. + has to be removed from the tree, so, on higher level we
  56588. + will be removing the pointer to node @from->node */
  56589. + result = prepare_removal_node40(source, info);
  56590. + }
  56591. + assert("nikita-2080", coord_check(from));
  56592. + return result ? result : (int)shift.shift_bytes;
  56593. +}
  56594. +
  56595. +/*
  56596. + * plugin->u.node.shift
  56597. + * look for description of this method in plugin/node/node.h
  56598. + */
  56599. +int shift_node40(coord_t *from, znode *to,
  56600. + shift_direction pend,
  56601. + int delete_child, /* if @from->node becomes empty,
  56602. + * it will be deleted from the
  56603. + * tree if this is set to 1 */
  56604. + int including_stop_coord,
  56605. + carry_plugin_info *info)
  56606. +{
  56607. + return shift_node40_common(from, to, pend, delete_child,
  56608. + including_stop_coord, info,
  56609. + sizeof(node40_header));
  56610. +}
  56611. +
  56612. +/* plugin->u.node.fast_insert()
  56613. + look for description of this method in plugin/node/node.h */
  56614. +int fast_insert_node40(const coord_t * coord UNUSED_ARG /* node to query */ )
  56615. +{
  56616. + return 1;
  56617. +}
  56618. +
  56619. +/* plugin->u.node.fast_paste()
  56620. + look for description of this method in plugin/node/node.h */
  56621. +int fast_paste_node40(const coord_t * coord UNUSED_ARG /* node to query */ )
  56622. +{
  56623. + return 1;
  56624. +}
  56625. +
  56626. +/* plugin->u.node.fast_cut()
  56627. + look for description of this method in plugin/node/node.h */
  56628. +int fast_cut_node40(const coord_t * coord UNUSED_ARG /* node to query */ )
  56629. +{
  56630. + return 1;
  56631. +}
  56632. +
  56633. +/* plugin->u.node.modify - not defined */
  56634. +
  56635. +/* plugin->u.node.max_item_size */
  56636. +int max_item_size_node40(void)
  56637. +{
  56638. + return reiser4_get_current_sb()->s_blocksize - sizeof(node40_header) -
  56639. + sizeof(item_header40);
  56640. +}
  56641. +
  56642. +/* plugin->u.node.set_item_plugin */
  56643. +int set_item_plugin_node40(coord_t *coord, item_id id)
  56644. +{
  56645. + item_header40 *ih;
  56646. +
  56647. + ih = node40_ih_at_coord(coord);
  56648. + put_unaligned(cpu_to_le16(id), &ih->plugin_id);
  56649. + coord->iplugid = id;
  56650. + return 0;
  56651. +}
  56652. +
  56653. +/*
  56654. + Local variables:
  56655. + c-indentation-style: "K&R"
  56656. + mode-name: "LC"
  56657. + c-basic-offset: 8
  56658. + tab-width: 8
  56659. + fill-column: 120
  56660. + scroll-step: 1
  56661. + End:
  56662. +*/
  56663. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/node/node40.h linux-5.16.14/fs/reiser4/plugin/node/node40.h
  56664. --- linux-5.16.14.orig/fs/reiser4/plugin/node/node40.h 1970-01-01 01:00:00.000000000 +0100
  56665. +++ linux-5.16.14/fs/reiser4/plugin/node/node40.h 2022-03-12 13:26:19.683892804 +0100
  56666. @@ -0,0 +1,130 @@
  56667. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  56668. +
  56669. +#if !defined( __REISER4_NODE40_H__ )
  56670. +#define __REISER4_NODE40_H__
  56671. +
  56672. +#include "../../forward.h"
  56673. +#include "../../dformat.h"
  56674. +#include "node.h"
  56675. +
  56676. +#include <linux/types.h>
  56677. +
  56678. +/* format of node header for 40 node layouts. Keep bloat out of this struct. */
  56679. +typedef struct node40_header {
  56680. + /* identifier of node plugin. Must be located at the very beginning
  56681. + of a node. */
  56682. + common_node_header common_header; /* this is 16 bits */
  56683. + /* number of items. Should be first element in the node header,
  56684. + because we haven't yet finally decided whether it shouldn't go into
  56685. + common_header.
  56686. + */
  56687. +/* NIKITA-FIXME-HANS: Create a macro such that if there is only one
  56688. + * node format at compile time, and it is this one, accesses do not perform a function dereference when
  56689. + * accessing these fields (and otherwise they do). Probably 80% of users will only have one node format at a time throughout the life of reiser4. */
  56690. + d16 nr_items;
  56691. + /* free space in node measured in bytes */
  56692. + d16 free_space;
  56693. + /* offset to start of free space in node */
  56694. + d16 free_space_start;
  56695. + /* for reiser4_fsck. When information about what is a free
  56696. + block is corrupted, and we try to recover everything even
  56697. + if marked as freed, then old versions of data may
  56698. + duplicate newer versions, and this field allows us to
  56699. + restore the newer version. Also useful for when users
  56700. + who don't have the new trashcan installed on their linux distro
  56701. + delete the wrong files and send us desperate emails
  56702. + offering $25 for them back. */
  56703. +
  56704. + /* magic number used to recognize formatted nodes NIKITA-FIXME-HANS: improve this comment */
  56705. + d32 magic;
  56706. + /* flushstamp is made of mk_id and write_counter. mk_id is an
  56707. + id generated randomly at mkreiserfs time. So we can just
  56708. + skip all nodes with different mk_id. write_counter is d64
  56709. + incrementing counter of writes on disk. It is used for
  56710. + choosing the newest data at fsck time. NIKITA-FIXME-HANS: why was field name changed but not comment? */
  56711. +
  56712. + d32 mkfs_id;
  56713. + d64 flush_id;
  56714. + /* node flags to be used by fsck (reiser4ck or reiser4fsck?)
  56715. + and repacker NIKITA-FIXME-HANS: say more or reference elsewhere that says more */
  56716. + d16 flags;
  56717. +
  56718. + /* 1 is leaf level, 2 is twig level, root is the numerically
  56719. + largest level */
  56720. + d8 level;
  56721. +
  56722. + d8 pad;
  56723. +} PACKED node40_header;
  56724. +
  56725. +/* item headers are not standard across all node layouts, pass
  56726. + pos_in_node to functions instead */
  56727. +typedef struct item_header40 {
  56728. + /* key of item */
  56729. + /* 0 */ reiser4_key key;
  56730. + /* offset from start of a node measured in 8-byte chunks */
  56731. + /* 24 */ d16 offset;
  56732. + /* 26 */ d16 flags;
  56733. + /* 28 */ d16 plugin_id;
  56734. +} PACKED item_header40;
  56735. +
  56736. +size_t item_overhead_node40(const znode * node, flow_t * aflow);
  56737. +size_t free_space_node40(znode * node);
  56738. +node_search_result lookup_node40(znode * node, const reiser4_key * key,
  56739. + lookup_bias bias, coord_t * coord);
  56740. +int num_of_items_node40(const znode * node);
  56741. +char *item_by_coord_node40(const coord_t * coord);
  56742. +int length_by_coord_node40(const coord_t * coord);
  56743. +item_plugin *plugin_by_coord_node40(const coord_t * coord);
  56744. +reiser4_key *key_at_node40(const coord_t * coord, reiser4_key * key);
  56745. +size_t estimate_node40(znode * node);
  56746. +int check_node40(const znode * node, __u32 flags, const char **error);
  56747. +int parse_node40_common(znode *node, const __u32 magic);
  56748. +int parse_node40(znode * node);
  56749. +int init_node40_common(znode *node, node_plugin *nplug,
  56750. + size_t node_header_size, const __u32 magic);
  56751. +int init_node40(znode *node);
  56752. +
  56753. +#ifdef GUESS_EXISTS
  56754. +int guess_node40_common(const znode *node, reiser4_node_id id,
  56755. + const __u32 magic);
  56756. +int guess_node40(const znode *node);
  56757. +#endif
  56758. +
  56759. +void change_item_size_node40(coord_t * coord, int by);
  56760. +int create_item_node40(coord_t * target, const reiser4_key * key,
  56761. + reiser4_item_data * data, carry_plugin_info * info);
  56762. +void update_item_key_node40(coord_t * target, const reiser4_key * key,
  56763. + carry_plugin_info * info);
  56764. +int kill_node40(struct carry_kill_data *, carry_plugin_info *);
  56765. +int cut_node40(struct carry_cut_data *, carry_plugin_info *);
  56766. +int shift_node40_common(coord_t *from, znode *to, shift_direction pend,
  56767. + int delete_child, int including_stop_coord,
  56768. + carry_plugin_info *info, size_t nh_size);
  56769. +int shift_node40(coord_t *from, znode *to, shift_direction pend,
  56770. + int delete_child, int including_stop_coord,
  56771. + carry_plugin_info *info);
  56772. +int fast_insert_node40(const coord_t * coord);
  56773. +int fast_paste_node40(const coord_t * coord);
  56774. +int fast_cut_node40(const coord_t * coord);
  56775. +int max_item_size_node40(void);
  56776. +int prepare_removal_node40(znode * empty, carry_plugin_info * info);
  56777. +int set_item_plugin_node40(coord_t * coord, item_id id);
  56778. +int shrink_item_node40(coord_t * coord, int delta);
  56779. +
  56780. +#if REISER4_DEBUG
  56781. +void *shift_check_prepare(const znode *left, const znode *right);
  56782. +void shift_check(void *vp, const znode *left, const znode *right);
  56783. +#endif
  56784. +
  56785. +/* __REISER4_NODE40_H__ */
  56786. +#endif
  56787. +/*
  56788. + Local variables:
  56789. + c-indentation-style: "K&R"
  56790. + mode-name: "LC"
  56791. + c-basic-offset: 8
  56792. + tab-width: 8
  56793. + fill-column: 120
  56794. + scroll-step: 1
  56795. + End:
  56796. +*/
  56797. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/node/node41.c linux-5.16.14/fs/reiser4/plugin/node/node41.c
  56798. --- linux-5.16.14.orig/fs/reiser4/plugin/node/node41.c 1970-01-01 01:00:00.000000000 +0100
  56799. +++ linux-5.16.14/fs/reiser4/plugin/node/node41.c 2022-03-12 13:26:19.683892804 +0100
  56800. @@ -0,0 +1,136 @@
  56801. +/*
  56802. + * Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README
  56803. + */
  56804. +
  56805. +#include "../../debug.h"
  56806. +#include "../../key.h"
  56807. +#include "../../coord.h"
  56808. +#include "../plugin_header.h"
  56809. +#include "../item/item.h"
  56810. +#include "node.h"
  56811. +#include "node41.h"
  56812. +#include "../plugin.h"
  56813. +#include "../../jnode.h"
  56814. +#include "../../znode.h"
  56815. +#include "../../pool.h"
  56816. +#include "../../carry.h"
  56817. +#include "../../tap.h"
  56818. +#include "../../tree.h"
  56819. +#include "../../super.h"
  56820. +#include "../../checksum.h"
  56821. +#include "../../reiser4.h"
  56822. +
  56823. +#include <linux/types.h>
  56824. +#include <linux/prefetch.h>
  56825. +
  56826. +/*
  56827. + * node41 layout is almost the same as node40:
  56828. + * node41_header is at the beginning and a table of item headers
  56829. + * is at the end. The difference is that node41_header contains
  56830. + * a 32-bit checksum (see node41.h)
  56831. + */
  56832. +
  56833. +static const __u32 REISER4_NODE41_MAGIC = 0x19051966;
  56834. +
  56835. +static inline node41_header *node41_node_header(const znode *node)
  56836. +{
  56837. + assert("edward-1634", node != NULL);
  56838. + assert("edward-1635", znode_page(node) != NULL);
  56839. + assert("edward-1636", zdata(node) != NULL);
  56840. +
  56841. + return (node41_header *)zdata(node);
  56842. +}
  56843. +
  56844. +int csum_node41(znode *node, int check)
  56845. +{
  56846. + __u32 cpu_csum;
  56847. +
  56848. + cpu_csum = reiser4_crc32c(get_current_super_private()->csum_tfm,
  56849. + ~0,
  56850. + zdata(node),
  56851. + sizeof(struct node40_header));
  56852. + cpu_csum = reiser4_crc32c(get_current_super_private()->csum_tfm,
  56853. + cpu_csum,
  56854. + zdata(node) + sizeof(struct node41_header),
  56855. + reiser4_get_current_sb()->s_blocksize -
  56856. + sizeof(node41_header));
  56857. + if (check)
  56858. + return cpu_csum == nh41_get_csum(node41_node_header(node));
  56859. + else {
  56860. + nh41_set_csum(node41_node_header(node), cpu_csum);
  56861. + return 1;
  56862. + }
  56863. +}
  56864. +
  56865. +/*
  56866. + * plugin->u.node.parse
  56867. + * look for description of this method in plugin/node/node.h
  56868. + */
  56869. +int parse_node41(znode *node /* node to parse */)
  56870. +{
  56871. + int ret;
  56872. +
  56873. + ret = csum_node41(node, 1/* check */);
  56874. + if (!ret) {
  56875. + warning("edward-1645",
  56876. + "block %llu: bad checksum. FSCK?",
  56877. + *jnode_get_block(ZJNODE(node)));
  56878. + reiser4_handle_error();
  56879. + return RETERR(-EIO);
  56880. + }
  56881. + return parse_node40_common(node, REISER4_NODE41_MAGIC);
  56882. +}
  56883. +
  56884. +/*
  56885. + * plugin->u.node.init
  56886. + * look for description of this method in plugin/node/node.h
  56887. + */
  56888. +int init_node41(znode *node /* node to initialise */)
  56889. +{
  56890. + return init_node40_common(node, node_plugin_by_id(NODE41_ID),
  56891. + sizeof(node41_header), REISER4_NODE41_MAGIC);
  56892. +}
  56893. +
  56894. +/*
  56895. + * plugin->u.node.shift
  56896. + * look for description of this method in plugin/node/node.h
  56897. + */
  56898. +int shift_node41(coord_t *from, znode *to,
  56899. + shift_direction pend,
  56900. + int delete_child, /* if @from->node becomes empty,
  56901. + * it will be deleted from the
  56902. + * tree if this is set to 1 */
  56903. + int including_stop_coord,
  56904. + carry_plugin_info *info)
  56905. +{
  56906. + return shift_node40_common(from, to, pend, delete_child,
  56907. + including_stop_coord, info,
  56908. + sizeof(node41_header));
  56909. +}
  56910. +
  56911. +#ifdef GUESS_EXISTS
  56912. +int guess_node41(const znode *node /* node to guess plugin of */)
  56913. +{
  56914. + return guess_node40_common(node, NODE41_ID, REISER4_NODE41_MAGIC);
  56915. +}
  56916. +#endif
  56917. +
  56918. +/*
  56919. + * plugin->u.node.max_item_size
  56920. + */
  56921. +int max_item_size_node41(void)
  56922. +{
  56923. + return reiser4_get_current_sb()->s_blocksize - sizeof(node41_header) -
  56924. + sizeof(item_header40);
  56925. +}
  56926. +
  56927. +/*
  56928. + Local variables:
  56929. + c-indentation-style: "K&R"
  56930. + mode-name: "LC"
  56931. + c-basic-offset: 8
  56932. + tab-width: 8
  56933. + fill-column: 80
  56934. + scroll-step: 1
  56935. + End:
  56936. +*/
  56937. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/node/node41.h linux-5.16.14/fs/reiser4/plugin/node/node41.h
  56938. --- linux-5.16.14.orig/fs/reiser4/plugin/node/node41.h 1970-01-01 01:00:00.000000000 +0100
  56939. +++ linux-5.16.14/fs/reiser4/plugin/node/node41.h 2022-03-12 13:26:19.683892804 +0100
  56940. @@ -0,0 +1,50 @@
  56941. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  56942. +
  56943. +#if !defined( __REISER4_NODE41_H__ )
  56944. +#define __REISER4_NODE41_H__
  56945. +
  56946. +#include "../../forward.h"
  56947. +#include "../../dformat.h"
  56948. +#include "node40.h"
  56949. +#include <linux/types.h>
  56950. +
  56951. +/*
  56952. + * node41 layout: the same as node40, but with 32-bit checksum
  56953. + */
  56954. +
  56955. +typedef struct node41_header {
  56956. + node40_header head;
  56957. + d32 csum;
  56958. +} PACKED node41_header;
  56959. +
  56960. +/*
  56961. + * functions to get/set fields of node41_header
  56962. + */
  56963. +#define nh41_get_csum(nh) le32_to_cpu(get_unaligned(&(nh)->csum))
  56964. +#define nh41_set_csum(nh, value) put_unaligned(cpu_to_le32(value), &(nh)->csum)
  56965. +
  56966. +int init_node41(znode * node);
  56967. +int parse_node41(znode *node);
  56968. +int max_item_size_node41(void);
  56969. +int shift_node41(coord_t *from, znode *to, shift_direction pend,
  56970. + int delete_child, int including_stop_coord,
  56971. + carry_plugin_info *info);
  56972. +int csum_node41(znode *node, int check);
  56973. +
  56974. +#ifdef GUESS_EXISTS
  56975. +int guess_node41(const znode * node);
  56976. +#endif
  56977. +extern void reiser4_handle_error(void);
  56978. +
  56979. +/* __REISER4_NODE41_H__ */
  56980. +#endif
  56981. +/*
  56982. + Local variables:
  56983. + c-indentation-style: "K&R"
  56984. + mode-name: "LC"
  56985. + c-basic-offset: 8
  56986. + tab-width: 8
  56987. + fill-column: 80
  56988. + scroll-step: 1
  56989. + End:
  56990. +*/
  56991. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/node/node.c linux-5.16.14/fs/reiser4/plugin/node/node.c
  56992. --- linux-5.16.14.orig/fs/reiser4/plugin/node/node.c 1970-01-01 01:00:00.000000000 +0100
  56993. +++ linux-5.16.14/fs/reiser4/plugin/node/node.c 2022-03-12 13:26:19.681892799 +0100
  56994. @@ -0,0 +1,170 @@
  56995. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  56996. +
  56997. +/* Node plugin interface.
  56998. +
  56999. + Description: The tree provides the abstraction of flows, which it
  57000. + internally fragments into items which it stores in nodes.
  57001. +
  57002. + A key_atom is a piece of data bound to a single key.
  57003. +
  57004. + For reasonable space efficiency to be achieved it is often
  57005. + necessary to store key_atoms in the nodes in the form of items, where
  57006. + an item is a sequence of key_atoms of the same or similar type. It is
  57007. + more space-efficient, because the item can implement (very)
  57008. + efficient compression of key_atom's bodies using internal knowledge
  57009. + about their semantics, and it can often avoid having a key for each
  57010. + key_atom. Each type of item has specific operations implemented by its
  57011. + item handler (see balance.c).
  57012. +
  57013. + Rationale: the rest of the code (specifically balancing routines)
  57014. + accesses leaf level nodes through this interface. This way we can
  57015. + implement various block layouts and even combine various layouts
  57016. + within the same tree. Balancing/allocating algorithms should not
  57017. + care about peculiarities of splitting/merging specific item types,
  57018. + but rather should leave that to the item's item handler.
  57019. +
  57020. + Items, including those that provide the abstraction of flows, have
  57021. + the property that if you move them in part or in whole to another
  57022. + node, the balancing code invokes their is_left_mergeable()
  57023. + item_operation to determine if they are mergeable with their new
  57024. + neighbor in the node you have moved them to. For some items the
  57025. + is_left_mergeable() function always returns null.
  57026. +
  57027. + When moving the bodies of items from one node to another:
  57028. +
  57029. + if a partial item is shifted to another node the balancing code invokes
  57030. + an item handler method to handle the item splitting.
  57031. +
  57032. + if the balancing code needs to merge with an item in the node it
  57033. + is shifting to, it will invoke an item handler method to handle
  57034. + the item merging.
  57035. +
  57036. + if it needs to move whole item bodies unchanged, the balancing code uses xmemcpy()
  57037. + adjusting the item headers after the move is done using the node handler.
  57038. +*/
  57039. +
  57040. +#include "../../forward.h"
  57041. +#include "../../debug.h"
  57042. +#include "../../key.h"
  57043. +#include "../../coord.h"
  57044. +#include "../plugin_header.h"
  57045. +#include "../item/item.h"
  57046. +#include "node.h"
  57047. +#include "../plugin.h"
  57048. +#include "../../znode.h"
  57049. +#include "../../tree.h"
  57050. +#include "../../super.h"
  57051. +#include "../../reiser4.h"
  57052. +
  57053. +/**
  57054. + * leftmost_key_in_node - get the smallest key in node
  57055. + * @node:
  57056. + * @key: store result here
  57057. + *
  57058. + * Stores the leftmost key of @node in @key.
  57059. + */
  57060. +reiser4_key *leftmost_key_in_node(const znode *node, reiser4_key *key)
  57061. +{
  57062. + assert("nikita-1634", node != NULL);
  57063. + assert("nikita-1635", key != NULL);
  57064. +
  57065. + if (!node_is_empty(node)) {
  57066. + coord_t first_item;
  57067. +
  57068. + coord_init_first_unit(&first_item, (znode *) node);
  57069. + item_key_by_coord(&first_item, key);
  57070. + } else
  57071. + *key = *reiser4_max_key();
  57072. + return key;
  57073. +}
  57074. +
  57075. +node_plugin node_plugins[LAST_NODE_ID] = {
  57076. + [NODE40_ID] = {
  57077. + .h = {
  57078. + .type_id = REISER4_NODE_PLUGIN_TYPE,
  57079. + .id = NODE40_ID,
  57080. + .pops = NULL,
  57081. + .label = "unified",
  57082. + .desc = "unified node layout",
  57083. + .linkage = {NULL, NULL}
  57084. + },
  57085. + .item_overhead = item_overhead_node40,
  57086. + .free_space = free_space_node40,
  57087. + .lookup = lookup_node40,
  57088. + .num_of_items = num_of_items_node40,
  57089. + .item_by_coord = item_by_coord_node40,
  57090. + .length_by_coord = length_by_coord_node40,
  57091. + .plugin_by_coord = plugin_by_coord_node40,
  57092. + .key_at = key_at_node40,
  57093. + .estimate = estimate_node40,
  57094. + .check = check_node40,
  57095. + .parse = parse_node40,
  57096. + .init = init_node40,
  57097. +#ifdef GUESS_EXISTS
  57098. + .guess = guess_node40,
  57099. +#endif
  57100. + .change_item_size = change_item_size_node40,
  57101. + .create_item = create_item_node40,
  57102. + .update_item_key = update_item_key_node40,
  57103. + .cut_and_kill = kill_node40,
  57104. + .cut = cut_node40,
  57105. + .shift = shift_node40,
  57106. + .shrink_item = shrink_item_node40,
  57107. + .fast_insert = fast_insert_node40,
  57108. + .fast_paste = fast_paste_node40,
  57109. + .fast_cut = fast_cut_node40,
  57110. + .max_item_size = max_item_size_node40,
  57111. + .prepare_removal = prepare_removal_node40,
  57112. + .set_item_plugin = set_item_plugin_node40
  57113. + },
  57114. + [NODE41_ID] = {
  57115. + .h = {
  57116. + .type_id = REISER4_NODE_PLUGIN_TYPE,
  57117. + .id = NODE41_ID,
  57118. + .pops = NULL,
  57119. + .label = "node41",
  57120. + .desc = "node41 layout",
  57121. + .linkage = {NULL, NULL}
  57122. + },
  57123. + .item_overhead = item_overhead_node40,
  57124. + .free_space = free_space_node40,
  57125. + .lookup = lookup_node40,
  57126. + .num_of_items = num_of_items_node40,
  57127. + .item_by_coord = item_by_coord_node40,
  57128. + .length_by_coord = length_by_coord_node40,
  57129. + .plugin_by_coord = plugin_by_coord_node40,
  57130. + .key_at = key_at_node40,
  57131. + .estimate = estimate_node40,
  57132. + .check = NULL,
  57133. + .parse = parse_node41,
  57134. + .init = init_node41,
  57135. +#ifdef GUESS_EXISTS
  57136. + .guess = guess_node41,
  57137. +#endif
  57138. + .change_item_size = change_item_size_node40,
  57139. + .create_item = create_item_node40,
  57140. + .update_item_key = update_item_key_node40,
  57141. + .cut_and_kill = kill_node40,
  57142. + .cut = cut_node40,
  57143. + .shift = shift_node41,
  57144. + .shrink_item = shrink_item_node40,
  57145. + .fast_insert = fast_insert_node40,
  57146. + .fast_paste = fast_paste_node40,
  57147. + .fast_cut = fast_cut_node40,
  57148. + .max_item_size = max_item_size_node41,
  57149. + .prepare_removal = prepare_removal_node40,
  57150. + .set_item_plugin = set_item_plugin_node40,
  57151. + .csum = csum_node41
  57152. + }
  57153. +};
  57154. +
  57155. +/*
  57156. + Local variables:
  57157. + c-indentation-style: "K&R"
  57158. + mode-name: "LC"
  57159. + c-basic-offset: 8
  57160. + tab-width: 8
  57161. + fill-column: 120
  57162. + scroll-step: 1
  57163. + End:
  57164. +*/
  57165. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/node/node.h linux-5.16.14/fs/reiser4/plugin/node/node.h
  57166. --- linux-5.16.14.orig/fs/reiser4/plugin/node/node.h 1970-01-01 01:00:00.000000000 +0100
  57167. +++ linux-5.16.14/fs/reiser4/plugin/node/node.h 2022-03-12 13:26:19.682892801 +0100
  57168. @@ -0,0 +1,275 @@
  57169. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  57170. +
  57171. +/* We need a definition of the default node layout here. */
  57172. +
  57173. +/* Generally speaking, it is best to have free space in the middle of the
  57174. + node so that two sets of things can grow towards it, and to have the
  57175. + item bodies on the left so that the last one of them grows into free
  57176. + space. We optimize for the case where we append new items to the end
  57177. + of the node, or grow the last item, because it hurts nothing to so
  57178. + optimize and it is a common special case to do massive insertions in
  57179. + increasing key order (and one of cases more likely to have a real user
  57180. + notice the delay time for).
  57181. +
  57182. + formatted leaf default layout: (leaf1)
  57183. +
  57184. + |node header:item bodies:free space:key + pluginid + item offset|
  57185. +
  57186. + We grow towards the middle, optimizing layout for the case where we
  57187. + append new items to the end of the node. The node header is fixed
  57188. + length. Keys, and item offsets plus pluginids for the items
  57189. + corresponding to them are in increasing key order, and are fixed
  57190. + length. Item offsets are relative to start of node (16 bits creating
  57191. + a node size limit of 64k, 12 bits might be a better choice....). Item
  57192. + bodies are in decreasing key order. Item bodies have a variable size.
  57193. + There is a one to one to one mapping of keys to item offsets to item
  57194. + bodies. Item offsets consist of pointers to the zeroth byte of the
  57195. + item body. Item length equals the start of the next item minus the
  57196. + start of this item, except the zeroth item whose length equals the end
  57197. + of the node minus the start of that item (plus a byte). In other
  57198. + words, the item length is not recorded anywhere, and it does not need
  57199. + to be since it is computable.
  57200. +
  57201. + Leaf variable length items and keys layout : (lvar)
  57202. +
  57203. + |node header:key offset + item offset + pluginid triplets:free space:key bodies:item bodies|
  57204. +
  57205. + We grow towards the middle, optimizing layout for the case where we
  57206. + append new items to the end of the node. The node header is fixed
  57207. + length. Keys and item offsets for the items corresponding to them are
  57208. + in increasing key order, and keys are variable length. Item offsets
  57209. + are relative to start of node (16 bits). Item bodies are in
  57210. + decreasing key order. Item bodies have a variable size. There is a
  57211. + one to one to one mapping of keys to item offsets to item bodies.
  57212. + Item offsets consist of pointers to the zeroth byte of the item body.
  57213. + Item length equals the start of the next item's key minus the start of
  57214. + this item, except the zeroth item whose length equals the end of the
  57215. + node minus the start of that item (plus a byte).
  57216. +
  57217. + leaf compressed keys layout: (lcomp)
  57218. +
  57219. + |node header:key offset + key inherit + item offset pairs:free space:key bodies:item bodies|
  57220. +
  57221. + We grow towards the middle, optimizing layout for the case where we
  57222. + append new items to the end of the node. The node header is fixed
  57223. + length. Keys and item offsets for the items corresponding to them are
  57224. + in increasing key order, and keys are variable length. The "key
  57225. + inherit" field indicates how much of the key prefix is identical to
  57226. + the previous key (stem compression as described in "Managing
  57227. + Gigabytes" is used). key_inherit is a one byte integer. The
  57228. + intra-node searches performed through this layout are linear searches,
  57229. + and this is theorized to not hurt performance much due to the high
  57230. + cost of processor stalls on modern CPUs, and the small number of keys
  57231. + in a single node. Item offsets are relative to start of node (16
  57232. + bits). Item bodies are in decreasing key order. Item bodies have a
  57233. + variable size. There is a one to one to one mapping of keys to item
  57234. + offsets to item bodies. Item offsets consist of pointers to the
  57235. + zeroth byte of the item body. Item length equals the start of the
  57236. + next item minus the start of this item, except the zeroth item whose
  57237. + length equals the end of the node minus the start of that item (plus a
  57238. + byte). In other words, item length and key length is not recorded
  57239. + anywhere, and it does not need to be since it is computable.
  57240. +
  57241. + internal node default layout: (idef1)
  57242. +
  57243. + just like leaf1 except that item bodies are either blocknrs of
  57244. + children or extents, and moving them may require updating parent
  57245. + pointers in the nodes that they point to.
  57246. +*/
  57247. +
  57248. +/* There is an inherent 3-way tradeoff between optimizing and
  57249. + exchanging disks between different architectures and code
  57250. + complexity. This is optimal and simple and inexchangeable.
  57251. + Someone else can do the code for exchanging disks and make it
  57252. + complex. It would not be that hard. Using other than the PAGE_SIZE
  57253. + might be suboptimal.
  57254. +*/
  57255. +
  57256. +#if !defined( __REISER4_NODE_H__ )
  57257. +#define __REISER4_NODE_H__
  57258. +
  57259. +#define LEAF40_NODE_SIZE PAGE_CACHE_SIZE
  57260. +
  57261. +#include "../../dformat.h"
  57262. +#include "../plugin_header.h"
  57263. +
  57264. +#include <linux/types.h>
  57265. +
  57266. +typedef enum {
  57267. + NS_FOUND = 0,
  57268. + NS_NOT_FOUND = -ENOENT
  57269. +} node_search_result;
  57270. +
  57271. +/* Maximal possible space overhead for creation of new item in a node */
  57272. +#define REISER4_NODE_MAX_OVERHEAD ( sizeof( reiser4_key ) + 32 )
  57273. +
  57274. +typedef enum {
  57275. + REISER4_NODE_DKEYS = (1 << 0),
  57276. + REISER4_NODE_TREE_STABLE = (1 << 1)
  57277. +} reiser4_node_check_flag;
  57278. +
  57279. +/* cut and cut_and_kill have too long a list of parameters. This structure is just to save some space on the stack */
  57280. +struct cut_list {
  57281. + coord_t *from;
  57282. + coord_t *to;
  57283. + const reiser4_key *from_key;
  57284. + const reiser4_key *to_key;
  57285. + reiser4_key *smallest_removed;
  57286. + carry_plugin_info *info;
  57287. + __u32 flags;
  57288. + struct inode *inode; /* this is to pass list of eflushed jnodes down to extent_kill_hook */
  57289. + lock_handle *left;
  57290. + lock_handle *right;
  57291. +};
  57292. +
  57293. +struct carry_cut_data;
  57294. +struct carry_kill_data;
  57295. +
  57296. +/* The responsibility of the node plugin is to store and give access
  57297. + to the sequence of items within the node. */
  57298. +typedef struct node_plugin {
  57299. + /* generic plugin fields */
  57300. + plugin_header h;
  57301. +
  57302. + /* calculates the amount of space that will be required to store an
  57303. + item which is in addition to the space consumed by the item body.
  57304. + (the space consumed by the item body can be gotten by calling
  57305. + item->estimate) */
  57306. + size_t(*item_overhead) (const znode * node, flow_t * f);
  57307. +
  57308. + /* returns free space by looking into node (i.e., without using
  57309. + znode->free_space). */
  57310. + size_t(*free_space) (znode * node);
  57311. + /* search within the node for the one item which might
  57312. + contain the key, invoking item->search_within to search within
  57313. + that item to see if it is in there */
  57314. + node_search_result(*lookup) (znode * node, const reiser4_key * key,
  57315. + lookup_bias bias, coord_t * coord);
  57316. + /* number of items in node */
  57317. + int (*num_of_items) (const znode * node);
  57318. +
  57319. + /* store information about item in @coord in @data */
  57320. + /* break into several node ops, don't add any more uses of this before doing so */
  57321. + /*int ( *item_at )( const coord_t *coord, reiser4_item_data *data ); */
  57322. + char *(*item_by_coord) (const coord_t * coord);
  57323. + int (*length_by_coord) (const coord_t * coord);
  57324. + item_plugin *(*plugin_by_coord) (const coord_t * coord);
  57325. +
  57326. + /* store item key in @key */
  57327. + reiser4_key *(*key_at) (const coord_t * coord, reiser4_key * key);
  57328. + /* conservatively estimate whether unit of what size can fit
  57329. + into node. This estimation should be performed without
  57330. + actually looking into the node's content (free space is saved in
  57331. + znode). */
  57332. + size_t(*estimate) (znode * node);
  57333. +
  57334. + /* performs every consistency check the node plugin author could
  57335. + imagine. Optional. */
  57336. + int (*check) (const znode * node, __u32 flags, const char **error);
  57337. +
  57338. + /* Called when node is read into memory and node plugin is
  57339. + already detected. This should read some data into znode (like free
  57340. + space counter) and, optionally, check data consistency.
  57341. + */
  57342. + int (*parse) (znode * node);
  57343. + /* This method is called on a new node to initialise plugin specific
  57344. + data (header, etc.) */
  57345. + int (*init) (znode * node);
  57346. + /* Check whether @node content conforms to this plugin format.
  57347. + Probably only useful after support for old V3.x formats is added.
  57348. + Uncomment after 4.0 only.
  57349. + */
  57350. + /* int ( *guess )( const znode *node ); */
  57351. +#if REISER4_DEBUG
  57352. + void (*print) (const char *prefix, const znode * node, __u32 flags);
  57353. +#endif
  57354. + /* change size of @item by @by bytes. @item->node has enough free
  57355. + space. When @by > 0 - free space is appended to end of item. When
  57356. + @by < 0 - item is truncated - it is assumed that last @by bytes if
  57357. + the item are freed already */
  57358. + void (*change_item_size) (coord_t * item, int by);
  57359. +
  57360. + /* create new item @length bytes long in coord @target */
  57361. + int (*create_item) (coord_t * target, const reiser4_key * key,
  57362. + reiser4_item_data * data, carry_plugin_info * info);
  57363. +
  57364. + /* update key of item. */
  57365. + void (*update_item_key) (coord_t * target, const reiser4_key * key,
  57366. + carry_plugin_info * info);
  57367. +
  57368. + int (*cut_and_kill) (struct carry_kill_data *, carry_plugin_info *);
  57369. + int (*cut) (struct carry_cut_data *, carry_plugin_info *);
  57370. +
  57371. + /*
  57372. + * shrink item pointed to by @coord by @delta bytes.
  57373. + */
  57374. + int (*shrink_item) (coord_t * coord, int delta);
  57375. +
  57376. + /* copy as much as possible but not more than up to @stop from
  57377. + @stop->node to @target. If (pend == append) then data from beginning of
  57378. + @stop->node are copied to the end of @target. If (pend == prepend) then
  57379. + data from the end of @stop->node are copied to the beginning of
  57380. + @target. Copied data are removed from @stop->node. Information
  57381. + about what to do on upper level is stored in @todo */
  57382. + int (*shift) (coord_t * stop, znode * target, shift_direction pend,
  57383. + int delete_node, int including_insert_coord,
  57384. + carry_plugin_info * info);
  57385. + /* return true if this node allows skip carry() in some situations
  57386. + (see fs/reiser4/tree.c:insert_by_coord()). Reiser3.x format
  57387. + emulation doesn't.
  57388. +
  57389. + This will speed up insertions that don't require updates to the
  57390. + parent, by bypassing initialisation of carry() structures. It's
  57391. + believed that majority of insertions will fit there.
  57392. +
  57393. + */
  57394. + int (*fast_insert) (const coord_t * coord);
  57395. + int (*fast_paste) (const coord_t * coord);
  57396. + int (*fast_cut) (const coord_t * coord);
  57397. + /* this limits the max size of an item which can be inserted into a node and
  57398. + the number of bytes an item in a node may be appended with */
  57399. + int (*max_item_size) (void);
  57400. + int (*prepare_removal) (znode * empty, carry_plugin_info * info);
  57401. + /* change plugin id of items which are in a node already. Currently it is used in tail conversion for regular
  57402. + * files */
  57403. + int (*set_item_plugin) (coord_t * coord, item_id);
  57404. + /* calculate and check/update znode's checksum
  57405. + (if @check is true, then check, otherwise update) */
  57406. + int (*csum)(znode *node, int check);
  57407. +} node_plugin;
  57408. +
  57409. +typedef enum {
  57410. + NODE40_ID, /* standard unified node layout used for both,
  57411. + leaf and internal nodes */
  57412. + NODE41_ID, /* node layout with a checksum */
  57413. + LAST_NODE_ID
  57414. +} reiser4_node_id;
  57415. +
  57416. +extern reiser4_key *leftmost_key_in_node(const znode * node, reiser4_key * key);
  57417. +#if REISER4_DEBUG
  57418. +extern void print_node_content(const char *prefix, const znode * node,
  57419. + __u32 flags);
  57420. +#endif
  57421. +
  57422. +extern void indent_znode(const znode * node);
  57423. +
  57424. +typedef struct common_node_header {
  57425. + /*
  57426. + * identifier of node plugin. Must be located at the very beginning of
  57427. + * a node.
  57428. + */
  57429. + __le16 plugin_id;
  57430. +} common_node_header;
  57431. +
  57432. +/* __REISER4_NODE_H__ */
  57433. +#endif
  57434. +/*
  57435. + * Local variables:
  57436. + * c-indentation-style: "K&R"
  57437. + * mode-name: "LC"
  57438. + * c-basic-offset: 8
  57439. + * tab-width: 8
  57440. + * fill-column: 79
  57441. + * scroll-step: 1
  57442. + * End:
  57443. + */
  57444. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/object.c linux-5.16.14/fs/reiser4/plugin/object.c
  57445. --- linux-5.16.14.orig/fs/reiser4/plugin/object.c 1970-01-01 01:00:00.000000000 +0100
  57446. +++ linux-5.16.14/fs/reiser4/plugin/object.c 2022-03-12 13:26:19.683892804 +0100
  57447. @@ -0,0 +1,548 @@
  57448. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  57449. + * reiser4/README */
  57450. +
  57451. +/*
  57452. + * Examples of object plugins: file, directory, symlink, special file.
  57453. + *
  57454. + * Plugins associated with inode:
  57455. + *
  57456. + * Plugin of inode is plugin referenced by plugin-id field of on-disk
  57457. + * stat-data. How we store this plugin in the in-core inode is not
  57458. + * important. Currently pointers are used, another variant is to store offsets
  57459. + * and do array lookup on each access.
  57460. + *
  57461. + * Now, each inode has one selected plugin: object plugin that
  57462. + * determines what type of file this object is: directory, regular etc.
  57463. + *
  57464. + * This main plugin can use other plugins that are thus subordinated to
  57465. + * it. Directory instance of object plugin uses hash; regular file
  57466. + * instance uses tail policy plugin.
  57467. + *
  57468. + * Object plugin is either taken from id in stat-data or guessed from
  57469. + * i_mode bits. Once it is established we ask it to install its
  57470. + * subordinate plugins, by looking again in stat-data or inheriting them
  57471. + * from parent.
  57472. + *
  57473. + * How new inode is initialized during ->read_inode():
  57474. + * 1 read stat-data and initialize inode fields: i_size, i_mode,
  57475. + * i_generation, capabilities etc.
  57476. + * 2 read plugin id from stat data or try to guess plugin id
  57477. + * from inode->i_mode bits if plugin id is missing.
  57478. + * 3 Call ->init_inode() method of stat-data plugin to initialise inode fields.
  57479. + *
  57480. + * NIKITA-FIXME-HANS: can you say a little about 1 being done before 3? What
  57481. + * if stat data does contain i_size, etc., due to it being an unusual plugin?
  57482. + *
  57483. + * 4 Call ->activate() method of object's plugin. Plugin is either read from
  57484. + * from stat-data or guessed from mode bits
  57485. + * 5 Call ->inherit() method of object plugin to inherit as yet uninitialized
  57486. + * plugins from parent.
  57487. + *
  57488. + * Easy induction proves that on last step all plugins of inode would be
  57489. + * initialized.
  57490. + *
  57491. + * When creating new object:
  57492. + * 1 obtain object plugin id (see next period)
  57493. + * NIKITA-FIXME-HANS: period?
  57494. + * 2 ->install() this plugin
  57495. + * 3 ->inherit() the rest from the parent
  57496. + *
  57497. + * We need some examples of creating an object with default and non-default
  57498. + * plugin ids. Nikita, please create them.
  57499. + */
  57500. +
  57501. +#include "../inode.h"
  57502. +
  57503. +int _bugop(void)
  57504. +{
  57505. + BUG_ON(1);
  57506. + return 0;
  57507. +}
  57508. +
  57509. +#define bugop ((void *)_bugop)
  57510. +
  57511. +static int flow_by_inode_bugop(struct inode *inode, const char __user *buf,
  57512. + int user, loff_t size,
  57513. + loff_t off, rw_op op, flow_t *f)
  57514. +{
  57515. + BUG_ON(1);
  57516. + return 0;
  57517. +}
  57518. +
  57519. +static int key_by_inode_bugop(struct inode *inode, loff_t off, reiser4_key *key)
  57520. +{
  57521. + BUG_ON(1);
  57522. + return 0;
  57523. +}
  57524. +
  57525. +static int change_file(struct inode *inode,
  57526. + reiser4_plugin * plugin,
  57527. + pset_member memb)
  57528. +{
  57529. + /* cannot change object plugin of already existing object */
  57530. + if (memb == PSET_FILE)
  57531. + return RETERR(-EINVAL);
  57532. +
  57533. + /* Change PSET_CREATE */
  57534. + return aset_set_unsafe(&reiser4_inode_data(inode)->pset, memb, plugin);
  57535. +}
  57536. +
  57537. +static reiser4_plugin_ops file_plugin_ops = {
  57538. + .change = change_file
  57539. +};
  57540. +
  57541. +static struct inode_operations null_i_ops = {.create = NULL};
  57542. +static struct file_operations null_f_ops = {.owner = NULL};
  57543. +static struct address_space_operations null_a_ops = {.writepage = NULL};
  57544. +
  57545. +/*
  57546. + * Reiser4 provides for VFS either dispatcher, or common (fop,
  57547. + * iop, aop) method.
  57548. + *
  57549. + * Dispatchers (suffixed with "dispatch") pass management to
  57550. + * proper plugin in accordance with plugin table (pset) located
  57551. + * in the private part of inode.
  57552. + *
  57553. + * Common methods are NOT prefixed with "dispatch". They are
  57554. + * the same for all plugins of FILE interface, and, hence, no
  57555. + * dispatching is needed.
  57556. + */
  57557. +
  57558. +/*
  57559. + * VFS methods for regular files
  57560. + */
  57561. +static struct inode_operations regular_file_i_ops = {
  57562. + .permission = reiser4_permission_common,
  57563. + .setattr = reiser4_setattr_dispatch,
  57564. + .getattr = reiser4_getattr_common
  57565. +};
  57566. +static struct file_operations regular_file_f_ops = {
  57567. + .llseek = generic_file_llseek,
  57568. + .read_iter = reiser4_read_dispatch,
  57569. + .write = reiser4_write_dispatch,
  57570. + .unlocked_ioctl = reiser4_ioctl_dispatch,
  57571. +#ifdef CONFIG_COMPAT
  57572. + .compat_ioctl = reiser4_ioctl_dispatch,
  57573. +#endif
  57574. + .mmap = reiser4_mmap_dispatch,
  57575. + .open = reiser4_open_dispatch,
  57576. + .release = reiser4_release_dispatch,
  57577. + .fsync = reiser4_sync_file_common,
  57578. + .splice_read = generic_file_splice_read,
  57579. +};
  57580. +static struct address_space_operations regular_file_a_ops = {
  57581. + .writepage = reiser4_writepage,
  57582. + .readpage = reiser4_readpage_dispatch,
  57583. + //.sync_page = block_sync_page,
  57584. + .writepages = reiser4_writepages_dispatch,
  57585. + .set_page_dirty = reiser4_set_page_dirty,
  57586. + .readpages = reiser4_readpages_dispatch,
  57587. + .write_begin = reiser4_write_begin_dispatch,
  57588. + .write_end = reiser4_write_end_dispatch,
  57589. + .bmap = reiser4_bmap_dispatch,
  57590. + .invalidatepage = reiser4_invalidatepage,
  57591. + .releasepage = reiser4_releasepage,
  57592. + .migratepage = reiser4_migratepage,
  57593. + .batch_lock_tabu = 1
  57594. +};
  57595. +
  57596. +/* VFS methods for symlink files */
  57597. +static struct inode_operations symlink_file_i_ops = {
  57598. + .get_link = reiser4_get_link_common,
  57599. + .permission = reiser4_permission_common,
  57600. + .setattr = reiser4_setattr_common,
  57601. + .getattr = reiser4_getattr_common
  57602. +};
  57603. +
  57604. +/* VFS methods for special files */
  57605. +static struct inode_operations special_file_i_ops = {
  57606. + .permission = reiser4_permission_common,
  57607. + .setattr = reiser4_setattr_common,
  57608. + .getattr = reiser4_getattr_common
  57609. +};
  57610. +
  57611. +/* VFS methods for directories */
  57612. +static struct inode_operations directory_i_ops = {
  57613. + .create = reiser4_create_common,
  57614. + .lookup = reiser4_lookup_common,
  57615. + .link = reiser4_link_common,
  57616. + .unlink = reiser4_unlink_common,
  57617. + .symlink = reiser4_symlink_common,
  57618. + .mkdir = reiser4_mkdir_common,
  57619. + .rmdir = reiser4_unlink_common,
  57620. + .mknod = reiser4_mknod_common,
  57621. + .rename = reiser4_rename2_common,
  57622. + .permission = reiser4_permission_common,
  57623. + .setattr = reiser4_setattr_common,
  57624. + .getattr = reiser4_getattr_common
  57625. +};
  57626. +static struct file_operations directory_f_ops = {
  57627. + .llseek = reiser4_llseek_dir_common,
  57628. + .read = generic_read_dir,
  57629. + .iterate = reiser4_iterate_common,
  57630. + .release = reiser4_release_dir_common,
  57631. + .fsync = reiser4_sync_common
  57632. +};
  57633. +
  57634. +static int reiser4_writepages_directory(struct address_space *mapping,
  57635. + struct writeback_control *wbc)
  57636. +{
  57637. + return 0;
  57638. +}
  57639. +
  57640. +static struct address_space_operations directory_a_ops = {
  57641. + .writepages = reiser4_writepages_directory
  57642. +};
  57643. +
  57644. +/*
  57645. + * Definitions of object plugins.
  57646. + */
  57647. +
  57648. +file_plugin file_plugins[LAST_FILE_PLUGIN_ID] = {
  57649. + [UNIX_FILE_PLUGIN_ID] = {
  57650. + .h = {
  57651. + .type_id = REISER4_FILE_PLUGIN_TYPE,
  57652. + .id = UNIX_FILE_PLUGIN_ID,
  57653. + .groups = (1 << REISER4_REGULAR_FILE),
  57654. + .pops = &file_plugin_ops,
  57655. + .label = "reg",
  57656. + .desc = "regular file",
  57657. + .linkage = {NULL, NULL},
  57658. + },
  57659. + /*
  57660. + * invariant vfs ops
  57661. + */
  57662. + .inode_ops = &regular_file_i_ops,
  57663. + .file_ops = &regular_file_f_ops,
  57664. + .as_ops = &regular_file_a_ops,
  57665. + /*
  57666. + * private i_ops
  57667. + */
  57668. + .setattr = setattr_unix_file,
  57669. + .open = open_unix_file,
  57670. + .read = read_unix_file,
  57671. + .write = write_unix_file,
  57672. + .ioctl = ioctl_unix_file,
  57673. + .mmap = mmap_unix_file,
  57674. + .release = release_unix_file,
  57675. + /*
  57676. + * private f_ops
  57677. + */
  57678. + .readpage = readpage_unix_file,
  57679. + .readpages = readpages_unix_file,
  57680. + .writepages = writepages_unix_file,
  57681. + .write_begin = write_begin_unix_file,
  57682. + .write_end = write_end_unix_file,
  57683. + /*
  57684. + * private a_ops
  57685. + */
  57686. + .bmap = bmap_unix_file,
  57687. + /*
  57688. + * other private methods
  57689. + */
  57690. + .write_sd_by_inode = write_sd_by_inode_common,
  57691. + .flow_by_inode = flow_by_inode_unix_file,
  57692. + .key_by_inode = key_by_inode_and_offset_common,
  57693. + .set_plug_in_inode = set_plug_in_inode_common,
  57694. + .adjust_to_parent = adjust_to_parent_common,
  57695. + .create_object = reiser4_create_object_common,
  57696. + .delete_object = delete_object_unix_file,
  57697. + .add_link = reiser4_add_link_common,
  57698. + .rem_link = reiser4_rem_link_common,
  57699. + .owns_item = owns_item_unix_file,
  57700. + .can_add_link = can_add_link_common,
  57701. + .detach = NULL,
  57702. + .safelink = safelink_common,
  57703. + .estimate = {
  57704. + .create = estimate_create_common,
  57705. + .update = estimate_update_common,
  57706. + .unlink = estimate_unlink_common
  57707. + },
  57708. + .init_inode_data = init_inode_data_unix_file,
  57709. + .cut_tree_worker = cut_tree_worker_common,
  57710. + .wire = {
  57711. + .write = wire_write_common,
  57712. + .read = wire_read_common,
  57713. + .get = wire_get_common,
  57714. + .size = wire_size_common,
  57715. + .done = wire_done_common
  57716. + }
  57717. + },
  57718. + [DIRECTORY_FILE_PLUGIN_ID] = {
  57719. + .h = {
  57720. + .type_id = REISER4_FILE_PLUGIN_TYPE,
  57721. + .id = DIRECTORY_FILE_PLUGIN_ID,
  57722. + .groups = (1 << REISER4_DIRECTORY_FILE),
  57723. + .pops = &file_plugin_ops,
  57724. + .label = "dir",
  57725. + .desc = "directory",
  57726. + .linkage = {NULL, NULL}
  57727. + },
  57728. + .inode_ops = &null_i_ops,
  57729. + .file_ops = &null_f_ops,
  57730. + .as_ops = &null_a_ops,
  57731. +
  57732. + .write_sd_by_inode = write_sd_by_inode_common,
  57733. + .flow_by_inode = flow_by_inode_bugop,
  57734. + .key_by_inode = key_by_inode_bugop,
  57735. + .set_plug_in_inode = set_plug_in_inode_common,
  57736. + .adjust_to_parent = adjust_to_parent_common_dir,
  57737. + .create_object = reiser4_create_object_common,
  57738. + .delete_object = reiser4_delete_dir_common,
  57739. + .add_link = reiser4_add_link_common,
  57740. + .rem_link = rem_link_common_dir,
  57741. + .owns_item = owns_item_common_dir,
  57742. + .can_add_link = can_add_link_common,
  57743. + .can_rem_link = can_rem_link_common_dir,
  57744. + .detach = reiser4_detach_common_dir,
  57745. + .safelink = safelink_common,
  57746. + .estimate = {
  57747. + .create = estimate_create_common_dir,
  57748. + .update = estimate_update_common,
  57749. + .unlink = estimate_unlink_common_dir
  57750. + },
  57751. + .wire = {
  57752. + .write = wire_write_common,
  57753. + .read = wire_read_common,
  57754. + .get = wire_get_common,
  57755. + .size = wire_size_common,
  57756. + .done = wire_done_common
  57757. + },
  57758. + .init_inode_data = init_inode_ordering,
  57759. + .cut_tree_worker = cut_tree_worker_common,
  57760. + },
  57761. + [SYMLINK_FILE_PLUGIN_ID] = {
  57762. + .h = {
  57763. + .type_id = REISER4_FILE_PLUGIN_TYPE,
  57764. + .id = SYMLINK_FILE_PLUGIN_ID,
  57765. + .groups = (1 << REISER4_SYMLINK_FILE),
  57766. + .pops = &file_plugin_ops,
  57767. + .label = "symlink",
  57768. + .desc = "symbolic link",
  57769. + .linkage = {NULL,NULL}
  57770. + },
  57771. + .inode_ops = &symlink_file_i_ops,
  57772. + /* inode->i_fop of symlink is initialized
  57773. + by NULL in setup_inode_ops */
  57774. + .file_ops = &null_f_ops,
  57775. + .as_ops = &null_a_ops,
  57776. +
  57777. + .write_sd_by_inode = write_sd_by_inode_common,
  57778. + .set_plug_in_inode = set_plug_in_inode_common,
  57779. + .adjust_to_parent = adjust_to_parent_common,
  57780. + .create_object = reiser4_create_symlink,
  57781. + .delete_object = reiser4_delete_object_common,
  57782. + .add_link = reiser4_add_link_common,
  57783. + .rem_link = reiser4_rem_link_common,
  57784. + .can_add_link = can_add_link_common,
  57785. + .detach = NULL,
  57786. + .safelink = safelink_common,
  57787. + .estimate = {
  57788. + .create = estimate_create_common,
  57789. + .update = estimate_update_common,
  57790. + .unlink = estimate_unlink_common
  57791. + },
  57792. + .init_inode_data = init_inode_ordering,
  57793. + .cut_tree_worker = cut_tree_worker_common,
  57794. + .destroy_inode = destroy_inode_symlink,
  57795. + .wire = {
  57796. + .write = wire_write_common,
  57797. + .read = wire_read_common,
  57798. + .get = wire_get_common,
  57799. + .size = wire_size_common,
  57800. + .done = wire_done_common
  57801. + }
  57802. + },
  57803. + [SPECIAL_FILE_PLUGIN_ID] = {
  57804. + .h = {
  57805. + .type_id = REISER4_FILE_PLUGIN_TYPE,
  57806. + .id = SPECIAL_FILE_PLUGIN_ID,
  57807. + .groups = (1 << REISER4_SPECIAL_FILE),
  57808. + .pops = &file_plugin_ops,
  57809. + .label = "special",
  57810. + .desc =
  57811. + "special: fifo, device or socket",
  57812. + .linkage = {NULL, NULL}
  57813. + },
  57814. + .inode_ops = &special_file_i_ops,
  57815. + /* file_ops of special files (sockets, block, char, fifo) are
  57816. + initialized by init_special_inode. */
  57817. + .file_ops = &null_f_ops,
  57818. + .as_ops = &null_a_ops,
  57819. +
  57820. + .write_sd_by_inode = write_sd_by_inode_common,
  57821. + .set_plug_in_inode = set_plug_in_inode_common,
  57822. + .adjust_to_parent = adjust_to_parent_common,
  57823. + .create_object = reiser4_create_object_common,
  57824. + .delete_object = reiser4_delete_object_common,
  57825. + .add_link = reiser4_add_link_common,
  57826. + .rem_link = reiser4_rem_link_common,
  57827. + .owns_item = owns_item_common,
  57828. + .can_add_link = can_add_link_common,
  57829. + .detach = NULL,
  57830. + .safelink = safelink_common,
  57831. + .estimate = {
  57832. + .create = estimate_create_common,
  57833. + .update = estimate_update_common,
  57834. + .unlink = estimate_unlink_common
  57835. + },
  57836. + .init_inode_data = init_inode_ordering,
  57837. + .cut_tree_worker = cut_tree_worker_common,
  57838. + .wire = {
  57839. + .write = wire_write_common,
  57840. + .read = wire_read_common,
  57841. + .get = wire_get_common,
  57842. + .size = wire_size_common,
  57843. + .done = wire_done_common
  57844. + }
  57845. + },
  57846. + [CRYPTCOMPRESS_FILE_PLUGIN_ID] = {
  57847. + .h = {
  57848. + .type_id = REISER4_FILE_PLUGIN_TYPE,
  57849. + .id = CRYPTCOMPRESS_FILE_PLUGIN_ID,
  57850. + .groups = (1 << REISER4_REGULAR_FILE),
  57851. + .pops = &file_plugin_ops,
  57852. + .label = "cryptcompress",
  57853. + .desc = "cryptcompress file",
  57854. + .linkage = {NULL, NULL}
  57855. + },
  57856. + .inode_ops = &regular_file_i_ops,
  57857. + .file_ops = &regular_file_f_ops,
  57858. + .as_ops = &regular_file_a_ops,
  57859. +
  57860. + .setattr = setattr_cryptcompress,
  57861. + .open = open_cryptcompress,
  57862. + .read = read_cryptcompress,
  57863. + .write = write_cryptcompress,
  57864. + .ioctl = ioctl_cryptcompress,
  57865. + .mmap = mmap_cryptcompress,
  57866. + .release = release_cryptcompress,
  57867. +
  57868. + .readpage = readpage_cryptcompress,
  57869. + .readpages = readpages_cryptcompress,
  57870. + .writepages = writepages_cryptcompress,
  57871. + .write_begin = write_begin_cryptcompress,
  57872. + .write_end = write_end_cryptcompress,
  57873. +
  57874. + .bmap = bmap_cryptcompress,
  57875. +
  57876. + .write_sd_by_inode = write_sd_by_inode_common,
  57877. + .flow_by_inode = flow_by_inode_cryptcompress,
  57878. + .key_by_inode = key_by_inode_cryptcompress,
  57879. + .set_plug_in_inode = set_plug_in_inode_common,
  57880. + .adjust_to_parent = adjust_to_parent_cryptcompress,
  57881. + .create_object = create_object_cryptcompress,
  57882. + .delete_object = delete_object_cryptcompress,
  57883. + .add_link = reiser4_add_link_common,
  57884. + .rem_link = reiser4_rem_link_common,
  57885. + .owns_item = owns_item_common,
  57886. + .can_add_link = can_add_link_common,
  57887. + .detach = NULL,
  57888. + .safelink = safelink_common,
  57889. + .estimate = {
  57890. + .create = estimate_create_common,
  57891. + .update = estimate_update_common,
  57892. + .unlink = estimate_unlink_common
  57893. + },
  57894. + .init_inode_data = init_inode_data_cryptcompress,
  57895. + .cut_tree_worker = cut_tree_worker_cryptcompress,
  57896. + .destroy_inode = destroy_inode_cryptcompress,
  57897. + .wire = {
  57898. + .write = wire_write_common,
  57899. + .read = wire_read_common,
  57900. + .get = wire_get_common,
  57901. + .size = wire_size_common,
  57902. + .done = wire_done_common
  57903. + }
  57904. + }
  57905. +};
  57906. +
  57907. +static int change_dir(struct inode *inode,
  57908. + reiser4_plugin * plugin,
  57909. + pset_member memb)
  57910. +{
  57911. + /* cannot change dir plugin of already existing object */
  57912. + return RETERR(-EINVAL);
  57913. +}
  57914. +
  57915. +static reiser4_plugin_ops dir_plugin_ops = {
  57916. + .change = change_dir
  57917. +};
  57918. +
  57919. +/*
  57920. + * definition of directory plugins
  57921. + */
  57922. +
  57923. +dir_plugin dir_plugins[LAST_DIR_ID] = {
  57924. + /* standard hashed directory plugin */
  57925. + [HASHED_DIR_PLUGIN_ID] = {
  57926. + .h = {
  57927. + .type_id = REISER4_DIR_PLUGIN_TYPE,
  57928. + .id = HASHED_DIR_PLUGIN_ID,
  57929. + .pops = &dir_plugin_ops,
  57930. + .label = "dir",
  57931. + .desc = "hashed directory",
  57932. + .linkage = {NULL, NULL}
  57933. + },
  57934. + .inode_ops = &directory_i_ops,
  57935. + .file_ops = &directory_f_ops,
  57936. + .as_ops = &directory_a_ops,
  57937. +
  57938. + .get_parent = get_parent_common,
  57939. + .is_name_acceptable = is_name_acceptable_common,
  57940. + .build_entry_key = build_entry_key_hashed,
  57941. + .build_readdir_key = build_readdir_key_common,
  57942. + .add_entry = reiser4_add_entry_common,
  57943. + .rem_entry = reiser4_rem_entry_common,
  57944. + .init = reiser4_dir_init_common,
  57945. + .done = reiser4_dir_done_common,
  57946. + .attach = reiser4_attach_common,
  57947. + .detach = reiser4_detach_common,
  57948. + .estimate = {
  57949. + .add_entry = estimate_add_entry_common,
  57950. + .rem_entry = estimate_rem_entry_common,
  57951. + .unlink = dir_estimate_unlink_common
  57952. + }
  57953. + },
  57954. + /* hashed directory for which seekdir/telldir are guaranteed to
  57955. + * work. Brain-damage. */
  57956. + [SEEKABLE_HASHED_DIR_PLUGIN_ID] = {
  57957. + .h = {
  57958. + .type_id = REISER4_DIR_PLUGIN_TYPE,
  57959. + .id = SEEKABLE_HASHED_DIR_PLUGIN_ID,
  57960. + .pops = &dir_plugin_ops,
  57961. + .label = "dir32",
  57962. + .desc = "directory hashed with 31 bit hash",
  57963. + .linkage = {NULL, NULL}
  57964. + },
  57965. + .inode_ops = &directory_i_ops,
  57966. + .file_ops = &directory_f_ops,
  57967. + .as_ops = &directory_a_ops,
  57968. +
  57969. + .get_parent = get_parent_common,
  57970. + .is_name_acceptable = is_name_acceptable_common,
  57971. + .build_entry_key = build_entry_key_seekable,
  57972. + .build_readdir_key = build_readdir_key_common,
  57973. + .add_entry = reiser4_add_entry_common,
  57974. + .rem_entry = reiser4_rem_entry_common,
  57975. + .init = reiser4_dir_init_common,
  57976. + .done = reiser4_dir_done_common,
  57977. + .attach = reiser4_attach_common,
  57978. + .detach = reiser4_detach_common,
  57979. + .estimate = {
  57980. + .add_entry = estimate_add_entry_common,
  57981. + .rem_entry = estimate_rem_entry_common,
  57982. + .unlink = dir_estimate_unlink_common
  57983. + }
  57984. + }
  57985. +};
  57986. +
  57987. +/* Make Linus happy.
  57988. + Local variables:
  57989. + c-indentation-style: "K&R"
  57990. + mode-name: "LC"
  57991. + c-basic-offset: 8
  57992. + tab-width: 8
  57993. + fill-column: 120
  57994. + End:
  57995. +*/
  57996. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/object.h linux-5.16.14/fs/reiser4/plugin/object.h
  57997. --- linux-5.16.14.orig/fs/reiser4/plugin/object.h 1970-01-01 01:00:00.000000000 +0100
  57998. +++ linux-5.16.14/fs/reiser4/plugin/object.h 2022-03-12 13:26:19.683892804 +0100
  57999. @@ -0,0 +1,125 @@
  58000. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by
  58001. + * reiser4/README */
  58002. +
  58003. +/* Declaration of object plugin functions. */
  58004. +
  58005. +#if !defined(__FS_REISER4_PLUGIN_OBJECT_H__)
  58006. +#define __FS_REISER4_PLUGIN_OBJECT_H__
  58007. +
  58008. +#include "../type_safe_hash.h"
  58009. +
  58010. +/* common implementations of inode operations */
  58011. +int reiser4_create_common(struct user_namespace *mnt_userns,
  58012. + struct inode *parent, struct dentry *dentry,
  58013. + umode_t mode, bool);
  58014. +struct dentry *reiser4_lookup_common(struct inode *parent,
  58015. + struct dentry *dentry,
  58016. + unsigned int);
  58017. +int reiser4_link_common(struct dentry *existing, struct inode *parent,
  58018. + struct dentry *newname);
  58019. +int reiser4_unlink_common(struct inode *parent, struct dentry *victim);
  58020. +int reiser4_mkdir_common(struct user_namespace *mnt_userns,
  58021. + struct inode *parent, struct dentry *dentry,
  58022. + umode_t mode);
  58023. +int reiser4_symlink_common(struct user_namespace *mnt_userns,
  58024. + struct inode *parent, struct dentry *dentry,
  58025. + const char *linkname);
  58026. +int reiser4_mknod_common(struct user_namespace *mnt_userns,
  58027. + struct inode *parent, struct dentry *dentry,
  58028. + umode_t mode, dev_t rdev);
  58029. +int reiser4_rename2_common(struct user_namespace *mnt_userns,
  58030. + struct inode *old_dir, struct dentry *old_name,
  58031. + struct inode *new_dir, struct dentry *new_name,
  58032. + unsigned flags);
  58033. +const char *reiser4_get_link_common(struct dentry *, struct inode *inode,
  58034. + struct delayed_call *done);
  58035. +int reiser4_permission_common(struct user_namespace *mnt_userns,
  58036. + struct inode *, int mask);
  58037. +int reiser4_setattr_common(struct user_namespace *mnt_userns,
  58038. + struct dentry *, struct iattr *);
  58039. +int reiser4_getattr_common(struct user_namespace *mnt_userns,
  58040. + const struct path *path, struct kstat *stat,
  58041. + u32 request_mask, unsigned int flags);
  58042. +
  58043. +/* common implementations of file operations */
  58044. +loff_t reiser4_llseek_dir_common(struct file *, loff_t off, int origin);
  58045. +int reiser4_iterate_common(struct file *, struct dir_context *context);
  58046. +int reiser4_release_dir_common(struct inode *, struct file *);
  58047. +int reiser4_sync_common(struct file *, loff_t, loff_t, int datasync);
  58048. +
  58049. +/* file plugin operations: common implementations */
  58050. +int write_sd_by_inode_common(struct inode *);
  58051. +int key_by_inode_and_offset_common(struct inode *, loff_t, reiser4_key *);
  58052. +int set_plug_in_inode_common(struct inode *object, struct inode *parent,
  58053. + reiser4_object_create_data *);
  58054. +int adjust_to_parent_common(struct inode *object, struct inode *parent,
  58055. + struct inode *root);
  58056. +int adjust_to_parent_common_dir(struct inode *object, struct inode *parent,
  58057. + struct inode *root);
  58058. +int adjust_to_parent_cryptcompress(struct inode *object, struct inode *parent,
  58059. + struct inode *root);
  58060. +int reiser4_create_object_common(struct inode *object, struct inode *parent,
  58061. + reiser4_object_create_data *);
  58062. +int reiser4_delete_object_common(struct inode *);
  58063. +int reiser4_delete_dir_common(struct inode *);
  58064. +int reiser4_add_link_common(struct inode *object, struct inode *parent);
  58065. +int reiser4_rem_link_common(struct inode *object, struct inode *parent);
  58066. +int rem_link_common_dir(struct inode *object, struct inode *parent);
  58067. +int owns_item_common(const struct inode *, const coord_t *);
  58068. +int owns_item_common_dir(const struct inode *, const coord_t *);
  58069. +int can_add_link_common(const struct inode *);
  58070. +int can_rem_link_common_dir(const struct inode *);
  58071. +int reiser4_detach_common_dir(struct inode *child, struct inode *parent);
  58072. +int safelink_common(struct inode *, reiser4_safe_link_t, __u64 value);
  58073. +reiser4_block_nr estimate_create_common(const struct inode *);
  58074. +reiser4_block_nr estimate_create_common_dir(const struct inode *);
  58075. +reiser4_block_nr estimate_update_common(const struct inode *);
  58076. +reiser4_block_nr estimate_unlink_common(const struct inode *,
  58077. + const struct inode *);
  58078. +reiser4_block_nr estimate_unlink_common_dir(const struct inode *,
  58079. + const struct inode *);
  58080. +char *wire_write_common(struct inode *, char *start);
  58081. +char *wire_read_common(char *addr, reiser4_object_on_wire *);
  58082. +struct dentry *wire_get_common(struct super_block *, reiser4_object_on_wire *);
  58083. +int wire_size_common(struct inode *);
  58084. +void wire_done_common(reiser4_object_on_wire *);
  58085. +
  58086. +/* dir plugin operations: common implementations */
  58087. +struct dentry *get_parent_common(struct inode *child);
  58088. +int is_name_acceptable_common(const struct inode *, const char *name, int len);
  58089. +void build_entry_key_common(const struct inode *,
  58090. + const struct qstr *qname, reiser4_key *);
  58091. +int build_readdir_key_common(struct file *dir, reiser4_key *);
  58092. +int reiser4_add_entry_common(struct inode *object, struct dentry *where,
  58093. + reiser4_object_create_data * , reiser4_dir_entry_desc *);
  58094. +int reiser4_rem_entry_common(struct inode *object, struct dentry *where,
  58095. + reiser4_dir_entry_desc *);
  58096. +int reiser4_dir_init_common(struct inode *object, struct inode *parent,
  58097. + reiser4_object_create_data *);
  58098. +int reiser4_dir_done_common(struct inode *);
  58099. +int reiser4_attach_common(struct inode *child, struct inode *parent);
  58100. +int reiser4_detach_common(struct inode *object, struct inode *parent);
  58101. +reiser4_block_nr estimate_add_entry_common(const struct inode *);
  58102. +reiser4_block_nr estimate_rem_entry_common(const struct inode *);
  58103. +reiser4_block_nr dir_estimate_unlink_common(const struct inode *,
  58104. + const struct inode *);
  58105. +
  58106. +/* these are essential parts of common implementations, they are to make
  58107. + customized implementations easier */
  58108. +
  58109. +/* merely useful functions */
  58110. +int lookup_sd(struct inode *, znode_lock_mode, coord_t *, lock_handle * ,
  58111. + const reiser4_key * , int silent);
  58112. +
  58113. +/* __FS_REISER4_PLUGIN_OBJECT_H__ */
  58114. +#endif
  58115. +
  58116. +/* Make Linus happy.
  58117. + Local variables:
  58118. + c-indentation-style: "K&R"
  58119. + mode-name: "LC"
  58120. + c-basic-offset: 8
  58121. + tab-width: 8
  58122. + fill-column: 120
  58123. + End:
  58124. +*/
  58125. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/plugin.c linux-5.16.14/fs/reiser4/plugin/plugin.c
  58126. --- linux-5.16.14.orig/fs/reiser4/plugin/plugin.c 1970-01-01 01:00:00.000000000 +0100
  58127. +++ linux-5.16.14/fs/reiser4/plugin/plugin.c 2022-03-12 13:26:19.684892806 +0100
  58128. @@ -0,0 +1,569 @@
  58129. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  58130. + * reiser4/README */
  58131. +
  58132. +/* Basic plugin infrastructure, lookup etc. */
  58133. +
  58134. +/* PLUGINS:
  58135. +
  58136. + Plugins are internal Reiser4 "modules" or "objects" used to increase
  58137. + extensibility and allow external users to easily adapt reiser4 to
  58138. + their needs.
  58139. +
  58140. + Plugins are classified into several disjoint "types". Plugins
  58141. + belonging to the particular plugin type are termed "instances" of
  58142. + this type. Existing types are listed by enum reiser4_plugin_type
  58143. + (see plugin/plugin_header.h)
  58144. +
  58145. +NIKITA-FIXME-HANS: update this list, and review this entire comment for currency
  58146. +
  58147. + Object (file) plugin determines how given file-system object serves
  58148. + standard VFS requests for read, write, seek, mmap etc. Instances of
  58149. + file plugins are: regular file, directory, symlink. Another example
  58150. + of file plugin is audit plugin, that optionally records accesses to
  58151. + underlying object and forwards requests to it.
  58152. +
  58153. + Hash plugins compute hashes used by reiser4 to store and locate
  58154. + files within directories. Instances of hash plugin type are: r5,
  58155. + tea, rupasov.
  58156. +
  58157. + Tail plugins (or, more precisely, tail policy plugins) determine
  58158. + when last part of the file should be stored in a formatted item.
  58159. +
  58160. + Scope and lookup:
  58161. +
  58162. + Each plugin has a label such that the pair ( type_label, plugin_label ) is unique. This
  58163. + pair is a globally persistent and user-visible plugin
  58164. + identifier. Internally kernel maintains plugins and plugin types in
  58165. + arrays using an index into those arrays as plugin and plugin type
  58166. + identifiers. File-system in turn, also maintains persistent
  58167. + "dictionary" which is mapping from plugin label to numerical
  58168. + identifier which is stored in file-system objects. That is, we
  58169. + store the offset into the plugin array for that plugin type as the
  58170. + plugin id in the stat data of the filesystem object.
  58171. +
  58172. + Internal kernel plugin type identifier (index in plugins[] array) is
  58173. + of type reiser4_plugin_type. Set of available plugin types is
  58174. + currently static, but dynamic loading doesn't seem to pose
  58175. + insurmountable problems.
  58176. +
  58177. + Within each type plugins are addressed by the identifiers of type
  58178. + reiser4_plugin_id (indices in reiser4_plugin_type_data.builtin[]).
  58179. + Such identifiers are only required to be unique within one type,
  58180. + not globally.
  58181. +
  58182. + Thus, plugin in memory is uniquely identified by the pair (type_id,
  58183. + id).
  58184. +
  58185. + Usage:
  58186. +
  58187. + There exists only one instance of each plugin instance, but this
  58188. + single instance can be associated with many entities (file-system
  58189. + objects, items, nodes, transactions, file-descriptors etc.). Entity
  58190. + to which a plugin of given type is attached is termed (due to the lack of
  58191. + imagination) "subject" of this plugin type and, by abuse of
  58192. + terminology, subject of particular instance of this type to which
  58193. + it's attached currently. For example, inode is subject of object
  58194. + plugin type. Inode representing directory is subject of directory
  58195. + plugin, hash plugin type and some particular instance of hash plugin
  58196. + type. Inode, representing regular file is subject of "regular file"
  58197. + plugin, tail-policy plugin type etc.
  58198. +
  58199. + With each subject the plugin possibly stores some state. For example,
  58200. + the state of a directory plugin (instance of object plugin type) is pointer
  58201. + to hash plugin (if directories always use hashing that is).
  58202. +
  58203. + Interface:
  58204. +
  58205. + In addition to a scalar identifier, each plugin type and plugin
  58206. + proper has a "label": short string and a "description"---longer
  58207. + descriptive string. Labels and descriptions of plugin types are
  58208. + hard-coded into plugins[] array, declared and defined in
  58209. + plugin.c. Label and description of plugin are stored in .label and
  58210. + .desc fields of reiser4_plugin_header respectively. It's possible to
  58211. + locate plugin by the pair of labels.
  58212. +
  58213. + Features (not implemented):
  58214. +
  58215. + . user-level plugin manipulations:
  58216. + + reiser4("filename/..file_plugin<='audit'");
  58217. + + write(open("filename/..file_plugin"), "audit", 8);
  58218. +
  58219. + . user level utilities lsplug and chplug to manipulate plugins.
  58220. + Utilities are not of primary priority. Possibly they will be not
  58221. + working on v4.0
  58222. +
  58223. + NIKITA-FIXME-HANS: this should be a mkreiserfs option not a mount
  58224. + option, do you agree? I don't think that specifying it at mount time,
  58225. + and then changing it with each mount, is a good model for usage.
  58226. +
  58227. + . mount option "plug" to set-up plugins of root-directory.
  58228. + "plug=foo:bar" will set "bar" as default plugin of type "foo".
  58229. +
  58230. + Limitations:
  58231. +
  58232. + . each plugin type has to provide at least one builtin
  58233. + plugin. This is technical limitation and it can be lifted in the
  58234. + future.
  58235. +
  58236. + TODO:
  58237. +
  58238. + New plugin types/plugins:
  58239. + Things we should be able to separately choose to inherit:
  58240. +
  58241. + security plugins
  58242. +
  58243. + stat data
  58244. +
  58245. + file bodies
  58246. +
  58247. + file plugins
  58248. +
  58249. + dir plugins
  58250. +
  58251. + . perm:acl
  58252. +
  58253. + . audi---audit plugin intercepting and possibly logging all
  58254. + accesses to object. Requires to put stub functions in file_operations
  58255. + instead of generic_file_*.
  58256. +
  58257. +NIKITA-FIXME-HANS: why make overflows a plugin?
  58258. + . over---handle hash overflows
  58259. +
  58260. + . sqnt---handle different access patterns and instrument read-ahead
  58261. +
  58262. +NIKITA-FIXME-HANS: describe the line below in more detail.
  58263. +
  58264. + . hier---handle inheritance of plugins along file-system hierarchy
  58265. +
  58266. + Different kinds of inheritance: on creation vs. on access.
  58267. + Compatible/incompatible plugins.
  58268. + Inheritance for multi-linked files.
  58269. + Layered plugins.
  58270. + Notion of plugin context is abandoned.
  58271. +
  58272. +Each file is associated
  58273. + with one plugin and dependent plugins (hash, etc.) are stored as
  58274. + main plugin state. Now, if we have plugins used for regular files
  58275. + but not for directories, how such plugins would be inherited?
  58276. + . always store them with directories also
  58277. +
  58278. +NIKITA-FIXME-HANS: Do the line above. It is not exclusive of doing
  58279. +the line below which is also useful.
  58280. +
  58281. + . use inheritance hierarchy, independent of file-system namespace
  58282. +*/
  58283. +
  58284. +#include "../debug.h"
  58285. +#include "../dformat.h"
  58286. +#include "plugin_header.h"
  58287. +#include "item/static_stat.h"
  58288. +#include "node/node.h"
  58289. +#include "security/perm.h"
  58290. +#include "space/space_allocator.h"
  58291. +#include "disk_format/disk_format.h"
  58292. +#include "plugin.h"
  58293. +#include "../reiser4.h"
  58294. +#include "../jnode.h"
  58295. +#include "../inode.h"
  58296. +
  58297. +#include <linux/fs.h> /* for struct super_block */
  58298. +
  58299. +/*
  58300. + * init_plugins - initialize plugin sub-system.
  58301. + * Just call this once on reiser4 startup.
  58302. + *
  58303. + * Initializes plugin sub-system. It is part of reiser4 module
  58304. + * initialization. For each plugin of each type init method is called and each
  58305. + * plugin is put into list of plugins.
  58306. + */
  58307. +int init_plugins(void)
  58308. +{
  58309. + reiser4_plugin_type type_id;
  58310. +
  58311. + for (type_id = 0; type_id < REISER4_PLUGIN_TYPES; ++type_id) {
  58312. + struct reiser4_plugin_type_data *ptype;
  58313. + int i;
  58314. +
  58315. + ptype = &plugins[type_id];
  58316. + assert("nikita-3508", ptype->label != NULL);
  58317. + assert("nikita-3509", ptype->type_id == type_id);
  58318. +
  58319. + INIT_LIST_HEAD(&ptype->plugins_list);
  58320. +/* NIKITA-FIXME-HANS: change builtin_num to some other name lacking the term
  58321. + * builtin. */
  58322. + for (i = 0; i < ptype->builtin_num; ++i) {
  58323. + reiser4_plugin *plugin;
  58324. +
  58325. + plugin = plugin_at(ptype, i);
  58326. +
  58327. + if (plugin->h.label == NULL)
  58328. + /* uninitialized slot encountered */
  58329. + continue;
  58330. + assert("nikita-3445", plugin->h.type_id == type_id);
  58331. + plugin->h.id = i;
  58332. + if (plugin->h.pops != NULL &&
  58333. + plugin->h.pops->init != NULL) {
  58334. + int result;
  58335. +
  58336. + result = plugin->h.pops->init(plugin);
  58337. + if (result != 0)
  58338. + return result;
  58339. + }
  58340. + INIT_LIST_HEAD(&plugin->h.linkage);
  58341. + list_add_tail(&plugin->h.linkage, &ptype->plugins_list);
  58342. + }
  58343. + }
  58344. + return 0;
  58345. +}
  58346. +
  58347. +/* true if plugin type id is valid */
  58348. +int is_plugin_type_valid(reiser4_plugin_type type)
  58349. +{
  58350. + /* "type" is unsigned, so no comparison with 0 is
  58351. + necessary */
  58352. + return (type < REISER4_PLUGIN_TYPES);
  58353. +}
  58354. +
  58355. +/* true if plugin id is valid */
  58356. +int is_plugin_id_valid(reiser4_plugin_type type, reiser4_plugin_id id)
  58357. +{
  58358. + assert("nikita-1653", is_plugin_type_valid(type));
  58359. + return id < plugins[type].builtin_num;
  58360. +}
  58361. +
  58362. +/* return plugin by its @type and @id.
  58363. +
  58364. + Both arguments are checked for validness: this is supposed to be called
  58365. + from user-level.
  58366. +
  58367. +NIKITA-FIXME-HANS: Do you instead mean that this checks ids created in
  58368. +user space, and passed to the filesystem by use of method files? Your
  58369. +comment really confused me on the first reading....
  58370. +
  58371. +*/
  58372. +reiser4_plugin *plugin_by_unsafe_id(reiser4_plugin_type type /* plugin type
  58373. + * unchecked */,
  58374. + reiser4_plugin_id id /* plugin id,
  58375. + * unchecked */)
  58376. +{
  58377. + if (is_plugin_type_valid(type)) {
  58378. + if (is_plugin_id_valid(type, id))
  58379. + return plugin_at(&plugins[type], id);
  58380. + else
  58381. + /* id out of bounds */
  58382. + warning("nikita-2913",
  58383. + "Invalid plugin id: [%i:%i]", type, id);
  58384. + } else
  58385. + /* type_id out of bounds */
  58386. + warning("nikita-2914", "Invalid type_id: %i", type);
  58387. + return NULL;
  58388. +}
  58389. +
  58390. +/**
  58391. + * save_plugin_id - store plugin id in disk format
  58392. + * @plugin: plugin to convert
  58393. + * @area: where to store result
  58394. + *
  58395. + * Puts id of @plugin in little endian format to address @area.
  58396. + */
  58397. +int save_plugin_id(reiser4_plugin *plugin /* plugin to convert */ ,
  58398. + d16 * area/* where to store result */)
  58399. +{
  58400. + assert("nikita-1261", plugin != NULL);
  58401. + assert("nikita-1262", area != NULL);
  58402. +
  58403. + put_unaligned(cpu_to_le16(plugin->h.id), area);
  58404. + return 0;
  58405. +}
  58406. +
  58407. +/* list of all plugins of given type */
  58408. +struct list_head *get_plugin_list(reiser4_plugin_type type)
  58409. +{
  58410. + assert("nikita-1056", is_plugin_type_valid(type));
  58411. + return &plugins[type].plugins_list;
  58412. +}
  58413. +
  58414. +static void update_pset_mask(reiser4_inode * info, pset_member memb)
  58415. +{
  58416. + struct dentry *rootdir;
  58417. + reiser4_inode *root;
  58418. +
  58419. + assert("edward-1443", memb != PSET_FILE);
  58420. +
  58421. + rootdir = inode_by_reiser4_inode(info)->i_sb->s_root;
  58422. + if (rootdir != NULL) {
  58423. + root = reiser4_inode_data(rootdir->d_inode);
  58424. + /*
  58425. + * if inode is different from the default one, or we are
  58426. + * changing plugin of root directory, update plugin_mask
  58427. + */
  58428. + if (aset_get(info->pset, memb) !=
  58429. + aset_get(root->pset, memb) ||
  58430. + info == root)
  58431. + info->plugin_mask |= (1 << memb);
  58432. + else
  58433. + info->plugin_mask &= ~(1 << memb);
  58434. + }
  58435. +}
  58436. +
  58437. +/* Get specified plugin set member from parent,
  58438. + or from fs-defaults (if no parent is given) and
  58439. + install the result to pset of @self */
  58440. +int grab_plugin_pset(struct inode *self,
  58441. + struct inode *ancestor,
  58442. + pset_member memb)
  58443. +{
  58444. + reiser4_plugin *plug;
  58445. + reiser4_inode *info;
  58446. + int result = 0;
  58447. +
  58448. + /* Do not grab if initialised already. */
  58449. + info = reiser4_inode_data(self);
  58450. + if (aset_get(info->pset, memb) != NULL)
  58451. + return 0;
  58452. + if (ancestor) {
  58453. + reiser4_inode *parent;
  58454. +
  58455. + parent = reiser4_inode_data(ancestor);
  58456. + plug = aset_get(parent->hset, memb) ? :
  58457. + aset_get(parent->pset, memb);
  58458. + } else
  58459. + plug = get_default_plugin(memb);
  58460. +
  58461. + result = set_plugin(&info->pset, memb, plug);
  58462. + if (result == 0) {
  58463. + if (!ancestor || self->i_sb->s_root->d_inode != self)
  58464. + update_pset_mask(info, memb);
  58465. + }
  58466. + return result;
  58467. +}
  58468. +
  58469. +/* Take missing pset members from root inode */
  58470. +int finish_pset(struct inode *inode)
  58471. +{
  58472. + reiser4_plugin *plug;
  58473. + reiser4_inode *root;
  58474. + reiser4_inode *info;
  58475. + pset_member memb;
  58476. + int result = 0;
  58477. +
  58478. + root = reiser4_inode_data(inode->i_sb->s_root->d_inode);
  58479. + info = reiser4_inode_data(inode);
  58480. +
  58481. + assert("edward-1455", root != NULL);
  58482. + assert("edward-1456", info != NULL);
  58483. +
  58484. + /* file and directory plugins are already initialized. */
  58485. + for (memb = PSET_DIR + 1; memb < PSET_LAST; ++memb) {
  58486. +
  58487. + /* Do not grab if initialised already. */
  58488. + if (aset_get(info->pset, memb) != NULL)
  58489. + continue;
  58490. +
  58491. + plug = aset_get(root->pset, memb);
  58492. + result = set_plugin(&info->pset, memb, plug);
  58493. + if (result != 0)
  58494. + break;
  58495. + }
  58496. + if (result != 0) {
  58497. + warning("nikita-3447",
  58498. + "Cannot set up plugins for %lli",
  58499. + (unsigned long long)
  58500. + get_inode_oid(inode));
  58501. + }
  58502. + return result;
  58503. +}
  58504. +
  58505. +int force_plugin_pset(struct inode *self, pset_member memb,
  58506. + reiser4_plugin * plug)
  58507. +{
  58508. + reiser4_inode *info;
  58509. + int result = 0;
  58510. +
  58511. + if (!self->i_sb->s_root || self->i_sb->s_root->d_inode == self) {
  58512. + /* Changing pset in the root object. */
  58513. + return RETERR(-EINVAL);
  58514. + }
  58515. +
  58516. + info = reiser4_inode_data(self);
  58517. + if (plug->h.pops != NULL && plug->h.pops->change != NULL)
  58518. + result = plug->h.pops->change(self, plug, memb);
  58519. + else
  58520. + result = aset_set_unsafe(&info->pset, memb, plug);
  58521. + if (result == 0) {
  58522. + __u16 oldmask = info->plugin_mask;
  58523. +
  58524. + update_pset_mask(info, memb);
  58525. + if (oldmask != info->plugin_mask)
  58526. + reiser4_inode_clr_flag(self, REISER4_SDLEN_KNOWN);
  58527. + }
  58528. + return result;
  58529. +}
  58530. +
  58531. +struct reiser4_plugin_type_data plugins[REISER4_PLUGIN_TYPES] = {
  58532. + /* C90 initializers */
  58533. + [REISER4_FILE_PLUGIN_TYPE] = {
  58534. + .type_id = REISER4_FILE_PLUGIN_TYPE,
  58535. + .label = "file",
  58536. + .desc = "Object plugins",
  58537. + .builtin_num = sizeof_array(file_plugins),
  58538. + .builtin = file_plugins,
  58539. + .plugins_list = {NULL, NULL},
  58540. + .size = sizeof(file_plugin)
  58541. + },
  58542. + [REISER4_DIR_PLUGIN_TYPE] = {
  58543. + .type_id = REISER4_DIR_PLUGIN_TYPE,
  58544. + .label = "dir",
  58545. + .desc = "Directory plugins",
  58546. + .builtin_num = sizeof_array(dir_plugins),
  58547. + .builtin = dir_plugins,
  58548. + .plugins_list = {NULL, NULL},
  58549. + .size = sizeof(dir_plugin)
  58550. + },
  58551. + [REISER4_HASH_PLUGIN_TYPE] = {
  58552. + .type_id = REISER4_HASH_PLUGIN_TYPE,
  58553. + .label = "hash",
  58554. + .desc = "Directory hashes",
  58555. + .builtin_num = sizeof_array(hash_plugins),
  58556. + .builtin = hash_plugins,
  58557. + .plugins_list = {NULL, NULL},
  58558. + .size = sizeof(hash_plugin)
  58559. + },
  58560. + [REISER4_FIBRATION_PLUGIN_TYPE] = {
  58561. + .type_id =
  58562. + REISER4_FIBRATION_PLUGIN_TYPE,
  58563. + .label = "fibration",
  58564. + .desc = "Directory fibrations",
  58565. + .builtin_num = sizeof_array(fibration_plugins),
  58566. + .builtin = fibration_plugins,
  58567. + .plugins_list = {NULL, NULL},
  58568. + .size = sizeof(fibration_plugin)
  58569. + },
  58570. + [REISER4_CIPHER_PLUGIN_TYPE] = {
  58571. + .type_id = REISER4_CIPHER_PLUGIN_TYPE,
  58572. + .label = "cipher",
  58573. + .desc = "Cipher plugins",
  58574. + .builtin_num = sizeof_array(cipher_plugins),
  58575. + .builtin = cipher_plugins,
  58576. + .plugins_list = {NULL, NULL},
  58577. + .size = sizeof(cipher_plugin)
  58578. + },
  58579. + [REISER4_DIGEST_PLUGIN_TYPE] = {
  58580. + .type_id = REISER4_DIGEST_PLUGIN_TYPE,
  58581. + .label = "digest",
  58582. + .desc = "Digest plugins",
  58583. + .builtin_num = sizeof_array(digest_plugins),
  58584. + .builtin = digest_plugins,
  58585. + .plugins_list = {NULL, NULL},
  58586. + .size = sizeof(digest_plugin)
  58587. + },
  58588. + [REISER4_COMPRESSION_PLUGIN_TYPE] = {
  58589. + .type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
  58590. + .label = "compression",
  58591. + .desc = "Compression plugins",
  58592. + .builtin_num = sizeof_array(compression_plugins),
  58593. + .builtin = compression_plugins,
  58594. + .plugins_list = {NULL, NULL},
  58595. + .size = sizeof(compression_plugin)
  58596. + },
  58597. + [REISER4_FORMATTING_PLUGIN_TYPE] = {
  58598. + .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
  58599. + .label = "formatting",
  58600. + .desc = "Tail inlining policies",
  58601. + .builtin_num = sizeof_array(formatting_plugins),
  58602. + .builtin = formatting_plugins,
  58603. + .plugins_list = {NULL, NULL},
  58604. + .size = sizeof(formatting_plugin)
  58605. + },
  58606. + [REISER4_PERM_PLUGIN_TYPE] = {
  58607. + .type_id = REISER4_PERM_PLUGIN_TYPE,
  58608. + .label = "perm",
  58609. + .desc = "Permission checks",
  58610. + .builtin_num = sizeof_array(perm_plugins),
  58611. + .builtin = perm_plugins,
  58612. + .plugins_list = {NULL, NULL},
  58613. + .size = sizeof(perm_plugin)
  58614. + },
  58615. + [REISER4_ITEM_PLUGIN_TYPE] = {
  58616. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  58617. + .label = "item",
  58618. + .desc = "Item handlers",
  58619. + .builtin_num = sizeof_array(item_plugins),
  58620. + .builtin = item_plugins,
  58621. + .plugins_list = {NULL, NULL},
  58622. + .size = sizeof(item_plugin)
  58623. + },
  58624. + [REISER4_NODE_PLUGIN_TYPE] = {
  58625. + .type_id = REISER4_NODE_PLUGIN_TYPE,
  58626. + .label = "node",
  58627. + .desc = "node layout handlers",
  58628. + .builtin_num = sizeof_array(node_plugins),
  58629. + .builtin = node_plugins,
  58630. + .plugins_list = {NULL, NULL},
  58631. + .size = sizeof(node_plugin)
  58632. + },
  58633. + [REISER4_SD_EXT_PLUGIN_TYPE] = {
  58634. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  58635. + .label = "sd_ext",
  58636. + .desc = "Parts of stat-data",
  58637. + .builtin_num = sizeof_array(sd_ext_plugins),
  58638. + .builtin = sd_ext_plugins,
  58639. + .plugins_list = {NULL, NULL},
  58640. + .size = sizeof(sd_ext_plugin)
  58641. + },
  58642. + [REISER4_FORMAT_PLUGIN_TYPE] = {
  58643. + .type_id = REISER4_FORMAT_PLUGIN_TYPE,
  58644. + .label = "disk_layout",
  58645. + .desc = "defines filesystem on disk layout",
  58646. + .builtin_num = sizeof_array(format_plugins),
  58647. + .builtin = format_plugins,
  58648. + .plugins_list = {NULL, NULL},
  58649. + .size = sizeof(disk_format_plugin)
  58650. + },
  58651. + [REISER4_JNODE_PLUGIN_TYPE] = {
  58652. + .type_id = REISER4_JNODE_PLUGIN_TYPE,
  58653. + .label = "jnode",
  58654. + .desc = "defines kind of jnode",
  58655. + .builtin_num = sizeof_array(jnode_plugins),
  58656. + .builtin = jnode_plugins,
  58657. + .plugins_list = {NULL, NULL},
  58658. + .size = sizeof(jnode_plugin)
  58659. + },
  58660. + [REISER4_COMPRESSION_MODE_PLUGIN_TYPE] = {
  58661. + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  58662. + .label = "compression_mode",
  58663. + .desc = "Defines compression mode",
  58664. + .builtin_num = sizeof_array(compression_mode_plugins),
  58665. + .builtin = compression_mode_plugins,
  58666. + .plugins_list = {NULL, NULL},
  58667. + .size = sizeof(compression_mode_plugin)
  58668. + },
  58669. + [REISER4_CLUSTER_PLUGIN_TYPE] = {
  58670. + .type_id = REISER4_CLUSTER_PLUGIN_TYPE,
  58671. + .label = "cluster",
  58672. + .desc = "Defines cluster size",
  58673. + .builtin_num = sizeof_array(cluster_plugins),
  58674. + .builtin = cluster_plugins,
  58675. + .plugins_list = {NULL, NULL},
  58676. + .size = sizeof(cluster_plugin)
  58677. + },
  58678. + [REISER4_TXMOD_PLUGIN_TYPE] = {
  58679. + .type_id = REISER4_TXMOD_PLUGIN_TYPE,
  58680. + .label = "txmod",
  58681. + .desc = "Defines transaction model",
  58682. + .builtin_num = sizeof_array(txmod_plugins),
  58683. + .builtin = txmod_plugins,
  58684. + .plugins_list = {NULL, NULL},
  58685. + .size = sizeof(txmod_plugin)
  58686. + }
  58687. +};
  58688. +
  58689. +/*
  58690. + * Local variables:
  58691. + * c-indentation-style: "K&R"
  58692. + * mode-name: "LC"
  58693. + * c-basic-offset: 8
  58694. + * tab-width: 8
  58695. + * fill-column: 120
  58696. + * End:
  58697. + */
  58698. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/plugin.h linux-5.16.14/fs/reiser4/plugin/plugin.h
  58699. --- linux-5.16.14.orig/fs/reiser4/plugin/plugin.h 1970-01-01 01:00:00.000000000 +0100
  58700. +++ linux-5.16.14/fs/reiser4/plugin/plugin.h 2022-03-12 13:26:19.684892806 +0100
  58701. @@ -0,0 +1,993 @@
  58702. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  58703. + * reiser4/README */
  58704. +
  58705. +/* Basic plugin data-types.
  58706. + see fs/reiser4/plugin/plugin.c for details */
  58707. +
  58708. +#if !defined(__FS_REISER4_PLUGIN_TYPES_H__)
  58709. +#define __FS_REISER4_PLUGIN_TYPES_H__
  58710. +
  58711. +#include "../forward.h"
  58712. +#include "../debug.h"
  58713. +#include "../dformat.h"
  58714. +#include "../key.h"
  58715. +#include "compress/compress.h"
  58716. +#include "crypto/cipher.h"
  58717. +#include "plugin_header.h"
  58718. +#include "item/static_stat.h"
  58719. +#include "item/internal.h"
  58720. +#include "item/sde.h"
  58721. +#include "item/cde.h"
  58722. +#include "item/item.h"
  58723. +#include "node/node.h"
  58724. +#include "node/node41.h"
  58725. +#include "security/perm.h"
  58726. +#include "fibration.h"
  58727. +
  58728. +#include "space/bitmap.h"
  58729. +#include "space/space_allocator.h"
  58730. +
  58731. +#include "disk_format/disk_format40.h"
  58732. +#include "disk_format/disk_format.h"
  58733. +
  58734. +#include <linux/fs.h> /* for struct super_block, address_space */
  58735. +#include <linux/mm.h> /* for struct page */
  58736. +#include <linux/buffer_head.h> /* for struct buffer_head */
  58737. +#include <linux/dcache.h> /* for struct dentry */
  58738. +#include <linux/types.h>
  58739. +#include <linux/crypto.h>
  58740. +
  58741. +typedef struct reiser4_object_on_wire reiser4_object_on_wire;
  58742. +
  58743. +/*
  58744. + * File plugin. Defines the set of methods that file plugins implement, some
  58745. + * of which are optional.
  58746. + *
  58747. + * A file plugin offers to the caller an interface for IO ( writing to and/or
  58748. + * reading from) to what the caller sees as one sequence of bytes. An IO to it
  58749. + * may affect more than one physical sequence of bytes, or no physical sequence
  58750. + * of bytes, it may affect sequences of bytes offered by other file plugins to
  58751. + * the semantic layer, and the file plugin may invoke other plugins and
  58752. + * delegate work to them, but its interface is structured for offering the
  58753. + * caller the ability to read and/or write what the caller sees as being a
  58754. + * single sequence of bytes.
  58755. + *
  58756. + * The file plugin must present a sequence of bytes to the caller, but it does
  58757. + * not necessarily have to store a sequence of bytes, it does not necessarily
  58758. + * have to support efficient tree traversal to any offset in the sequence of
  58759. + * bytes (tail and extent items, whose keys contain offsets, do however provide
  58760. + * efficient non-sequential lookup of any offset in the sequence of bytes).
  58761. + *
  58762. + * Directory plugins provide methods for selecting file plugins by resolving a
  58763. + * name for them.
  58764. + *
  58765. + * The functionality other filesystems call an attribute, and rigidly tie
  58766. + * together, we decompose into orthogonal selectable features of files. Using
  58767. + * the terminology we will define next, an attribute is a perhaps constrained,
  58768. + * perhaps static length, file whose parent has a uni-count-intra-link to it,
  58769. + * which might be grandparent-major-packed, and whose parent has a deletion
  58770. + * method that deletes it.
  58771. + *
  58772. + * File plugins can implement constraints.
  58773. + *
  58774. + * Files can be of variable length (e.g. regular unix files), or of static
  58775. + * length (e.g. static sized attributes).
  58776. + *
  58777. + * An object may have many sequences of bytes, and many file plugins, but, it
  58778. + * has exactly one objectid. It is usually desirable that an object has a
  58779. + * deletion method which deletes every item with that objectid. Items cannot
  58780. + * in general be found by just their objectids. This means that an object must
  58781. + * have either a method built into its deletion plugin method for knowing what
  58782. + * items need to be deleted, or links stored with the object that provide the
  58783. + * plugin with a method for finding those items. Deleting a file within an
  58784. + * object may or may not have the effect of deleting the entire object,
  58785. + * depending on the file plugin's deletion method.
  58786. + *
  58787. + * LINK TAXONOMY:
  58788. + *
  58789. + * Many objects have a reference count, and when the reference count reaches 0
  58790. + * the object's deletion method is invoked. Some links embody a reference
  58791. + * count increase ("countlinks"), and others do not ("nocountlinks").
  58792. + *
  58793. + * Some links are bi-directional links ("bilinks"), and some are
  58794. + * uni-directional("unilinks").
  58795. + *
  58796. + * Some links are between parts of the same object ("intralinks"), and some are
  58797. + * between different objects ("interlinks").
  58798. + *
  58799. + * PACKING TAXONOMY:
  58800. + *
  58801. + * Some items of an object are stored with a major packing locality based on
  58802. + * their object's objectid (e.g. unix directory items in plan A), and these are
  58803. + * called "self-major-packed".
  58804. + *
  58805. + * Some items of an object are stored with a major packing locality based on
  58806. + * their semantic parent object's objectid (e.g. unix file bodies in plan A),
  58807. + * and these are called "parent-major-packed".
  58808. + *
  58809. + * Some items of an object are stored with a major packing locality based on
  58810. + * their semantic grandparent, and these are called "grandparent-major-packed".
  58811. + * Now carefully notice that we run into trouble with key length if we have to
  58812. + * store a 8 byte major+minor grandparent based packing locality, an 8 byte
  58813. + * parent objectid, an 8 byte attribute objectid, and an 8 byte offset, all in
  58814. + * a 24 byte key. One of these fields must be sacrificed if an item is to be
  58815. + * grandparent-major-packed, and which to sacrifice is left to the item author
  58816. + * choosing to make the item grandparent-major-packed. You cannot make tail
  58817. + * items and extent items grandparent-major-packed, though you could make them
  58818. + * self-major-packed (usually they are parent-major-packed).
  58819. + *
  58820. + * In the case of ACLs (which are composed of fixed length ACEs which consist
  58821. + * of {subject-type, subject, and permission bitmask} triples), it makes sense
  58822. + * to not have an offset field in the ACE item key, and to allow duplicate keys
  58823. + * for ACEs. Thus, the set of ACES for a given file is found by looking for a
  58824. + * key consisting of the objectid of the grandparent (thus grouping all ACLs in
  58825. + * a directory together), the minor packing locality of ACE, the objectid of
  58826. + * the file, and 0.
  58827. + *
  58828. + * IO involves moving data from one location to another, which means that two
  58829. + * locations must be specified, source and destination.
  58830. + *
  58831. + * This source and destination can be in the filesystem, or they can be a
  58832. + * pointer in the user process address space plus a byte count.
  58833. + *
  58834. + * If both source and destination are in the filesystem, then at least one of
  58835. + * them must be representable as a pure stream of bytes (which we call a flow,
  58836. + * and define as a struct containing a key, a data pointer, and a length).
  58837. + * This may mean converting one of them into a flow. We provide a generic
  58838. + * cast_into_flow() method, which will work for any plugin supporting
  58839. + * read_flow(), though it is inefficiently implemented in that it temporarily
  58840. + * stores the flow in a buffer (Question: what to do with huge flows that
  58841. + * cannot fit into memory? Answer: we must not convert them all at once. )
  58842. + *
  58843. + * Performing a write requires resolving the write request into a flow defining
  58844. + * the source, and a method that performs the write, and a key that defines
  58845. + * where in the tree the write is to go.
  58846. + *
  58847. + * Performing a read requires resolving the read request into a flow defining
  58848. + * the target, and a method that performs the read, and a key that defines
  58849. + * where in the tree the read is to come from.
  58850. + *
  58851. + * There will exist file plugins which have no pluginid stored on the disk for
  58852. + * them, and which are only invoked by other plugins.
  58853. + */
  58854. +
  58855. +/*
  58856. + * This should be incremented in every release which adds one
  58857. + * or more new plugins.
  58858. + * NOTE: Make sure that respective macro is also incremented in
  58859. + * the new release of reiser4progs.
  58860. + */
  58861. +#define PLUGIN_LIBRARY_VERSION 2
  58862. +
  58863. + /* enumeration of fields within plugin_set */
  58864. +typedef enum {
  58865. + PSET_FILE,
  58866. + PSET_DIR, /* PSET_FILE and PSET_DIR should be first
  58867. + * elements: inode.c:read_inode() depends on
  58868. + * this. */
  58869. + PSET_PERM,
  58870. + PSET_FORMATTING,
  58871. + PSET_HASH,
  58872. + PSET_FIBRATION,
  58873. + PSET_SD,
  58874. + PSET_DIR_ITEM,
  58875. + PSET_CIPHER,
  58876. + PSET_DIGEST,
  58877. + PSET_COMPRESSION,
  58878. + PSET_COMPRESSION_MODE,
  58879. + PSET_CLUSTER,
  58880. + PSET_CREATE,
  58881. + PSET_LAST
  58882. +} pset_member;
  58883. +
  58884. +/* builtin file-plugins */
  58885. +typedef enum {
  58886. + /* regular file */
  58887. + UNIX_FILE_PLUGIN_ID,
  58888. + /* directory */
  58889. + DIRECTORY_FILE_PLUGIN_ID,
  58890. + /* symlink */
  58891. + SYMLINK_FILE_PLUGIN_ID,
  58892. + /* for objects completely handled by the VFS: fifos, devices,
  58893. + sockets */
  58894. + SPECIAL_FILE_PLUGIN_ID,
  58895. + /* regular cryptcompress file */
  58896. + CRYPTCOMPRESS_FILE_PLUGIN_ID,
  58897. + /* number of file plugins. Used as size of arrays to hold
  58898. + file plugins. */
  58899. + LAST_FILE_PLUGIN_ID
  58900. +} reiser4_file_id;
  58901. +
  58902. +typedef struct file_plugin {
  58903. +
  58904. + /* generic fields */
  58905. + plugin_header h;
  58906. +
  58907. + /* VFS methods */
  58908. + struct inode_operations * inode_ops;
  58909. + struct file_operations * file_ops;
  58910. + struct address_space_operations * as_ops;
  58911. + /**
  58912. + * Private methods. These are optional. If used they will allow you
  58913. + * to minimize the amount of code needed to implement a deviation
  58914. + * from some other method that also uses them.
  58915. + */
  58916. + /*
  58917. + * private inode_ops
  58918. + */
  58919. + int (*setattr)(struct dentry *, struct iattr *);
  58920. + /*
  58921. + * private file_ops
  58922. + */
  58923. + /* do whatever is necessary to do when object is opened */
  58924. + int (*open) (struct inode *inode, struct file *file);
  58925. + ssize_t (*read) (struct kiocb *iocb, struct iov_iter *iter);
  58926. + /* write as much as possible bytes from nominated @write_amount
  58927. + * before plugin scheduling occurs. Save scheduling state
  58928. + * in @cont */
  58929. + ssize_t (*write) (struct file *, const char __user *buf,
  58930. + size_t write_amount, loff_t * off,
  58931. + struct dispatch_context * cont);
  58932. + int (*ioctl) (struct file *filp, unsigned int cmd, unsigned long arg);
  58933. + int (*mmap) (struct file *, struct vm_area_struct *);
  58934. + int (*release) (struct inode *, struct file *);
  58935. + /*
  58936. + * private a_ops
  58937. + */
  58938. + int (*readpage) (struct file *file, struct page *page);
  58939. + int (*readpages)(struct file *file, struct address_space *mapping,
  58940. + struct list_head *pages, unsigned nr_pages);
  58941. + int (*writepages)(struct address_space *mapping,
  58942. + struct writeback_control *wbc);
  58943. + int (*write_begin)(struct file *file, struct page *page,
  58944. + loff_t pos, unsigned len, void **fsdata);
  58945. + int (*write_end)(struct file *file, struct page *page,
  58946. + loff_t pos, unsigned copied, void *fsdata);
  58947. + sector_t (*bmap) (struct address_space * mapping, sector_t lblock);
  58948. + /* other private methods */
  58949. + /* save inode cached stat-data onto disk. It was called
  58950. + reiserfs_update_sd() in 3.x */
  58951. + int (*write_sd_by_inode) (struct inode *);
  58952. + /*
  58953. + * Construct flow into @flow according to user-supplied data.
  58954. + *
  58955. + * This is used by read/write methods to construct a flow to
  58956. + * write/read. ->flow_by_inode() is plugin method, rather than single
  58957. + * global implementation, because key in a flow used by plugin may
  58958. + * depend on data in a @buf.
  58959. + *
  58960. + * NIKITA-FIXME-HANS: please create statistics on what functions are
  58961. + * dereferenced how often for the mongo benchmark. You can supervise
  58962. + * Elena doing this for you if that helps. Email me the list of the
  58963. + * top 10, with their counts, and an estimate of the total number of
  58964. + * CPU cycles spent dereferencing as a percentage of CPU cycles spent
  58965. + * processing (non-idle processing). If the total percent is, say,
  58966. + * less than 1%, it will make our coding discussions much easier, and
  58967. + * keep me from questioning whether functions like the below are too
  58968. + * frequently called to be dereferenced. If the total percent is more
  58969. + * than 1%, perhaps private methods should be listed in a "required"
  58970. + * comment at the top of each plugin (with stern language about how if
  58971. + * the comment is missing it will not be accepted by the maintainer),
  58972. + * and implemented using macros not dereferenced functions. How about
  58973. + * replacing this whole private methods part of the struct with a
  58974. + * thorough documentation of what the standard helper functions are for
  58975. + * use in constructing plugins? I think users have been asking for
  58976. + * that, though not in so many words.
  58977. + */
  58978. + int (*flow_by_inode) (struct inode *, const char __user *buf,
  58979. + int user, loff_t size,
  58980. + loff_t off, rw_op op, flow_t *);
  58981. + /*
  58982. + * Return the key used to retrieve an offset of a file. It is used by
  58983. + * default implementation of ->flow_by_inode() method
  58984. + * (common_build_flow()) and, among other things, to get to the extent
  58985. + * from jnode of unformatted node.
  58986. + */
  58987. + int (*key_by_inode) (struct inode *, loff_t off, reiser4_key *);
  58988. +
  58989. + /* NIKITA-FIXME-HANS: this comment is not as clear to others as you
  58990. + * think.... */
  58991. + /*
  58992. + * set the plugin for a file. Called during file creation in creat()
  58993. + * but not reiser4() unless an inode already exists for the file.
  58994. + */
  58995. + int (*set_plug_in_inode) (struct inode *inode, struct inode *parent,
  58996. + reiser4_object_create_data *);
  58997. +
  58998. + /* NIKITA-FIXME-HANS: comment and name seem to say different things,
  58999. + * are you setting up the object itself also or just adjusting the
  59000. + * parent?.... */
  59001. + /* set up plugins for new @object created in @parent. @root is root
  59002. + directory. */
  59003. + int (*adjust_to_parent) (struct inode *object, struct inode *parent,
  59004. + struct inode *root);
  59005. + /*
  59006. + * this does whatever is necessary to do when object is created. For
  59007. + * instance, for unix files stat data is inserted. It is supposed to be
  59008. + * called by create of struct inode_operations.
  59009. + */
  59010. + int (*create_object) (struct inode *object, struct inode *parent,
  59011. + reiser4_object_create_data *);
  59012. + /*
  59013. + * this method should check REISER4_NO_SD and set REISER4_NO_SD on
  59014. + * success. Deletion of an object usually includes removal of items
  59015. + * building file body (for directories this is removal of "." and "..")
  59016. + * and removal of stat-data item.
  59017. + */
  59018. + int (*delete_object) (struct inode *);
  59019. +
  59020. + /* add link from @parent to @object */
  59021. + int (*add_link) (struct inode *object, struct inode *parent);
  59022. +
  59023. + /* remove link from @parent to @object */
  59024. + int (*rem_link) (struct inode *object, struct inode *parent);
  59025. +
  59026. + /*
  59027. + * return true if item addressed by @coord belongs to @inode. This is
  59028. + * used by read/write to properly slice flow into items in presence of
  59029. + * multiple key assignment policies, because items of a file are not
  59030. + * necessarily contiguous in a key space, for example, in a plan-b.
  59031. + */
  59032. + int (*owns_item) (const struct inode *, const coord_t *);
  59033. +
  59034. + /* checks whether yet another hard links to this object can be
  59035. + added */
  59036. + int (*can_add_link) (const struct inode *);
  59037. +
  59038. + /* checks whether hard links to this object can be removed */
  59039. + int (*can_rem_link) (const struct inode *);
  59040. +
  59041. + /* not empty for DIRECTORY_FILE_PLUGIN_ID only currently. It calls
  59042. + detach of directory plugin to remove ".." */
  59043. + int (*detach) (struct inode *child, struct inode *parent);
  59044. +
  59045. + /* process safe-link during mount */
  59046. + int (*safelink) (struct inode *object, reiser4_safe_link_t link,
  59047. + __u64 value);
  59048. +
  59049. + /* The couple of estimate methods for all file operations */
  59050. + struct {
  59051. + reiser4_block_nr(*create) (const struct inode *);
  59052. + reiser4_block_nr(*update) (const struct inode *);
  59053. + reiser4_block_nr(*unlink) (const struct inode *,
  59054. + const struct inode *);
  59055. + } estimate;
  59056. +
  59057. + /*
  59058. + * reiser4 specific part of inode has a union of structures which are
  59059. + * specific to a plugin. This method is called when inode is read
  59060. + * (read_inode) and when file is created (common_create_child) so that
  59061. + * file plugin could initialize its inode data
  59062. + */
  59063. + void (*init_inode_data) (struct inode *, reiser4_object_create_data * ,
  59064. + int);
  59065. +
  59066. + /*
  59067. + * This method performs progressive deletion of items and whole nodes
  59068. + * from right to left.
  59069. + *
  59070. + * @tap: the point deletion process begins from,
  59071. + * @from_key: the beginning of the deleted key range,
  59072. + * @to_key: the end of the deleted key range,
  59073. + * @smallest_removed: the smallest removed key,
  59074. + *
  59075. + * @return: 0 if success, error code otherwise, -E_REPEAT means that
  59076. + * long cut_tree operation was interrupted for allowing atom commit .
  59077. + */
  59078. + int (*cut_tree_worker) (tap_t *, const reiser4_key * from_key,
  59079. + const reiser4_key * to_key,
  59080. + reiser4_key * smallest_removed, struct inode *,
  59081. + int, int *);
  59082. +
  59083. + /* called from ->destroy_inode() */
  59084. + void (*destroy_inode) (struct inode *);
  59085. +
  59086. + /*
  59087. + * methods to serialize object identity. This is used, for example, by
  59088. + * reiser4_{en,de}code_fh().
  59089. + */
  59090. + struct {
  59091. + /* store object's identity at @area */
  59092. + char *(*write) (struct inode *inode, char *area);
  59093. + /* parse object from wire to the @obj */
  59094. + char *(*read) (char *area, reiser4_object_on_wire * obj);
  59095. + /* given object identity in @obj, find or create its dentry */
  59096. + struct dentry *(*get) (struct super_block *s,
  59097. + reiser4_object_on_wire * obj);
  59098. + /* how many bytes ->wire.write() consumes */
  59099. + int (*size) (struct inode *inode);
  59100. + /* finish with object identity */
  59101. + void (*done) (reiser4_object_on_wire * obj);
  59102. + } wire;
  59103. +} file_plugin;
  59104. +
  59105. +extern file_plugin file_plugins[LAST_FILE_PLUGIN_ID];
  59106. +
  59107. +struct reiser4_object_on_wire {
  59108. + file_plugin *plugin; /* file plugin of the object being (de)serialized */
  59109. + union {
  59110. + struct {
  59111. + obj_key_id key_id; /* object key id (standard wire representation) */
  59112. + } std;
  59113. + void *generic;
  59114. + } u;
  59115. +};
  59116. +
  59117. +/* ids of builtin directory plugins */
  59118. +typedef enum {
  59119. + HASHED_DIR_PLUGIN_ID,
  59120. + SEEKABLE_HASHED_DIR_PLUGIN_ID,
  59121. + LAST_DIR_ID /* not a real plugin id; sizes dir_plugins[] */
  59122. +} reiser4_dir_id;
  59123. +
  59124. +typedef struct dir_plugin {
  59125. + /* generic fields */
  59126. + plugin_header h;
  59127. +
  59128. + struct inode_operations * inode_ops;
  59129. + struct file_operations * file_ops;
  59130. + struct address_space_operations * as_ops;
  59131. +
  59132. + /*
  59133. + * private methods: These are optional. If used they will allow you to
  59134. + * minimize the amount of code needed to implement a deviation from
  59135. + * some other method that uses them. You could logically argue that
  59136. + * they should be a separate type of plugin.
  59137. + */
  59138. +
  59139. + struct dentry *(*get_parent) (struct inode *childdir); /* presumably for file-handle export — confirm callers */
  59140. +
  59141. + /*
  59142. + * check whether "name" is an acceptable name to be inserted into this
  59143. + * object. Optionally implemented by directory-like objects. Can check
  59144. + * for maximal length, reserved symbols etc.
  59145. + */
  59146. + int (*is_name_acceptable) (const struct inode *inode, const char *name,
  59147. + int len);
  59148. +
  59149. + void (*build_entry_key) (const struct inode *dir /* directory where
  59150. + * entry is (or will
  59151. + * be) in.*/ ,
  59152. + const struct qstr *name /* name of file
  59153. + * referenced by this
  59154. + * entry */ ,
  59155. + reiser4_key * result /* resulting key of
  59156. + * directory entry */ );
  59157. + int (*build_readdir_key) (struct file *dir, reiser4_key * result);
  59158. + int (*add_entry) (struct inode *object, struct dentry *where,
  59159. + reiser4_object_create_data * data,
  59160. + reiser4_dir_entry_desc * entry);
  59161. + int (*rem_entry) (struct inode *object, struct dentry *where,
  59162. + reiser4_dir_entry_desc * entry);
  59163. +
  59164. + /*
  59165. + * initialize directory structure for newly created object. For normal
  59166. + * unix directories, insert dot and dotdot.
  59167. + */
  59168. + int (*init) (struct inode *object, struct inode *parent,
  59169. + reiser4_object_create_data * data);
  59170. +
  59171. + /* destroy directory structure (counterpart of ->init()) */
  59172. + int (*done) (struct inode *child);
  59173. +
  59174. + /* called when @subdir was just looked up in the @dir */
  59175. + int (*attach) (struct inode *subdir, struct inode *dir);
  59176. + int (*detach) (struct inode *subdir, struct inode *dir);
  59177. +
  59178. + struct { /* block-count estimates for directory operations */
  59179. + reiser4_block_nr(*add_entry) (const struct inode *);
  59180. + reiser4_block_nr(*rem_entry) (const struct inode *);
  59181. + reiser4_block_nr(*unlink) (const struct inode *,
  59182. + const struct inode *);
  59183. + } estimate;
  59184. +} dir_plugin;
  59185. +
  59186. +extern dir_plugin dir_plugins[LAST_DIR_ID];
  59187. +
  59188. +typedef struct formatting_plugin {
  59189. + /* generic fields */
  59190. + plugin_header h;
  59191. + /* returns non-zero iff file's tail has to be stored in a direct
  59192. + (tail) item (@size is presumably the file length — confirm) */
  59193. + int (*have_tail) (const struct inode *inode, loff_t size);
  59194. +} formatting_plugin;
  59195. +
  59196. +/**
  59197. + * Plugins of this interface implement different transaction models.
  59198. + * Transaction model is a high-level block allocator, which assigns block
  59199. + * numbers to dirty nodes, and, thereby, decides, how individual dirty
  59200. + * nodes of an atom will be committed.
  59201. + */
  59202. +typedef struct txmod_plugin {
  59203. + /* generic fields */
  59204. + plugin_header h;
  59205. + /**
  59206. + * allocate blocks in the FORWARD PARENT-FIRST context
  59207. + * for formatted nodes
  59208. + */
  59209. + int (*forward_alloc_formatted)(znode *node, const coord_t *parent_coord,
  59210. + flush_pos_t *pos); //was allocate_znode_loaded
  59211. + /**
  59212. + * allocate blocks in the REVERSE PARENT-FIRST context
  59213. + * for formatted nodes
  59214. + */
  59215. + int (*reverse_alloc_formatted)(jnode * node,
  59216. + const coord_t *parent_coord,
  59217. + flush_pos_t *pos); // was reverse_relocate_test
  59218. + /**
  59219. + * allocate blocks in the FORWARD PARENT-FIRST context
  59220. + * for unformatted nodes.
  59221. + *
  59222. + * This is called by handle_pos_on_twig to process the extent unit
  59223. + * flush_pos->coord is set to. It is to prepare for flushing a
  59224. + * sequence of not flushprepped nodes (slum). It supposes that
  59225. + * slum starts at flush_pos->pos_in_unit position within the extent
  59226. + */
  59227. + int (*forward_alloc_unformatted)(flush_pos_t *flush_pos); //was reiser4_alloc_extent
  59228. + /**
  59229. + * allocate blocks for unformatted nodes in squeeze_right_twig().
  59230. + * @coord is set to extent unit
  59231. + */
  59232. + squeeze_result (*squeeze_alloc_unformatted)(znode *left,
  59233. + const coord_t *coord,
  59234. + flush_pos_t *flush_pos,
  59235. + reiser4_key *stop_key); // was squalloc_extent
  59236. +} txmod_plugin;
  59237. +
  59238. +typedef struct hash_plugin {
  59239. + /* generic fields */
  59240. + plugin_header h;
  59241. + /* computes hash of the given name (used by directory plugins) */
  59242. + __u64(*hash) (const unsigned char *name, int len);
  59243. +} hash_plugin;
  59244. +
  59245. +typedef struct cipher_plugin {
  59246. + /* generic fields */
  59247. + plugin_header h;
  59248. + struct crypto_blkcipher * (*alloc) (void);
  59249. + void (*free) (struct crypto_blkcipher *tfm);
  59250. + /* Offset translator. For each offset this returns (k * offset), where
  59251. + k (k >= 1) is an expansion factor of the cipher algorithm.
  59252. + For all symmetric algorithms k == 1. For asymmetric algorithms (which
  59253. + inflate data) offset translation guarantees that all disk cluster's
  59254. + units will have keys smaller than the next cluster's.
  59255. + */
  59256. + loff_t(*scale) (struct inode *inode, size_t blocksize, loff_t src);
  59257. + /* Cipher algorithms can accept data only by chunks of cipher block
  59258. + size. This method is to align any flow up to cipher block size when
  59259. + we pass it to cipher algorithm. To align means to append padding of
  59260. + special format specific to the cipher algorithm */
  59261. + int (*align_stream) (__u8 *tail, int clust_size, int blocksize);
  59262. + /* low-level key manager (check, install, etc.) */
  59263. + int (*setkey) (struct crypto_tfm *tfm, const __u8 *key,
  59264. + unsigned int keylen);
  59265. + /* main text processing procedures */
  59266. + void (*encrypt) (__u32 *expkey, __u8 *dst, const __u8 *src);
  59267. + void (*decrypt) (__u32 *expkey, __u8 *dst, const __u8 *src);
  59268. +} cipher_plugin;
  59269. +
  59270. +typedef struct digest_plugin {
  59271. + /* generic fields */
  59272. + plugin_header h;
  59273. + /* fingerprint size in bytes */
  59274. + int fipsize;
  59275. + struct crypto_hash * (*alloc) (void); /* allocate hash transform */
  59276. + void (*free) (struct crypto_hash *tfm); /* release hash transform */
  59277. +} digest_plugin;
  59278. +
  59279. +typedef struct compression_plugin {
  59280. + /* generic fields */
  59281. + plugin_header h;
  59282. + int (*init) (void);
  59283. + /* the maximum number of bytes by which the size of the "compressed"
  59284. + * data can exceed the size of the uncompressed data. */
  59285. + int (*overrun) (unsigned src_len);
  59286. + coa_t(*alloc) (tfm_action act);
  59287. + void (*free) (coa_t coa, tfm_action act);
  59288. + /* minimal size of the flow we still try to compress */
  59289. + int (*min_size_deflate) (void);
  59290. + __u32(*checksum) (char *data, __u32 length);
  59291. + /* main transform procedures */
  59292. + void (*compress) (coa_t coa, __u8 *src_first, size_t src_len,
  59293. + __u8 *dst_first, size_t *dst_len);
  59294. + void (*decompress) (coa_t coa, __u8 *src_first, size_t src_len,
  59295. + __u8 *dst_first, size_t *dst_len);
  59296. +} compression_plugin;
  59297. +
  59298. +typedef struct compression_mode_plugin {
  59299. + /* generic fields */
  59300. + plugin_header h;
  59301. + /* called when estimating compressibility
  59302. + of a logical cluster by its content */
  59303. + int (*should_deflate) (struct inode *inode, cloff_t index);
  59304. + /* called when results of compression should be saved */
  59305. + int (*accept_hook) (struct inode *inode, cloff_t index);
  59306. + /* called when results of compression should be discarded */
  59307. + int (*discard_hook) (struct inode *inode, cloff_t index);
  59308. +} compression_mode_plugin;
  59309. +
  59310. +typedef struct cluster_plugin {
  59311. + /* generic fields */
  59312. + plugin_header h;
  59313. + int shift; /* cluster size is 2^shift (cf. reiser4_cluster_id, 64K..4K) — confirm units */
  59314. +} cluster_plugin;
  59315. +
  59316. +typedef struct sd_ext_plugin {
  59317. + /* generic fields */
  59318. + plugin_header h;
  59319. + int (*present) (struct inode *inode, char **area, int *len); /* extension present in stat-data; parse from @area — confirm */
  59320. + int (*absent) (struct inode *inode); /* extension missing from stat-data — confirm */
  59321. + int (*save_len) (struct inode *inode); /* bytes needed to store this extension */
  59322. + int (*save) (struct inode *inode, char **area); /* write extension to @area */
  59323. + /* alignment requirement for this stat-data part */
  59324. + int alignment;
  59325. +} sd_ext_plugin;
  59326. +
  59327. +/* this plugin contains methods to allocate an objectid for newly created
  59328. + files, to deallocate the objectid when a file gets removed, and to report
  59329. + the number of used and free objectids */
  59330. +typedef struct oid_allocator_plugin {
  59331. + /* generic fields */
  59332. + plugin_header h;
  59333. + int (*init_oid_allocator) (reiser4_oid_allocator * map, __u64 nr_files,
  59334. + __u64 oids);
  59335. + /* used to report statfs->f_files */
  59336. + __u64(*oids_used) (reiser4_oid_allocator * map);
  59337. + /* get next oid to use */
  59338. + __u64(*next_oid) (reiser4_oid_allocator * map);
  59339. + /* used to report statfs->f_ffree */
  59340. + __u64(*oids_free) (reiser4_oid_allocator * map);
  59341. + /* allocate new objectid */
  59342. + int (*allocate_oid) (reiser4_oid_allocator * map, oid_t *);
  59343. + /* release objectid */
  59344. + int (*release_oid) (reiser4_oid_allocator * map, oid_t);
  59345. + /* how many pages to reserve in transaction for allocation of new
  59346. + objectid */
  59347. + int (*oid_reserve_allocate) (reiser4_oid_allocator * map);
  59348. + /* how many pages to reserve in transaction for freeing of an
  59349. + objectid */
  59350. + int (*oid_reserve_release) (reiser4_oid_allocator * map);
  59351. + void (*print_info) (const char *, reiser4_oid_allocator *); /* debugging */
  59352. +} oid_allocator_plugin;
  59353. +
  59354. +/* disk layout plugin: this specifies super block, journal, bitmap (if there
  59355. + are any) locations, etc. */
  59356. +typedef struct disk_format_plugin {
  59357. + /* generic fields */
  59358. + plugin_header h;
  59359. + /* replay journal, initialize super_info_data, etc. */
  59360. + int (*init_format) (struct super_block *, void *data);
  59361. +
  59362. + /* key of root directory stat data */
  59363. + const reiser4_key * (*root_dir_key) (const struct super_block *);
  59364. +
  59365. + int (*release) (struct super_block *); /* release format resources (presumably at umount) */
  59366. + jnode * (*log_super) (struct super_block *); /* jnode of the super block for logging — confirm */
  59367. + int (*check_open) (const struct inode *object); /* validate @object on open — confirm semantics */
  59368. + int (*version_update) (struct super_block *); /* update on-disk format version — confirm semantics */
  59369. +} disk_format_plugin;
  59370. +
  59371. +struct jnode_plugin {
  59372. + /* generic fields */
  59373. + plugin_header h;
  59374. + int (*init) (jnode * node);
  59375. + int (*parse) (jnode * node);
  59376. + struct address_space *(*mapping) (const jnode * node); /* presumably the page mapping of the node — confirm */
  59377. + unsigned long (*index) (const jnode * node); /* presumably page index within ->mapping — confirm */
  59378. + jnode * (*clone) (jnode * node);
  59379. +};
  59380. +
  59381. +/* plugin instance. */
  59382. +/* */
  59383. +/* This is "wrapper" union for all types of plugins. Most of the code uses */
  59384. +/* plugins of particular type (file_plugin, dir_plugin, etc.) rather than */
  59385. +/* operates with pointers to reiser4_plugin. This union is only used in */
  59386. +/* some generic code in plugin/plugin.c that operates on all */
  59387. +/* plugins. Technically speaking purpose of this union is to add type */
  59388. +/* safety to said generic code: each plugin type (file_plugin, for */
  59389. +/* example), contains plugin_header as its first member. This first member */
  59390. +/* is located at the same place in memory as .h member of */
  59391. +/* reiser4_plugin. Generic code, obtains pointer to reiser4_plugin and */
  59392. +/* looks in the .h which is header of plugin type located in union. This */
  59393. +/* makes it possible to avoid type-casts. */
  59394. +union reiser4_plugin {
  59395. + /* generic fields */
  59396. + plugin_header h;
  59397. + /* file plugin */
  59398. + file_plugin file;
  59399. + /* directory plugin */
  59400. + dir_plugin dir;
  59401. + /* hash plugin, used by directory plugin */
  59402. + hash_plugin hash;
  59403. + /* fibration plugin used by directory plugin */
  59404. + fibration_plugin fibration;
  59405. + /* cipher transform plugin, used by file plugin */
  59406. + cipher_plugin cipher;
  59407. + /* digest transform plugin, used by file plugin */
  59408. + digest_plugin digest;
  59409. + /* compression transform plugin, used by file plugin */
  59410. + compression_plugin compression;
  59411. + /* tail plugin, used by file plugin */
  59412. + formatting_plugin formatting;
  59413. + /* permission plugin */
  59414. + perm_plugin perm;
  59415. + /* node plugin */
  59416. + node_plugin node;
  59417. + /* item plugin */
  59418. + item_plugin item;
  59419. + /* stat-data extension plugin */
  59420. + sd_ext_plugin sd_ext;
  59421. + /* disk layout plugin */
  59422. + disk_format_plugin format;
  59423. + /* object id allocator plugin */
  59424. + oid_allocator_plugin oid_allocator;
  59425. + /* plugin for different jnode types */
  59426. + jnode_plugin jnode;
  59427. + /* compression mode plugin, used by object plugin */
  59428. + compression_mode_plugin compression_mode;
  59429. + /* cluster plugin, used by object plugin */
  59430. + cluster_plugin clust;
  59431. + /* transaction mode plugin */
  59432. + txmod_plugin txmod;
  59433. + /* place-holder for new plugin types that can be registered
  59434. + dynamically, and used by other dynamically loaded plugins. */
  59435. + void *generic;
  59436. +};
  59437. +
  59438. +struct reiser4_plugin_ops {
  59439. + /* called when plugin is initialized */
  59440. + int (*init) (reiser4_plugin * plugin);
  59441. + /* called when plugin is unloaded */
  59442. + int (*done) (reiser4_plugin * plugin);
  59443. + /* load given plugin from disk */
  59444. + int (*load) (struct inode *inode,
  59445. + reiser4_plugin * plugin, char **area, int *len);
  59446. + /* how much space is required to store this plugin's state
  59447. + in stat-data */
  59448. + int (*save_len) (struct inode *inode, reiser4_plugin * plugin);
  59449. + /* save persistent plugin-data to disk */
  59450. + int (*save) (struct inode *inode, reiser4_plugin * plugin,
  59451. + char **area);
  59452. + /* alignment requirement for on-disk state of this plugin
  59453. + in number of bytes */
  59454. + int alignment;
  59455. + /* install itself into given inode. This can return error
  59456. + (e.g., you cannot change hash of non-empty directory). */
  59457. + int (*change) (struct inode *inode, reiser4_plugin * plugin,
  59458. + pset_member memb);
  59459. + /* inherit plugin from @parent inode into @inode. This can
  59460. + return an error. */
  59461. + int (*inherit) (struct inode *inode, struct inode *parent,
  59462. + reiser4_plugin * plugin);
  59463. +};
  59464. +
  59465. +/* functions implemented in fs/reiser4/plugin/plugin.c */
  59466. +
  59467. +/* stores plugin reference in reiser4-specific part of inode */
  59468. +extern int set_object_plugin(struct inode *inode, reiser4_plugin_id id);
  59469. +extern int init_plugins(void);
  59470. +
  59471. +/* builtin plugins */
  59472. +
  59473. +/* builtin hash-plugins */
  59474. +
  59475. +typedef enum {
  59476. + RUPASOV_HASH_ID,
  59477. + R5_HASH_ID,
  59478. + TEA_HASH_ID,
  59479. + FNV1_HASH_ID,
  59480. + DEGENERATE_HASH_ID,
  59481. + LAST_HASH_ID /* not a real id; sizes hash_plugins[] */
  59482. +} reiser4_hash_id;
  59483. +
  59484. +/* builtin cipher plugins */
  59485. +
  59486. +typedef enum {
  59487. + NONE_CIPHER_ID,
  59488. + LAST_CIPHER_ID /* not a real id; sizes cipher_plugins[] */
  59489. +} reiser4_cipher_id;
  59490. +
  59491. +/* builtin digest plugins */
  59492. +
  59493. +typedef enum {
  59494. + SHA256_32_DIGEST_ID,
  59495. + LAST_DIGEST_ID /* not a real id; sizes digest_plugins[] */
  59496. +} reiser4_digest_id;
  59497. +
  59498. +/* builtin compression mode plugins */
  59499. +typedef enum {
  59500. + NONE_COMPRESSION_MODE_ID,
  59501. + LATTD_COMPRESSION_MODE_ID,
  59502. + ULTIM_COMPRESSION_MODE_ID,
  59503. + FORCE_COMPRESSION_MODE_ID,
  59504. + CONVX_COMPRESSION_MODE_ID,
  59505. + LAST_COMPRESSION_MODE_ID /* sizes compression_mode_plugins[] */
  59506. +} reiser4_compression_mode_id;
  59507. +
  59508. +/* builtin cluster plugins */
  59509. +typedef enum {
  59510. + CLUSTER_64K_ID,
  59511. + CLUSTER_32K_ID,
  59512. + CLUSTER_16K_ID,
  59513. + CLUSTER_8K_ID,
  59514. + CLUSTER_4K_ID,
  59515. + LAST_CLUSTER_ID /* sizes cluster_plugins[] */
  59516. +} reiser4_cluster_id;
  59517. +
  59518. +/* builtin tail packing policies */
  59519. +typedef enum {
  59520. + NEVER_TAILS_FORMATTING_ID,
  59521. + ALWAYS_TAILS_FORMATTING_ID,
  59522. + SMALL_FILE_FORMATTING_ID,
  59523. + LAST_TAIL_FORMATTING_ID /* sizes formatting_plugins[] */
  59524. +} reiser4_formatting_id;
  59525. +
  59526. +/* builtin transaction models */
  59527. +typedef enum {
  59528. + HYBRID_TXMOD_ID,
  59529. + JOURNAL_TXMOD_ID,
  59530. + WA_TXMOD_ID,
  59531. + LAST_TXMOD_ID /* sizes txmod_plugins[] */
  59532. +} reiser4_txmod_id;
  59533. +
  59534. +
  59535. +/* data type used to pack parameters that we pass to vfs object creation
  59536. + function create_object() */
  59537. +struct reiser4_object_create_data {
  59538. + /* plugin to control created object */
  59539. + reiser4_file_id id;
  59540. + /* mode of regular file, directory or special file */
  59541. + /* NOTE(review): what happens if some other sort of perm plugin is in use? */
  59542. + umode_t mode;
  59543. + /* rdev of special file */
  59544. + dev_t rdev;
  59545. + /* symlink target */
  59546. + const char *name;
  59547. + /* add here something for non-standard objects you invent, like
  59548. + query for interpolation file etc. */
  59549. +
  59550. + struct reiser4_crypto_info *crypto; /* crypto info — presumably for crypto-enabled files; confirm */
  59551. +
  59552. + struct inode *parent; /* parent inode of the object being created */
  59553. + struct dentry *dentry; /* dentry of the object being created */
  59554. +};
  59555. +
  59556. +/* description of a directory entry being created/destroyed/sought for
  59557. +
  59558. + It is passed down to the directory plugin and further to the
  59559. + directory item plugin methods. Creation of a new directory entry is
  59560. + done in several stages: first we search for an entry with the same
  59561. + name, then create a new one. reiser4_dir_entry_desc is used to store
  59562. + some information collected at some stage of this process and required
  59563. + later: key of the item that we want to insert/delete and a pointer to
  59564. + an object that will be bound by the new directory entry. Probably
  59565. + some more fields will be added there.
  59566. +
  59567. +*/
  59568. +struct reiser4_dir_entry_desc {
  59569. + /* key of directory entry */
  59570. + reiser4_key key;
  59571. + /* object bound by this entry. */
  59572. + struct inode *obj;
  59573. +};
  59574. +
  59575. +#define MAX_PLUGIN_TYPE_LABEL_LEN 32
  59576. +#define MAX_PLUGIN_PLUG_LABEL_LEN 32
  59577. +
  59578. +#define PLUGIN_BY_ID(TYPE, ID, FIELD) /* generate typed lookup helpers for one plugin type */ \
  59579. +static inline TYPE *TYPE ## _by_id(reiser4_plugin_id id) \
  59580. +{ \
  59581. + reiser4_plugin *plugin = plugin_by_id(ID, id); \
  59582. + return plugin ? &plugin->FIELD : NULL; \
  59583. +} \
  59584. +static inline TYPE *TYPE ## _by_disk_id(reiser4_tree * tree, d16 *id) \
  59585. +{ \
  59586. + reiser4_plugin *plugin = plugin_by_disk_id(tree, ID, id); \
  59587. + return plugin ? &plugin->FIELD : NULL; \
  59588. +} \
  59589. +static inline TYPE *TYPE ## _by_unsafe_id(reiser4_plugin_id id) \
  59590. +{ \
  59591. + reiser4_plugin *plugin = plugin_by_unsafe_id(ID, id); \
  59592. + return plugin ? &plugin->FIELD : NULL; \
  59593. +} \
  59594. +static inline reiser4_plugin* TYPE ## _to_plugin(TYPE* plugin) \
  59595. +{ \
  59596. + return (reiser4_plugin *) plugin; \
  59597. +} \
  59598. +static inline reiser4_plugin_id TYPE ## _id(TYPE* plugin) \
  59599. +{ \
  59600. + return TYPE ## _to_plugin(plugin)->h.id; \
  59601. +} \
  59602. +typedef struct { int foo; } TYPE ## _plugin_dummy /* dummy typedef forces ';' at the macro use site */
  59603. +
  59604. +static inline int get_release_number_major(void)
  59605. +{
  59606. + return LAST_FORMAT_ID - 1; /* highest defined disk-format id */
  59607. +}
  59608. +
  59609. +static inline int get_release_number_minor(void)
  59610. +{
  59611. + return PLUGIN_LIBRARY_VERSION; /* minor number tracks the plugin library version */
  59612. +}
  59613. +
  59614. +PLUGIN_BY_ID(item_plugin, REISER4_ITEM_PLUGIN_TYPE, item);
  59615. +PLUGIN_BY_ID(file_plugin, REISER4_FILE_PLUGIN_TYPE, file);
  59616. +PLUGIN_BY_ID(dir_plugin, REISER4_DIR_PLUGIN_TYPE, dir);
  59617. +PLUGIN_BY_ID(node_plugin, REISER4_NODE_PLUGIN_TYPE, node);
  59618. +PLUGIN_BY_ID(sd_ext_plugin, REISER4_SD_EXT_PLUGIN_TYPE, sd_ext);
  59619. +PLUGIN_BY_ID(perm_plugin, REISER4_PERM_PLUGIN_TYPE, perm);
  59620. +PLUGIN_BY_ID(hash_plugin, REISER4_HASH_PLUGIN_TYPE, hash);
  59621. +PLUGIN_BY_ID(fibration_plugin, REISER4_FIBRATION_PLUGIN_TYPE, fibration);
  59622. +PLUGIN_BY_ID(cipher_plugin, REISER4_CIPHER_PLUGIN_TYPE, cipher);
  59623. +PLUGIN_BY_ID(digest_plugin, REISER4_DIGEST_PLUGIN_TYPE, digest);
  59624. +PLUGIN_BY_ID(compression_plugin, REISER4_COMPRESSION_PLUGIN_TYPE, compression);
  59625. +PLUGIN_BY_ID(formatting_plugin, REISER4_FORMATTING_PLUGIN_TYPE, formatting);
  59626. +PLUGIN_BY_ID(disk_format_plugin, REISER4_FORMAT_PLUGIN_TYPE, format);
  59627. +PLUGIN_BY_ID(jnode_plugin, REISER4_JNODE_PLUGIN_TYPE, jnode);
  59628. +PLUGIN_BY_ID(compression_mode_plugin, REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  59629. + compression_mode);
  59630. +PLUGIN_BY_ID(cluster_plugin, REISER4_CLUSTER_PLUGIN_TYPE, clust);
  59631. +PLUGIN_BY_ID(txmod_plugin, REISER4_TXMOD_PLUGIN_TYPE, txmod);
  59632. +
  59633. +extern int save_plugin_id(reiser4_plugin * plugin, d16 * area);
  59634. +
  59635. +extern struct list_head *get_plugin_list(reiser4_plugin_type type_id);
  59636. +
  59637. +#define for_all_plugins(ptype, plugin) /* iterate @plugin over all registered plugins of @ptype */ \
  59638. +for (plugin = list_entry(get_plugin_list(ptype)->next, reiser4_plugin, h.linkage); \
  59639. + get_plugin_list(ptype) != &plugin->h.linkage; \
  59640. + plugin = list_entry(plugin->h.linkage.next, reiser4_plugin, h.linkage))
  59641. +
  59642. +
  59643. +extern int grab_plugin_pset(struct inode *self, struct inode *ancestor,
  59644. + pset_member memb);
  59645. +extern int force_plugin_pset(struct inode *self, pset_member memb,
  59646. + reiser4_plugin *plug);
  59647. +extern int finish_pset(struct inode *inode);
  59648. +
  59649. +/* defined in fs/reiser4/plugin/object.c */
  59650. +extern file_plugin file_plugins[LAST_FILE_PLUGIN_ID];
  59651. +/* defined in fs/reiser4/plugin/object.c */
  59652. +extern dir_plugin dir_plugins[LAST_DIR_ID];
  59653. +/* defined in fs/reiser4/plugin/item/static_stat.c */
  59654. +extern sd_ext_plugin sd_ext_plugins[LAST_SD_EXTENSION];
  59655. +/* defined in fs/reiser4/plugin/hash.c */
  59656. +extern hash_plugin hash_plugins[LAST_HASH_ID];
  59657. +/* defined in fs/reiser4/plugin/fibration.c */
  59658. +extern fibration_plugin fibration_plugins[LAST_FIBRATION_ID];
  59659. +/* defined in fs/reiser4/plugin/txmod.c */
  59660. +extern txmod_plugin txmod_plugins[LAST_TXMOD_ID];
  59661. +/* defined in fs/reiser4/plugin/crypt.c */
  59662. +extern cipher_plugin cipher_plugins[LAST_CIPHER_ID];
  59663. +/* defined in fs/reiser4/plugin/digest.c */
  59664. +extern digest_plugin digest_plugins[LAST_DIGEST_ID];
  59665. +/* defined in fs/reiser4/plugin/compress/compress.c */
  59666. +extern compression_plugin compression_plugins[LAST_COMPRESSION_ID];
  59667. +/* defined in fs/reiser4/plugin/compress/compression_mode.c */
  59668. +extern compression_mode_plugin
  59669. +compression_mode_plugins[LAST_COMPRESSION_MODE_ID];
  59670. +/* defined in fs/reiser4/plugin/cluster.c */
  59671. +extern cluster_plugin cluster_plugins[LAST_CLUSTER_ID];
  59672. +/* defined in fs/reiser4/plugin/tail.c */
  59673. +extern formatting_plugin formatting_plugins[LAST_TAIL_FORMATTING_ID];
  59674. +/* defined in fs/reiser4/plugin/security/security.c */
  59675. +extern perm_plugin perm_plugins[LAST_PERM_ID];
  59676. +/* defined in fs/reiser4/plugin/item/item.c */
  59677. +extern item_plugin item_plugins[LAST_ITEM_ID];
  59678. +/* defined in fs/reiser4/plugin/node/node.c */
  59679. +extern node_plugin node_plugins[LAST_NODE_ID];
  59680. +/* defined in fs/reiser4/plugin/disk_format/disk_format.c */
  59681. +extern disk_format_plugin format_plugins[LAST_FORMAT_ID];
  59682. +
  59683. +/* __FS_REISER4_PLUGIN_TYPES_H__ */
  59684. +#endif
  59685. +
  59686. +/* Make Linus happy.
  59687. + Local variables:
  59688. + c-indentation-style: "K&R"
  59689. + mode-name: "LC"
  59690. + c-basic-offset: 8
  59691. + tab-width: 8
  59692. + fill-column: 120
  59693. + End:
  59694. +*/
  59695. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/plugin_header.h linux-5.16.14/fs/reiser4/plugin/plugin_header.h
  59696. --- linux-5.16.14.orig/fs/reiser4/plugin/plugin_header.h 1970-01-01 01:00:00.000000000 +0100
  59697. +++ linux-5.16.14/fs/reiser4/plugin/plugin_header.h 2022-03-12 13:26:19.684892806 +0100
  59698. @@ -0,0 +1,150 @@
  59699. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  59700. +
  59701. +/* plugin header. Data structures required by all plugin types. */
  59702. +
  59703. +#if !defined(__PLUGIN_HEADER_H__)
  59704. +#define __PLUGIN_HEADER_H__
  59705. +
  59706. +/* plugin data-types and constants */
  59707. +
  59708. +#include "../debug.h"
  59709. +#include "../dformat.h"
  59710. +
  59711. +/* The list of Reiser4 interfaces */
  59712. +typedef enum {
  59713. + REISER4_FILE_PLUGIN_TYPE, /* manage VFS objects */
  59714. + REISER4_DIR_PLUGIN_TYPE, /* manage directories */
  59715. + REISER4_ITEM_PLUGIN_TYPE, /* manage items */
  59716. + REISER4_NODE_PLUGIN_TYPE, /* manage formatted nodes */
  59717. + REISER4_HASH_PLUGIN_TYPE, /* hash methods */
  59718. + REISER4_FIBRATION_PLUGIN_TYPE, /* directory fibrations */
  59719. + REISER4_FORMATTING_PLUGIN_TYPE, /* dispatching policy */
  59720. + REISER4_PERM_PLUGIN_TYPE, /* stub (vacancy) */
  59721. + REISER4_SD_EXT_PLUGIN_TYPE, /* manage stat-data extensions */
  59722. + REISER4_FORMAT_PLUGIN_TYPE, /* disk format specifications */
  59723. + REISER4_JNODE_PLUGIN_TYPE, /* manage in-memory headers */
  59724. + REISER4_CIPHER_PLUGIN_TYPE, /* cipher transform methods */
  59725. + REISER4_DIGEST_PLUGIN_TYPE, /* digest transform methods */
  59726. + REISER4_COMPRESSION_PLUGIN_TYPE, /* compression methods */
  59727. + REISER4_COMPRESSION_MODE_PLUGIN_TYPE, /* dispatching policies */
  59728. + REISER4_CLUSTER_PLUGIN_TYPE, /* manage logical clusters */
  59729. + REISER4_TXMOD_PLUGIN_TYPE, /* transaction models */
  59730. + REISER4_PLUGIN_TYPES
  59731. +} reiser4_plugin_type;
  59732. +
  59733. +/* Supported plugin groups */
  59734. +typedef enum {
  59735. + REISER4_DIRECTORY_FILE,
  59736. + REISER4_REGULAR_FILE,
  59737. + REISER4_SYMLINK_FILE,
  59738. + REISER4_SPECIAL_FILE,
  59739. +} file_plugin_group;
  59740. +
  59741. +struct reiser4_plugin_ops;
  59742. +/* generic plugin operations, supported by each
  59743. + plugin type. */
  59744. +typedef struct reiser4_plugin_ops reiser4_plugin_ops;
  59745. +
  59746. +/* the common part of all plugin instances. */
  59747. +typedef struct plugin_header {
  59748. + /* plugin type */
  59749. + reiser4_plugin_type type_id;
  59750. + /* id of this plugin */
  59751. + reiser4_plugin_id id;
  59752. + /* bitmask of groups the plugin belongs to. */
  59753. + reiser4_plugin_groups groups;
  59754. + /* plugin operations */
  59755. + reiser4_plugin_ops *pops;
  59756. +/* NIKITA-FIXME-HANS: usage of and access to label and desc is not commented and
  59757. + * defined. */
  59758. + /* short label of this plugin */
  59759. + const char *label;
  59760. + /* descriptive string.. */
  59761. + const char *desc;
  59762. + /* list linkage */
  59763. + struct list_head linkage;
  59764. +} plugin_header;
  59765. +
  59766. +#define plugin_of_group(plug, group) (plug->h.groups & (1 << group))
  59767. +
  59768. +/* PRIVATE INTERFACES */
  59769. +/* NIKITA-FIXME-HANS: what is this for and why does it duplicate what is in
  59770. + * plugin_header? */
  59771. +/* plugin type representation. */
  59772. +struct reiser4_plugin_type_data {
  59773. + /* internal plugin type identifier. Should coincide with
  59774. + index of this item in plugins[] array. */
  59775. + reiser4_plugin_type type_id;
  59776. + /* short symbolic label of this plugin type. Should be no longer
  59777. + than MAX_PLUGIN_TYPE_LABEL_LEN characters including '\0'. */
  59778. + const char *label;
  59779. + /* plugin type description longer than .label */
  59780. + const char *desc;
  59781. +
  59782. +/* NIKITA-FIXME-HANS: define built-in */
  59783. + /* number of built-in plugin instances of this type */
  59784. + int builtin_num;
  59785. + /* array of built-in plugins */
  59786. + void *builtin;
  59787. + struct list_head plugins_list;
  59788. + size_t size;
  59789. +};
  59790. +
  59791. +extern struct reiser4_plugin_type_data plugins[REISER4_PLUGIN_TYPES];
  59792. +
  59793. +int is_plugin_type_valid(reiser4_plugin_type type);
  59794. +int is_plugin_id_valid(reiser4_plugin_type type, reiser4_plugin_id id);
  59795. +
  59796. +static inline reiser4_plugin *plugin_at(struct reiser4_plugin_type_data *ptype,
  59797. + int i)
  59798. +{
  59799. + char *builtin;
  59800. +
  59801. + builtin = ptype->builtin;
  59802. + return (reiser4_plugin *) (builtin + i * ptype->size);
  59803. +}
  59804. +
  59805. +/* return plugin by its @type_id and @id */
  59806. +static inline reiser4_plugin *plugin_by_id(reiser4_plugin_type type,
  59807. + reiser4_plugin_id id)
  59808. +{
  59809. + assert("nikita-1651", is_plugin_type_valid(type));
  59810. + assert("nikita-1652", is_plugin_id_valid(type, id));
  59811. + return plugin_at(&plugins[type], id);
  59812. +}
  59813. +
  59814. +extern reiser4_plugin *plugin_by_unsafe_id(reiser4_plugin_type type_id,
  59815. + reiser4_plugin_id id);
  59816. +
  59817. +/**
  59818. + * plugin_by_disk_id - get reiser4_plugin
  59819. + * @type_id: plugin type id
  59820. + * @did: plugin id in disk format
  59821. + *
  59822. + * Returns reiser4_plugin by plugin type id and plugin id.
  59823. + */
  59824. +static inline reiser4_plugin *plugin_by_disk_id(reiser4_tree * tree UNUSED_ARG,
  59825. + reiser4_plugin_type type_id,
  59826. + __le16 *plugin_id)
  59827. +{
  59828. + /*
  59829. + * what we should do properly is to maintain within each file-system a
  59830. + * dictionary that maps on-disk plugin ids to "universal" ids. This
  59831. + * dictionary will be resolved on mount time, so that this function
  59832. + * will perform just one additional array lookup.
  59833. + */
  59834. + return plugin_by_unsafe_id(type_id, le16_to_cpu(*plugin_id));
  59835. +}
  59836. +
  59837. +/* __PLUGIN_HEADER_H__ */
  59838. +#endif
  59839. +
  59840. +/*
  59841. + * Local variables:
  59842. + * c-indentation-style: "K&R"
  59843. + * mode-name: "LC"
  59844. + * c-basic-offset: 8
  59845. + * tab-width: 8
  59846. + * fill-column: 79
  59847. + * End:
  59848. + */
  59849. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/plugin_set.c linux-5.16.14/fs/reiser4/plugin/plugin_set.c
  59850. --- linux-5.16.14.orig/fs/reiser4/plugin/plugin_set.c 1970-01-01 01:00:00.000000000 +0100
  59851. +++ linux-5.16.14/fs/reiser4/plugin/plugin_set.c 2022-03-12 13:26:19.684892806 +0100
  59852. @@ -0,0 +1,387 @@
  59853. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  59854. + * reiser4/README */
  59855. +/* This file contains Reiser4 plugin set operations */
  59856. +
  59857. +/* plugin sets
  59858. + *
  59859. + * Each file in reiser4 is controlled by a whole set of plugins (file plugin,
  59860. + * directory plugin, hash plugin, tail policy plugin, security plugin, etc.)
  59861. + * assigned (inherited, deduced from mode bits, etc.) at creation time. This
  59862. + * set of plugins (so called pset) is described by structure plugin_set (see
  59863. + * plugin/plugin_set.h), which contains pointers to all required plugins.
  59864. + *
  59865. + * Children can inherit some pset members from their parent, however sometimes
  59866. + * it is useful to specify members different from parent ones. Since object's
  59867. + * pset can not be easily changed without fatal consequences, we use for this
  59868. + * purpose another special plugin table (so called hset, or heir set) described
  59869. + * by the same structure.
  59870. + *
  59871. + * Inode only stores a pointers to pset and hset. Different inodes with the
  59872. + * same set of pset (hset) members point to the same pset (hset). This is
  59873. + * achieved by storing psets and hsets in global hash table. Races are avoided
  59874. + * by simple (and efficient so far) solution of never recycling psets, even
  59875. + * when last inode pointing to it is destroyed.
  59876. + */
  59877. +
  59878. +#include "../debug.h"
  59879. +#include "../super.h"
  59880. +#include "plugin_set.h"
  59881. +
  59882. +#include <linux/slab.h>
  59883. +#include <linux/stddef.h>
  59884. +
  59885. +/* slab for plugin sets */
  59886. +static struct kmem_cache *plugin_set_slab;
  59887. +
  59888. +static spinlock_t plugin_set_lock[8] __cacheline_aligned_in_smp = {
  59889. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[0]),
  59890. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[1]),
  59891. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[2]),
  59892. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[3]),
  59893. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[4]),
  59894. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[5]),
  59895. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[6]),
  59896. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[7])
  59897. +};
  59898. +
  59899. +/* hash table support */
  59900. +
  59901. +#define PS_TABLE_SIZE (32)
  59902. +
  59903. +static inline plugin_set *cast_to(const unsigned long *a)
  59904. +{
  59905. + return container_of(a, plugin_set, hashval);
  59906. +}
  59907. +
  59908. +static inline int pseq(const unsigned long *a1, const unsigned long *a2)
  59909. +{
  59910. + plugin_set *set1;
  59911. + plugin_set *set2;
  59912. +
  59913. + /* make sure fields are not missed in the code below */
  59914. + static_assert(sizeof *set1 ==
  59915. + sizeof set1->hashval +
  59916. + sizeof set1->link +
  59917. + sizeof set1->file +
  59918. + sizeof set1->dir +
  59919. + sizeof set1->perm +
  59920. + sizeof set1->formatting +
  59921. + sizeof set1->hash +
  59922. + sizeof set1->fibration +
  59923. + sizeof set1->sd +
  59924. + sizeof set1->dir_item +
  59925. + sizeof set1->cipher +
  59926. + sizeof set1->digest +
  59927. + sizeof set1->compression +
  59928. + sizeof set1->compression_mode +
  59929. + sizeof set1->cluster +
  59930. + sizeof set1->create);
  59931. +
  59932. + set1 = cast_to(a1);
  59933. + set2 = cast_to(a2);
  59934. + return
  59935. + set1->hashval == set2->hashval &&
  59936. + set1->file == set2->file &&
  59937. + set1->dir == set2->dir &&
  59938. + set1->perm == set2->perm &&
  59939. + set1->formatting == set2->formatting &&
  59940. + set1->hash == set2->hash &&
  59941. + set1->fibration == set2->fibration &&
  59942. + set1->sd == set2->sd &&
  59943. + set1->dir_item == set2->dir_item &&
  59944. + set1->cipher == set2->cipher &&
  59945. + set1->digest == set2->digest &&
  59946. + set1->compression == set2->compression &&
  59947. + set1->compression_mode == set2->compression_mode &&
  59948. + set1->cluster == set2->cluster &&
  59949. + set1->create == set2->create;
  59950. +}
  59951. +
  59952. +#define HASH_FIELD(hash, set, field) \
  59953. +({ \
  59954. + (hash) += (unsigned long)(set)->field >> 2; \
  59955. +})
  59956. +
  59957. +static inline unsigned long calculate_hash(const plugin_set * set)
  59958. +{
  59959. + unsigned long result;
  59960. +
  59961. + result = 0;
  59962. + HASH_FIELD(result, set, file);
  59963. + HASH_FIELD(result, set, dir);
  59964. + HASH_FIELD(result, set, perm);
  59965. + HASH_FIELD(result, set, formatting);
  59966. + HASH_FIELD(result, set, hash);
  59967. + HASH_FIELD(result, set, fibration);
  59968. + HASH_FIELD(result, set, sd);
  59969. + HASH_FIELD(result, set, dir_item);
  59970. + HASH_FIELD(result, set, cipher);
  59971. + HASH_FIELD(result, set, digest);
  59972. + HASH_FIELD(result, set, compression);
  59973. + HASH_FIELD(result, set, compression_mode);
  59974. + HASH_FIELD(result, set, cluster);
  59975. + HASH_FIELD(result, set, create);
  59976. + return result & (PS_TABLE_SIZE - 1);
  59977. +}
  59978. +
  59979. +static inline unsigned long
  59980. +pshash(ps_hash_table * table, const unsigned long *a)
  59981. +{
  59982. + return *a;
  59983. +}
  59984. +
  59985. +/* The hash table definition */
  59986. +#define KMALLOC(size) kmalloc((size), reiser4_ctx_gfp_mask_get())
  59987. +#define KFREE(ptr, size) kfree(ptr)
  59988. +TYPE_SAFE_HASH_DEFINE(ps, plugin_set, unsigned long, hashval, link, pshash,
  59989. + pseq);
  59990. +#undef KFREE
  59991. +#undef KMALLOC
  59992. +
  59993. +static ps_hash_table ps_table;
  59994. +static plugin_set empty_set = {
  59995. + .hashval = 0,
  59996. + .file = NULL,
  59997. + .dir = NULL,
  59998. + .perm = NULL,
  59999. + .formatting = NULL,
  60000. + .hash = NULL,
  60001. + .fibration = NULL,
  60002. + .sd = NULL,
  60003. + .dir_item = NULL,
  60004. + .cipher = NULL,
  60005. + .digest = NULL,
  60006. + .compression = NULL,
  60007. + .compression_mode = NULL,
  60008. + .cluster = NULL,
  60009. + .create = NULL,
  60010. + .link = {NULL}
  60011. +};
  60012. +
  60013. +plugin_set *plugin_set_get_empty(void)
  60014. +{
  60015. + return &empty_set;
  60016. +}
  60017. +
  60018. +void plugin_set_put(plugin_set * set)
  60019. +{
  60020. +}
  60021. +
  60022. +static inline unsigned long *pset_field(plugin_set * set, int offset)
  60023. +{
  60024. + return (unsigned long *)(((char *)set) + offset);
  60025. +}
  60026. +
  60027. +static int plugin_set_field(plugin_set ** set, const unsigned long val,
  60028. + const int offset)
  60029. +{
  60030. + unsigned long *spot;
  60031. + spinlock_t *lock;
  60032. + plugin_set replica;
  60033. + plugin_set *twin;
  60034. + plugin_set *psal;
  60035. + plugin_set *orig;
  60036. +
  60037. + assert("nikita-2902", set != NULL);
  60038. + assert("nikita-2904", *set != NULL);
  60039. +
  60040. + spot = pset_field(*set, offset);
  60041. + if (unlikely(*spot == val))
  60042. + return 0;
  60043. +
  60044. + replica = *(orig = *set);
  60045. + *pset_field(&replica, offset) = val;
  60046. + replica.hashval = calculate_hash(&replica);
  60047. + rcu_read_lock();
  60048. + twin = ps_hash_find(&ps_table, &replica.hashval);
  60049. + if (unlikely(twin == NULL)) {
  60050. + rcu_read_unlock();
  60051. + psal = kmem_cache_alloc(plugin_set_slab,
  60052. + reiser4_ctx_gfp_mask_get());
  60053. + if (psal == NULL)
  60054. + return RETERR(-ENOMEM);
  60055. + *psal = replica;
  60056. + lock = &plugin_set_lock[replica.hashval & 7];
  60057. + spin_lock(lock);
  60058. + twin = ps_hash_find(&ps_table, &replica.hashval);
  60059. + if (likely(twin == NULL)) {
  60060. + *set = psal;
  60061. + ps_hash_insert_rcu(&ps_table, psal);
  60062. + } else {
  60063. + *set = twin;
  60064. + kmem_cache_free(plugin_set_slab, psal);
  60065. + }
  60066. + spin_unlock(lock);
  60067. + } else {
  60068. + rcu_read_unlock();
  60069. + *set = twin;
  60070. + }
  60071. + return 0;
  60072. +}
  60073. +
  60074. +static struct {
  60075. + int offset;
  60076. + reiser4_plugin_groups groups;
  60077. + reiser4_plugin_type type;
  60078. +} pset_descr[PSET_LAST] = {
  60079. + [PSET_FILE] = {
  60080. + .offset = offsetof(plugin_set, file),
  60081. + .type = REISER4_FILE_PLUGIN_TYPE,
  60082. + .groups = 0
  60083. + },
  60084. + [PSET_DIR] = {
  60085. + .offset = offsetof(plugin_set, dir),
  60086. + .type = REISER4_DIR_PLUGIN_TYPE,
  60087. + .groups = 0
  60088. + },
  60089. + [PSET_PERM] = {
  60090. + .offset = offsetof(plugin_set, perm),
  60091. + .type = REISER4_PERM_PLUGIN_TYPE,
  60092. + .groups = 0
  60093. + },
  60094. + [PSET_FORMATTING] = {
  60095. + .offset = offsetof(plugin_set, formatting),
  60096. + .type = REISER4_FORMATTING_PLUGIN_TYPE,
  60097. + .groups = 0
  60098. + },
  60099. + [PSET_HASH] = {
  60100. + .offset = offsetof(plugin_set, hash),
  60101. + .type = REISER4_HASH_PLUGIN_TYPE,
  60102. + .groups = 0
  60103. + },
  60104. + [PSET_FIBRATION] = {
  60105. + .offset = offsetof(plugin_set, fibration),
  60106. + .type = REISER4_FIBRATION_PLUGIN_TYPE,
  60107. + .groups = 0
  60108. + },
  60109. + [PSET_SD] = {
  60110. + .offset = offsetof(plugin_set, sd),
  60111. + .type = REISER4_ITEM_PLUGIN_TYPE,
  60112. + .groups = (1 << STAT_DATA_ITEM_TYPE)
  60113. + },
  60114. + [PSET_DIR_ITEM] = {
  60115. + .offset = offsetof(plugin_set, dir_item),
  60116. + .type = REISER4_ITEM_PLUGIN_TYPE,
  60117. + .groups = (1 << DIR_ENTRY_ITEM_TYPE)
  60118. + },
  60119. + [PSET_CIPHER] = {
  60120. + .offset = offsetof(plugin_set, cipher),
  60121. + .type = REISER4_CIPHER_PLUGIN_TYPE,
  60122. + .groups = 0
  60123. + },
  60124. + [PSET_DIGEST] = {
  60125. + .offset = offsetof(plugin_set, digest),
  60126. + .type = REISER4_DIGEST_PLUGIN_TYPE,
  60127. + .groups = 0
  60128. + },
  60129. + [PSET_COMPRESSION] = {
  60130. + .offset = offsetof(plugin_set, compression),
  60131. + .type = REISER4_COMPRESSION_PLUGIN_TYPE,
  60132. + .groups = 0
  60133. + },
  60134. + [PSET_COMPRESSION_MODE] = {
  60135. + .offset = offsetof(plugin_set, compression_mode),
  60136. + .type = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  60137. + .groups = 0
  60138. + },
  60139. + [PSET_CLUSTER] = {
  60140. + .offset = offsetof(plugin_set, cluster),
  60141. + .type = REISER4_CLUSTER_PLUGIN_TYPE,
  60142. + .groups = 0
  60143. + },
  60144. + [PSET_CREATE] = {
  60145. + .offset = offsetof(plugin_set, create),
  60146. + .type = REISER4_FILE_PLUGIN_TYPE,
  60147. + .groups = (1 << REISER4_REGULAR_FILE)
  60148. + }
  60149. +};
  60150. +
  60151. +#define DEFINE_PSET_OPS(PREFIX) \
  60152. + reiser4_plugin_type PREFIX##_member_to_type_unsafe(pset_member memb) \
  60153. +{ \
  60154. + if (memb > PSET_LAST) \
  60155. + return REISER4_PLUGIN_TYPES; \
  60156. + return pset_descr[memb].type; \
  60157. +} \
  60158. + \
  60159. +int PREFIX##_set_unsafe(plugin_set ** set, pset_member memb, \
  60160. + reiser4_plugin * plugin) \
  60161. +{ \
  60162. + assert("nikita-3492", set != NULL); \
  60163. + assert("nikita-3493", *set != NULL); \
  60164. + assert("nikita-3494", plugin != NULL); \
  60165. + assert("nikita-3495", 0 <= memb && memb < PSET_LAST); \
  60166. + assert("nikita-3496", plugin->h.type_id == pset_descr[memb].type); \
  60167. + \
  60168. + if (pset_descr[memb].groups) \
  60169. + if (!(pset_descr[memb].groups & plugin->h.groups)) \
  60170. + return -EINVAL; \
  60171. + \
  60172. + return plugin_set_field(set, \
  60173. + (unsigned long)plugin, pset_descr[memb].offset); \
  60174. +} \
  60175. + \
  60176. +reiser4_plugin *PREFIX##_get(plugin_set * set, pset_member memb) \
  60177. +{ \
  60178. + assert("nikita-3497", set != NULL); \
  60179. + assert("nikita-3498", 0 <= memb && memb < PSET_LAST); \
  60180. + \
  60181. + return *(reiser4_plugin **) (((char *)set) + pset_descr[memb].offset); \
  60182. +}
  60183. +
  60184. +DEFINE_PSET_OPS(aset);
  60185. +
  60186. +int set_plugin(plugin_set ** set, pset_member memb, reiser4_plugin * plugin)
  60187. +{
  60188. + return plugin_set_field(set,
  60189. + (unsigned long)plugin, pset_descr[memb].offset);
  60190. +}
  60191. +
  60192. +/**
  60193. + * init_plugin_set - create plugin set cache and hash table
  60194. + *
  60195. + * Initializes slab cache of plugin_set-s and their hash table. It is part of
  60196. + * reiser4 module initialization.
  60197. + */
  60198. +int init_plugin_set(void)
  60199. +{
  60200. + int result;
  60201. +
  60202. + result = ps_hash_init(&ps_table, PS_TABLE_SIZE);
  60203. + if (result == 0) {
  60204. + plugin_set_slab = kmem_cache_create("plugin_set",
  60205. + sizeof(plugin_set), 0,
  60206. + SLAB_HWCACHE_ALIGN,
  60207. + NULL);
  60208. + if (plugin_set_slab == NULL)
  60209. + result = RETERR(-ENOMEM);
  60210. + }
  60211. + return result;
  60212. +}
  60213. +
  60214. +/**
  60215. + * done_plugin_set - delete plugin_set cache and plugin_set hash table
  60216. + *
  60217. + * This is called on reiser4 module unloading or system shutdown.
  60218. + */
  60219. +void done_plugin_set(void)
  60220. +{
  60221. + plugin_set *cur, *next;
  60222. +
  60223. + for_all_in_htable(&ps_table, ps, cur, next) {
  60224. + ps_hash_remove(&ps_table, cur);
  60225. + kmem_cache_free(plugin_set_slab, cur);
  60226. + }
  60227. + destroy_reiser4_cache(&plugin_set_slab);
  60228. + ps_hash_done(&ps_table);
  60229. +}
  60230. +
  60231. +/*
  60232. + * Local variables:
  60233. + * c-indentation-style: "K&R"
  60234. + * mode-name: "LC"
  60235. + * c-basic-offset: 8
  60236. + * tab-width: 8
  60237. + * fill-column: 120
  60238. + * End:
  60239. + */
  60240. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/plugin_set.h linux-5.16.14/fs/reiser4/plugin/plugin_set.h
  60241. --- linux-5.16.14.orig/fs/reiser4/plugin/plugin_set.h 1970-01-01 01:00:00.000000000 +0100
  60242. +++ linux-5.16.14/fs/reiser4/plugin/plugin_set.h 2022-03-12 13:26:19.684892806 +0100
  60243. @@ -0,0 +1,78 @@
  60244. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  60245. + * reiser4/README */
  60246. +
  60247. +/* Reiser4 plugin set definition.
  60248. + See fs/reiser4/plugin/plugin_set.c for details */
  60249. +
  60250. +#if !defined(__PLUGIN_SET_H__)
  60251. +#define __PLUGIN_SET_H__
  60252. +
  60253. +#include "../type_safe_hash.h"
  60254. +#include "plugin.h"
  60255. +
  60256. +#include <linux/rcupdate.h>
  60257. +
  60258. +struct plugin_set;
  60259. +typedef struct plugin_set plugin_set;
  60260. +
  60261. +TYPE_SAFE_HASH_DECLARE(ps, plugin_set);
  60262. +
  60263. +struct plugin_set {
  60264. + unsigned long hashval;
  60265. + /* plugin of file */
  60266. + file_plugin *file;
  60267. + /* plugin of dir */
  60268. + dir_plugin *dir;
  60269. + /* perm plugin for this file */
  60270. + perm_plugin *perm;
  60271. + /* tail policy plugin. Only meaningful for regular files */
  60272. + formatting_plugin *formatting;
  60273. + /* hash plugin. Only meaningful for directories. */
  60274. + hash_plugin *hash;
  60275. + /* fibration plugin. Only meaningful for directories. */
  60276. + fibration_plugin *fibration;
  60277. + /* plugin of stat-data */
  60278. + item_plugin *sd;
  60279. + /* plugin of items a directory is built of */
  60280. + item_plugin *dir_item;
  60281. + /* cipher plugin */
  60282. + cipher_plugin *cipher;
  60283. + /* digest plugin */
  60284. + digest_plugin *digest;
  60285. + /* compression plugin */
  60286. + compression_plugin *compression;
  60287. + /* compression mode plugin */
  60288. + compression_mode_plugin *compression_mode;
  60289. + /* cluster plugin */
  60290. + cluster_plugin *cluster;
  60291. + /* this specifies file plugin of regular children.
  60292. + only meaningful for directories */
  60293. + file_plugin *create;
  60294. + ps_hash_link link;
  60295. +};
  60296. +
  60297. +extern plugin_set *plugin_set_get_empty(void);
  60298. +extern void plugin_set_put(plugin_set * set);
  60299. +
  60300. +extern int init_plugin_set(void);
  60301. +extern void done_plugin_set(void);
  60302. +
  60303. +extern reiser4_plugin *aset_get(plugin_set * set, pset_member memb);
  60304. +extern int set_plugin(plugin_set ** set, pset_member memb,
  60305. + reiser4_plugin * plugin);
  60306. +extern int aset_set_unsafe(plugin_set ** set, pset_member memb,
  60307. + reiser4_plugin * plugin);
  60308. +extern reiser4_plugin_type aset_member_to_type_unsafe(pset_member memb);
  60309. +
  60310. +/* __PLUGIN_SET_H__ */
  60311. +#endif
  60312. +
  60313. +/* Make Linus happy.
  60314. + Local variables:
  60315. + c-indentation-style: "K&R"
  60316. + mode-name: "LC"
  60317. + c-basic-offset: 8
  60318. + tab-width: 8
  60319. + fill-column: 120
  60320. + End:
  60321. +*/
  60322. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/security/Makefile linux-5.16.14/fs/reiser4/plugin/security/Makefile
  60323. --- linux-5.16.14.orig/fs/reiser4/plugin/security/Makefile 1970-01-01 01:00:00.000000000 +0100
  60324. +++ linux-5.16.14/fs/reiser4/plugin/security/Makefile 2022-03-12 13:26:19.685892809 +0100
  60325. @@ -0,0 +1,6 @@
  60326. +
  60327. +MODULE := security_plugins
  60328. +
  60329. +obj-$(CONFIG_REISER4_FS) := $(MODULE).o
  60330. +
  60331. +$(MODULE)-objs += perm.o
  60332. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/security/perm.c linux-5.16.14/fs/reiser4/plugin/security/perm.c
  60333. --- linux-5.16.14.orig/fs/reiser4/plugin/security/perm.c 1970-01-01 01:00:00.000000000 +0100
  60334. +++ linux-5.16.14/fs/reiser4/plugin/security/perm.c 2022-03-12 13:26:19.685892809 +0100
  60335. @@ -0,0 +1,33 @@
  60336. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  60337. +
  60338. +/*
  60339. + * This file contains implementation of permission plugins.
  60340. + * See the comments in perm.h
  60341. + */
  60342. +
  60343. +#include "../plugin.h"
  60344. +#include "../plugin_header.h"
  60345. +#include "../../debug.h"
  60346. +
  60347. +perm_plugin perm_plugins[LAST_PERM_ID] = {
  60348. + [NULL_PERM_ID] = {
  60349. + .h = {
  60350. + .type_id = REISER4_PERM_PLUGIN_TYPE,
  60351. + .id = NULL_PERM_ID,
  60352. + .pops = NULL,
  60353. + .label = "null",
  60354. + .desc = "stub permission plugin",
  60355. + .linkage = {NULL, NULL}
  60356. + }
  60357. + }
  60358. +};
  60359. +
  60360. +/*
  60361. + * Local variables:
  60362. + * c-indentation-style: "K&R"
  60363. + * mode-name: "LC"
  60364. + * c-basic-offset: 8
  60365. + * tab-width: 8
  60366. + * fill-column: 79
  60367. + * End:
  60368. + */
  60369. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/security/perm.h linux-5.16.14/fs/reiser4/plugin/security/perm.h
  60370. --- linux-5.16.14.orig/fs/reiser4/plugin/security/perm.h 1970-01-01 01:00:00.000000000 +0100
  60371. +++ linux-5.16.14/fs/reiser4/plugin/security/perm.h 2022-03-12 13:26:19.685892809 +0100
  60372. @@ -0,0 +1,38 @@
  60373. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  60374. +
  60375. +/* Perm (short for "permissions") plugins common stuff. */
  60376. +
  60377. +#if !defined( __REISER4_PERM_H__ )
  60378. +#define __REISER4_PERM_H__
  60379. +
  60380. +#include "../../forward.h"
  60381. +#include "../plugin_header.h"
  60382. +
  60383. +#include <linux/types.h>
  60384. +
  60385. +/* Definition of permission plugin */
  60386. +/* NIKITA-FIXME-HANS: define what this is targeted for.
  60387. + It does not seem to be intended for use with sys_reiser4. Explain. */
  60388. +
  60389. +/* NOTE-EDWARD: This seems to be intended for deprecated sys_reiser4.
  60390. + Consider it like a temporary "seam" and reserved pset member.
  60391. + If you have something useful to add, then rename this plugin and add here */
  60392. +typedef struct perm_plugin {
  60393. + /* generic plugin fields */
  60394. + plugin_header h;
  60395. +} perm_plugin;
  60396. +
  60397. +typedef enum { NULL_PERM_ID, LAST_PERM_ID } reiser4_perm_id;
  60398. +
  60399. +/* __REISER4_PERM_H__ */
  60400. +#endif
  60401. +
  60402. +/* Make Linus happy.
  60403. + Local variables:
  60404. + c-indentation-style: "K&R"
  60405. + mode-name: "LC"
  60406. + c-basic-offset: 8
  60407. + tab-width: 8
  60408. + fill-column: 120
  60409. + End:
  60410. +*/
  60411. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/space/bitmap.c linux-5.16.14/fs/reiser4/plugin/space/bitmap.c
  60412. --- linux-5.16.14.orig/fs/reiser4/plugin/space/bitmap.c 1970-01-01 01:00:00.000000000 +0100
  60413. +++ linux-5.16.14/fs/reiser4/plugin/space/bitmap.c 2022-03-12 13:26:19.685892809 +0100
  60414. @@ -0,0 +1,1609 @@
  60415. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  60416. +
  60417. +#include "../../debug.h"
  60418. +#include "../../dformat.h"
  60419. +#include "../../txnmgr.h"
  60420. +#include "../../jnode.h"
  60421. +#include "../../block_alloc.h"
  60422. +#include "../../tree.h"
  60423. +#include "../../super.h"
  60424. +#include "../plugin.h"
  60425. +#include "space_allocator.h"
  60426. +#include "bitmap.h"
  60427. +
  60428. +#include <linux/types.h>
  60429. +#include <linux/fs.h> /* for struct super_block */
  60430. +#include <linux/mutex.h>
  60431. +#include <asm/div64.h>
  60432. +
  60433. +/* Proposed (but discarded) optimization: dynamic loading/unloading of bitmap
  60434. + * blocks
  60435. +
  60436. + A useful optimization of reiser4 bitmap handling would be dynamic bitmap
  60437. + blocks loading/unloading which is different from v3.x where all bitmap
  60438. + blocks are loaded at mount time.
  60439. +
  60440. + To implement bitmap blocks unloading we need to count bitmap block usage
  60441. + and detect currently unused blocks allowing them to be unloaded. It is not
  60442. + a simple task since we allow several threads to modify one bitmap block
  60443. + simultaneously.
  60444. +
  60445. + Briefly speaking, the following schema is proposed: we count in special
  60446. + variable associated with each bitmap block. That is for counting of block
  60447. + alloc/dealloc operations on that bitmap block. With a deferred block
  60448. + deallocation feature of reiser4 all those operation will be represented in
  60449. + atom dirty/deleted lists as jnodes for freshly allocated or deleted
  60450. + nodes.
  60451. +
  60452. + So, we increment usage counter for each new node allocated or deleted, and
  60453. + decrement it at atom commit one time for each node from the dirty/deleted
  60454. + atom's list. Of course, freshly allocated node deletion and node reusing
  60455. + from atom deleted (if we do so) list should decrement bitmap usage counter
  60456. + also.
  60457. +
  60458. + This schema seems to be working but that reference counting is
  60459. + not easy to debug. I think we should agree with Hans and do not implement
  60460. + it in v4.0. Current code implements "on-demand" bitmap blocks loading only.
  60461. +
  60462. + For simplicity all bitmap nodes (both commit and working bitmap blocks) are
  60463. + loaded into memory at fs mount time or each bitmap node is loaded at the
  60464. + first access to it, the "dont_load_bitmap" mount option controls whether
  60465. + bimtap nodes should be loaded at mount time. Dynamic unloading of bitmap
  60466. + nodes currently is not supported. */
  60467. +
  60468. +#define CHECKSUM_SIZE 4
  60469. +
  60470. +#define BYTES_PER_LONG (sizeof(long))
  60471. +
  60472. +#if BITS_PER_LONG == 64
  60473. +# define LONG_INT_SHIFT (6)
  60474. +#else
  60475. +# define LONG_INT_SHIFT (5)
  60476. +#endif
  60477. +
  60478. +#define LONG_INT_MASK (BITS_PER_LONG - 1UL)
  60479. +
  60480. +typedef unsigned long ulong_t;
  60481. +
  60482. +#define bmap_size(blocksize) ((blocksize) - CHECKSUM_SIZE)
  60483. +#define bmap_bit_count(blocksize) (bmap_size(blocksize) << 3)
  60484. +
  60485. +/* Block allocation/deallocation are done through special bitmap objects which
  60486. + are allocated in an array at fs mount. */
  60487. +struct bitmap_node {
  60488. + struct mutex mutex; /* long term lock object; serializes updates of
         + * the working bitmap data and first_zero_bit */
  60489. +
  60490. + jnode *wjnode; /* j-nodes for WORKING ... */
  60491. + jnode *cjnode; /* ... and COMMIT bitmap blocks */
  60492. +
  60493. + bmap_off_t first_zero_bit; /* for skip_busy option implementation */
  60494. +
  60495. + atomic_t loaded; /* a flag which shows that bnode is loaded
  60496. + * already */
  60497. +};
  60498. +
         +/* Pointer to the WORKING bitmap payload, past the 4-byte checksum. */
  60499. +static inline char *bnode_working_data(struct bitmap_node *bnode)
  60500. +{
  60501. + char *data;
  60502. +
  60503. + data = jdata(bnode->wjnode);
  60504. + assert("zam-429", data != NULL);
  60505. +
  60506. + return data + CHECKSUM_SIZE;
  60507. +}
  60508. +
         +/* Pointer to the COMMIT bitmap payload, past the 4-byte checksum. */
  60509. +static inline char *bnode_commit_data(const struct bitmap_node *bnode)
  60510. +{
  60511. + char *data;
  60512. +
  60513. + data = jdata(bnode->cjnode);
  60514. + assert("zam-430", data != NULL);
  60515. +
  60516. + return data + CHECKSUM_SIZE;
  60517. +}
  60518. +
         +/* Read the little-endian adler32 checksum stored in the first 4 bytes
         + * of the commit bitmap block (may be unaligned). */
  60519. +static inline __u32 bnode_commit_crc(const struct bitmap_node *bnode)
  60520. +{
  60521. + char *data;
  60522. +
  60523. + data = jdata(bnode->cjnode);
  60524. + assert("vpf-261", data != NULL);
  60525. +
  60526. + return le32_to_cpu(get_unaligned((d32 *)data));
  60527. +}
  60528. +
         +/* Store @crc as the little-endian checksum header of the commit block. */
  60529. +static inline void bnode_set_commit_crc(struct bitmap_node *bnode, __u32 crc)
  60530. +{
  60531. + char *data;
  60532. +
  60533. + data = jdata(bnode->cjnode);
  60534. + assert("vpf-261", data != NULL);
  60535. +
  60536. + put_unaligned(cpu_to_le32(crc), (d32 *)data);
  60537. +}
  60538. +
  60539. +/* ZAM-FIXME-HANS: is the idea that this might be a union someday? having
  60540. + * written the code, does this added abstraction still have */
  60541. +/* ANSWER(Zam): No, the abstractions is in the level above (exact place is the
  60542. + * reiser4_space_allocator structure) */
  60543. +/* ZAM-FIXME-HANS: I don't understand your english in comment above. */
  60544. +/* FIXME-HANS(Zam): I don't understand the questions like "might be a union
  60545. + * someday?". What they about? If there is a reason to have a union, it should
  60546. + * be a union, if not, it should not be a union. "..might be someday" means no
  60547. + * reason. */
  60548. +struct bitmap_allocator_data {
  60549. + /* an array for bitmap blocks direct access */
  60550. + struct bitmap_node *bitmap;
  60551. +};
  60552. +
         +/* bitmap_node array hidden behind the generic space-allocator pointer */
  60553. +#define get_barray(super) \
  60554. +(((struct bitmap_allocator_data *)(get_super_private(super)->space_allocator.u.generic)) -> bitmap)
  60555. +
         +/* i-th bitmap_node of the filesystem @super */
  60556. +#define get_bnode(super, i) (get_barray(super) + i)
  60557. +
  60558. +/* allocate and initialize jnode with JNODE_BITMAP type;
         + * returns NULL if jalloc() fails */
  60559. +static jnode *bnew(void)
  60560. +{
  60561. + jnode *jal = jalloc();
  60562. +
  60563. + if (jal)
  60564. + jnode_init(jal, current_tree, JNODE_BITMAP);
  60565. +
  60566. + return jal;
  60567. +}
  60568. +
  60569. +/* this file contains:
  60570. + - bitmap based implementation of space allocation plugin
  60571. + - all the helper functions like set bit, find_first_zero_bit, etc */
  60572. +
  60573. +/* Audited by: green(2002.06.12) */
  60574. +/* Return the index of the first zero bit of @word at or after
  60575. + @start_bit, or BITS_PER_LONG when every remaining bit is set. */
  60576. +static int find_next_zero_bit_in_word(ulong_t word, int start_bit)
  60577. +{
  60578. + int nr;
  60579. +
  60580. + /* scan upward for the first clear bit, stopping at word width */
  60581. + for (nr = start_bit; nr < BITS_PER_LONG; nr++) {
  60582. + if ((word & (1UL << nr)) == 0)
  60583. + break;
  60584. + }
  60585. +
  60586. + return nr;
  60587. +}
  60587. +
  60588. +#include <linux/bitops.h>
  60589. +
  60590. +#if BITS_PER_LONG == 64
  60591. +
         +/* The bitmap payload starts CHECKSUM_SIZE (4) bytes into the block, so
         + * on 64-bit it is not long-aligned; OFF/BASE split an address into a
         + * long-aligned base plus a bit offset so the _le bitops get aligned
         + * longs. (Assumed rationale -- confirm against the _le bitops API.) */
  60592. +#define OFF(addr) (((ulong_t)(addr) & (BYTES_PER_LONG - 1)) << 3)
  60593. +#define BASE(addr) ((ulong_t*) ((ulong_t)(addr) & ~(BYTES_PER_LONG - 1)))
  60594. +
  60595. +static inline void reiser4_set_bit(int nr, void *addr)
  60596. +{
  60597. + __test_and_set_bit_le(nr + OFF(addr), BASE(addr));
  60598. +}
  60599. +
  60600. +static inline void reiser4_clear_bit(int nr, void *addr)
  60601. +{
  60602. + __test_and_clear_bit_le(nr + OFF(addr), BASE(addr));
  60603. +}
  60604. +
  60605. +static inline int reiser4_test_bit(int nr, void *addr)
  60606. +{
  60607. + return test_bit_le(nr + OFF(addr), BASE(addr));
  60608. +}
         +/* find_next_zero_bit_le on the realigned base; offsets are shifted in
         + * and the result is shifted back so callers see @addr-relative bits */
  60609. +static inline int reiser4_find_next_zero_bit(void *addr, int maxoffset,
  60610. + int offset)
  60611. +{
  60612. + int off = OFF(addr);
  60613. +
  60614. + return find_next_zero_bit_le(BASE(addr), maxoffset + off,
  60615. + offset + off) - off;
  60616. +}
  60617. +
  60618. +#else
  60619. +
  60620. +#define reiser4_set_bit(nr, addr) __test_and_set_bit_le(nr, addr)
  60621. +#define reiser4_clear_bit(nr, addr) __test_and_clear_bit_le(nr, addr)
  60622. +#define reiser4_test_bit(nr, addr) test_bit_le(nr, addr)
  60623. +
  60624. +#define reiser4_find_next_zero_bit(addr, maxoffset, offset) \
  60625. +find_next_zero_bit_le(addr, maxoffset, offset)
  60626. +#endif
  60627. +
  60628. +/* Search for a set bit in the bit array [@start_offset, @max_offset[, offsets
  60629. + * are counted from @addr, return the offset of the first bit if it is found,
  60630. + * @maxoffset otherwise. */
  60631. +static bmap_off_t __reiser4_find_next_set_bit(void *addr, bmap_off_t max_offset,
  60632. + bmap_off_t start_offset)
  60633. +{
  60634. + ulong_t *base = addr;
  60635. + /* start_offset is in bits; convert it to a word index in the bitmap. */
  60636. + int word_nr = start_offset >> LONG_INT_SHIFT;
  60637. + /* bit number within the word. */
  60638. + int bit_nr = start_offset & LONG_INT_MASK;
  60639. + int max_word_nr = (max_offset - 1) >> LONG_INT_SHIFT;
  60640. +
  60641. + assert("zam-387", max_offset != 0);
  60642. +
  60643. + /* Unaligned @start_offset case: finish the partial first word,
         + finding a set bit as a zero bit of the complemented word. */
  60644. + if (bit_nr != 0) {
  60645. + bmap_nr_t nr;
  60646. +
  60647. + nr = find_next_zero_bit_in_word(~(base[word_nr]), bit_nr);
  60648. +
  60649. + if (nr < BITS_PER_LONG)
  60650. + return (word_nr << LONG_INT_SHIFT) + nr;
  60651. +
  60652. + ++word_nr;
  60653. + }
  60654. +
  60655. + /* Fast scan through aligned words. */
  60656. + while (word_nr <= max_word_nr) {
  60657. + if (base[word_nr] != 0) {
  60658. + return (word_nr << LONG_INT_SHIFT)
  60659. + + find_next_zero_bit_in_word(~(base[word_nr]), 0);
  60660. + }
  60661. +
  60662. + ++word_nr;
  60663. + }
  60664. +
  60665. + return max_offset;
  60666. +}
  60667. +
  60668. +#if BITS_PER_LONG == 64
  60669. +
         +/* 64-bit wrapper: realign @addr to a long boundary (see OFF/BASE) and
         + * compensate all offsets so the caller works in @addr-relative bits */
  60670. +static bmap_off_t reiser4_find_next_set_bit(void *addr, bmap_off_t max_offset,
  60671. + bmap_off_t start_offset)
  60672. +{
  60673. + bmap_off_t off = OFF(addr);
  60674. +
  60675. + return __reiser4_find_next_set_bit(BASE(addr), max_offset + off,
  60676. + start_offset + off) - off;
  60677. +}
  60678. +
  60679. +#else
  60680. +#define reiser4_find_next_set_bit(addr, max_offset, start_offset) \
  60681. + __reiser4_find_next_set_bit(addr, max_offset, start_offset)
  60682. +#endif
  60683. +
  60684. +/* search backward for the last set bit at or below @start_bit in a single
         + word; returns its index, or BITS_PER_LONG when no such bit is set. */
  60685. +static int find_last_set_bit_in_word(ulong_t word, int start_bit)
  60686. +{
  60687. + ulong_t bit_mask;
  60688. + int nr = start_bit;
  60689. +
  60690. + assert("zam-965", start_bit < BITS_PER_LONG);
  60691. + assert("zam-966", start_bit >= 0);
  60692. +
  60693. + bit_mask = (1UL << nr);
  60694. +
  60695. + while (bit_mask != 0) {
  60696. + if (bit_mask & word)
  60697. + return nr;
  60698. + bit_mask >>= 1;
  60699. + nr--;
  60700. + }
  60701. + return BITS_PER_LONG;
  60702. +}
  60703. +
  60704. +/* Search bitmap for a set bit in backward direction from the end to the
  60705. + * beginning of given region
  60706. + *
  60707. + * @result: result offset of the last set bit
  60708. + * @addr: base memory address,
  60709. + * @low_off: low end of the search region, edge bit included into the region,
  60710. + * @high_off: high end of the search region, edge bit included into the region,
  60711. + *
  60712. + * @return: 0 - set bit was found, -1 otherwise.
  60713. + */
  60714. +static int
  60715. +reiser4_find_last_set_bit(bmap_off_t * result, void *addr, bmap_off_t low_off,
  60716. + bmap_off_t high_off)
  60717. +{
  60718. + ulong_t *base = addr;
  60719. + int last_word;
  60720. + int first_word;
  60721. + int last_bit;
  60722. + int nr;
  60723. +
  60724. + assert("zam-962", high_off >= low_off);
  60725. +
  60726. + last_word = high_off >> LONG_INT_SHIFT;
  60727. + last_bit = high_off & LONG_INT_MASK;
  60728. + first_word = low_off >> LONG_INT_SHIFT;
  60729. +
         + /* masking guarantees last_bit < BITS_PER_LONG, so this branch
         + * always handles the (possibly partial) highest word first */
  60730. + if (last_bit < BITS_PER_LONG) {
  60731. + nr = find_last_set_bit_in_word(base[last_word], last_bit);
  60732. + if (nr < BITS_PER_LONG) {
  60733. + *result = (last_word << LONG_INT_SHIFT) + nr;
  60734. + return 0;
  60735. + }
  60736. + --last_word;
  60737. + }
         + /* walk whole words downward; first non-zero word holds the answer */
  60738. + while (last_word >= first_word) {
  60739. + if (base[last_word] != 0x0) {
  60740. + last_bit =
  60741. + find_last_set_bit_in_word(base[last_word],
  60742. + BITS_PER_LONG - 1);
  60743. + assert("zam-972", last_bit < BITS_PER_LONG);
  60744. + *result = (last_word << LONG_INT_SHIFT) + last_bit;
  60745. + return 0;
  60746. + }
  60747. + --last_word;
  60748. + }
  60749. +
  60750. + return -1; /* set bit not found */
  60751. +}
  60752. +
  60753. +/* Search bitmap for a clear bit in backward direction from the end to the
  60754. + * beginning of given region; implemented as a last-set-bit search on the
         + * complemented words. Same contract as reiser4_find_last_set_bit(). */
  60755. +static int
  60756. +reiser4_find_last_zero_bit(bmap_off_t * result, void *addr, bmap_off_t low_off,
  60757. + bmap_off_t high_off)
  60758. +{
  60759. + ulong_t *base = addr;
  60760. + int last_word;
  60761. + int first_word;
  60762. + int last_bit;
  60763. + int nr;
  60764. +
  60765. + last_word = high_off >> LONG_INT_SHIFT;
  60766. + last_bit = high_off & LONG_INT_MASK;
  60767. + first_word = low_off >> LONG_INT_SHIFT;
  60768. +
  60769. + if (last_bit < BITS_PER_LONG) {
  60770. + nr = find_last_set_bit_in_word(~base[last_word], last_bit);
  60771. + if (nr < BITS_PER_LONG) {
  60772. + *result = (last_word << LONG_INT_SHIFT) + nr;
  60773. + return 0;
  60774. + }
  60775. + --last_word;
  60776. + }
         + /* a word of all ones has no zero bit; skip those */
  60777. + while (last_word >= first_word) {
  60778. + if (base[last_word] != (ulong_t) (-1)) {
  60779. + *result = (last_word << LONG_INT_SHIFT) +
  60780. + find_last_set_bit_in_word(~base[last_word],
  60781. + BITS_PER_LONG - 1);
  60782. + return 0;
  60783. + }
  60784. + --last_word;
  60785. + }
  60786. +
  60787. + return -1; /* zero bit not found */
  60788. +}
  60789. +
  60790. +/* Audited by: green(2002.06.12) */
         +/* Clear bits [@start, @end) in the byte array at @addr: whole interior
         + * bytes via memset, the partial edge bytes via masks. */
  60791. +static void reiser4_clear_bits(char *addr, bmap_off_t start, bmap_off_t end)
  60792. +{
  60793. + int first_byte;
  60794. + int last_byte;
  60795. +
  60796. + unsigned char first_byte_mask = 0xFF;
  60797. + unsigned char last_byte_mask = 0xFF;
  60798. +
  60799. + assert("zam-410", start < end);
  60800. +
  60801. + first_byte = start >> 3;
  60802. + last_byte = (end - 1) >> 3;
  60803. +
  60804. + if (last_byte > first_byte + 1)
  60805. + memset(addr + first_byte + 1, 0,
  60806. + (size_t) (last_byte - first_byte - 1));
  60807. +
         + /* first mask keeps the bits below start%8; second keeps those
         + * above (end-1)%8 -- everything in between is cleared by &= */
  60808. + first_byte_mask >>= 8 - (start & 0x7);
  60809. + last_byte_mask <<= ((end - 1) & 0x7) + 1;
  60810. +
  60811. + if (first_byte == last_byte) {
  60812. + addr[first_byte] &= (first_byte_mask | last_byte_mask);
  60813. + } else {
  60814. + addr[first_byte] &= first_byte_mask;
  60815. + addr[last_byte] &= last_byte_mask;
  60816. + }
  60817. +}
  60818. +
  60819. +/* Audited by: green(2002.06.12) */
  60820. +/* Set bits [@start, @end) in the byte array at @addr: whole interior
         + * bytes via memset(0xFF), the partial edge bytes via masks (mirror
         + * image of reiser4_clear_bits()). */
  60821. +static void reiser4_set_bits(char *addr, bmap_off_t start, bmap_off_t end)
  60822. +{
  60823. + int first_byte;
  60824. + int last_byte;
  60825. +
  60826. + unsigned char first_byte_mask = 0xFF;
  60827. + unsigned char last_byte_mask = 0xFF;
  60828. +
  60829. + assert("zam-386", start < end);
  60830. +
  60831. + first_byte = start >> 3;
  60832. + last_byte = (end - 1) >> 3;
  60833. +
  60834. + if (last_byte > first_byte + 1)
  60835. + memset(addr + first_byte + 1, 0xFF,
  60836. + (size_t) (last_byte - first_byte - 1));
  60837. +
         + /* first mask covers bits at/above start%8; second covers bits
         + * at/below (end-1)%8 */
  60838. + first_byte_mask <<= start & 0x7;
  60839. + last_byte_mask >>= 7 - ((end - 1) & 0x7);
  60840. +
  60841. + if (first_byte == last_byte) {
  60842. + addr[first_byte] |= (first_byte_mask & last_byte_mask);
  60843. + } else {
  60844. + addr[first_byte] |= first_byte_mask;
  60845. + addr[last_byte] |= last_byte_mask;
  60846. + }
  60847. +}
  60848. +
  60849. +#define ADLER_BASE 65521
  60850. +#define ADLER_NMAX 5552
  60851. +
  60852. +/* Calculates the adler32 checksum for the data pointed by `data` of the
  60853. + length `len`. This function was originally taken from zlib, version 1.1.3,
  60854. + July 9th, 1998.
  60855. +
  60856. + Copyright (C) 1995-1998 Jean-loup Gailly and Mark Adler
  60857. +
  60858. + This software is provided 'as-is', without any express or implied
  60859. + warranty. In no event will the authors be held liable for any damages
  60860. + arising from the use of this software.
  60861. +
  60862. + Permission is granted to anyone to use this software for any purpose,
  60863. + including commercial applications, and to alter it and redistribute it
  60864. + freely, subject to the following restrictions:
  60865. +
  60866. + 1. The origin of this software must not be misrepresented; you must not
  60867. + claim that you wrote the original software. If you use this software
  60868. + in a product, an acknowledgment in the product documentation would be
  60869. + appreciated but is not required.
  60870. + 2. Altered source versions must be plainly marked as such, and must not be
  60871. + misrepresented as being the original software.
  60872. + 3. This notice may not be removed or altered from any source distribution.
  60873. +
  60874. + Jean-loup Gailly Mark Adler
  60875. + jloup@gzip.org madler@alumni.caltech.edu
  60876. +
  60877. + The above comment applies only to the reiser4_adler32 function.
  60878. +*/
  60879. +
  60880. +__u32 reiser4_adler32(char *data, __u32 len)
  60881. +{
  60882. + unsigned char *p = (unsigned char *)data;
  60883. + __u32 a = 1;
  60884. + __u32 b = 0;
  60885. +
  60886. + /* chunks of ADLER_NMAX bytes keep the 32-bit running sums from
  60887. + overflowing before the modulo reduction */
  60888. + while (len != 0) {
  60889. + int chunk = (len < ADLER_NMAX) ? (int)len : ADLER_NMAX;
  60890. +
  60891. + len -= chunk;
  60892. + while (chunk-- > 0) {
  60893. + a += *p++;
  60894. + b += a;
  60895. + }
  60896. + a %= ADLER_BASE;
  60897. + b %= ADLER_BASE;
  60898. + }
  60899. + return (b << 16) | a;
  60900. +}
  60901. +
         +/* super_block that a bitmap_node belongs to, via its working jnode */
  60902. +#define sb_by_bnode(bnode) \
  60903. + ((struct super_block *)jnode_get_tree(bnode->wjnode)->super)
  60904. +
         +/* adler32 of the commit bitmap payload; @size is the FULL block size,
         + * bmap_size() (minus checksum header) is applied here */
  60905. +static __u32 bnode_calc_crc(const struct bitmap_node *bnode, unsigned long size)
  60906. +{
  60907. + return reiser4_adler32(bnode_commit_data(bnode), bmap_size(size));
  60908. +}
  60909. +
  60910. +static int
  60911. +bnode_check_adler32(const struct bitmap_node *bnode, unsigned long size)
  60912. +{
  60913. + if (bnode_calc_crc(bnode, size) != bnode_commit_crc(bnode)) {
  60914. + bmap_nr_t bmap;
  60915. +
  60916. + bmap = bnode - get_bnode(sb_by_bnode(bnode), 0);
  60917. +
  60918. + warning("vpf-263",
  60919. + "Checksum for the bitmap block %llu is incorrect",
  60920. + bmap);
  60921. +
  60922. + return RETERR(-EIO);
  60923. + }
  60924. +
  60925. + return 0;
  60926. +}
  60927. +
  60928. +#define REISER4_CHECK_BMAP_CRC (0)
  60929. +
  60930. +#if REISER4_CHECK_BMAP_CRC
  60931. +static int bnode_check_crc(const struct bitmap_node *bnode)
  60932. +{
  60933. + /* pass the raw blocksize: bnode_calc_crc() applies bmap_size()
  60934. + itself, so pre-shrinking it here would checksum 4 bytes too
  60935. + few and disagree with check_struct_bnode() */
  60936. + return bnode_check_adler32(bnode, sb_by_bnode(bnode)->s_blocksize);
  60937. +}
  60938. +
  60939. +/* REISER4_CHECK_BMAP_CRC */
  60940. +#else
  60941. +
  60942. +#define bnode_check_crc(bnode) (0)
  60943. +
  60944. +/* REISER4_CHECK_BMAP_CRC */
  60945. +#endif
  60944. +
  60945. +/* Recalculates the adler32 checksum for only 1 byte change.
  60946. + adler - previous adler checksum
  60947. + old_data, data - old, new byte values.
  60948. + tail == (chunk - offset) : length, checksum was calculated for, - offset of
  60949. + the changed byte within this chunk.
  60950. + This function can be used for checksum calculation optimisation.
  60951. +*/
  60952. +
  60953. +static __u32
  60954. +adler32_recalc(__u32 adler, unsigned char old_data, unsigned char data,
  60955. + __u32 tail)
  60956. +{
  60957. + /* bias by 2 * ADLER_BASE keeps the byte delta non-negative */
  60958. + __u32 diff = 2 * ADLER_BASE + data - old_data;
  60959. + __u32 s1 = adler & 0xffff;
  60960. + __u32 s2 = (adler >> 16) & 0xffff;
  60961. +
  60962. + s1 = (s1 + diff) % ADLER_BASE;
  60963. + s2 = (s2 + diff * tail) % ADLER_BASE;
  60964. + return (s2 << 16) | s1;
  60965. +}
  60966. +
  60967. +#define LIMIT(val, boundary) ((val) > (boundary) ? (boundary) : (val))
  60968. +
  60969. +/**
  60970. + * get_nr_bmap - calculate number of bitmap blocks
  60971. + * @super: super block with initialized blocksize and block count
  60972. + *
  60973. + * Calculates number of bitmap blocks of a filesystem which uses bitmaps to
  60974. + * maintain free disk space. It assumes that each bitmap addresses the same
  60975. + * number of blocks which is calculated by the bmap_bit_count macro defined
  60976. + * above. Number of blocks in the filesystem has to be initialized in reiser4
  60977. + * private data of super block already so that it can be obtained via
  60978. + * reiser4_block_count(). Unfortunately, number of blocks addressed by a bitmap
  60979. + * is not power of 2 because 4 bytes are used for checksum. Therefore, we have
  60980. + * to use special function to divide and modulo 64bits filesystem block
  60981. + * counters.
  60982. + *
  60983. + * Example: suppose filesystem have 32768 blocks. Blocksize is 4096. Each bitmap
  60984. + * block addresses (4096 - 4) * 8 = 32736 blocks. Number of bitmaps to address
  60985. + * all 32768 blocks is calculated as (32768 - 1) / 32736 + 1 = 2.
  60986. + */
  60987. +static bmap_nr_t get_nr_bmap(const struct super_block *super)
  60988. +{
  60989. + u64 quotient;
  60990. +
  60991. + assert("zam-393", reiser4_block_count(super) != 0);
  60992. +
  60993. + quotient = reiser4_block_count(super) - 1;
         + /* do_div() divides @quotient in place */
  60994. + do_div(quotient, bmap_bit_count(super->s_blocksize));
  60995. + return quotient + 1;
  60996. +}
  60997. +
  60998. +/**
  60999. + * parse_blocknr - calculate bitmap number and offset in it by block number
  61000. + * @block: pointer to block number to calculate location in bitmap of
  61001. + * @bmap: pointer where to store bitmap block number
  61002. + * @offset: pointer where to store offset within bitmap block
  61003. + *
  61004. + * Calculates location of bit which is responsible for allocation/freeing of
  61005. + * block @*block. That location is represented by bitmap block number and offset
  61006. + * within that bitmap block.
  61007. + */
  61008. +static void
  61009. +parse_blocknr(const reiser4_block_nr *block, bmap_nr_t *bmap,
  61010. + bmap_off_t *offset)
  61011. +{
  61012. + struct super_block *super = get_current_context()->super;
  61013. + u64 quotient = *block;
  61014. +
         + /* do_div() leaves the quotient in @quotient, returns remainder */
  61015. + *offset = do_div(quotient, bmap_bit_count(super->s_blocksize));
  61016. + *bmap = quotient;
  61017. +
  61018. + assert("zam-433", *bmap < get_nr_bmap(super));
  61019. + assert("", *offset < bmap_bit_count(super->s_blocksize));
  61020. +}
  61021. +
  61022. +#if REISER4_DEBUG
  61023. +/* Audited by: green(2002.06.12) */
         +/* Sanity-check that [@start, @start + @len) is a real, in-filesystem
         + * block range (debug builds only). */
  61024. +static void
  61025. +check_block_range(const reiser4_block_nr * start, const reiser4_block_nr * len)
  61026. +{
  61027. + struct super_block *sb = reiser4_get_current_sb();
  61028. +
  61029. + assert("zam-436", sb != NULL);
  61030. +
  61031. + assert("zam-455", start != NULL);
  61032. + assert("zam-437", *start != 0);
  61033. + assert("zam-541", !reiser4_blocknr_is_fake(start));
  61034. + assert("zam-441", *start < reiser4_block_count(sb));
  61035. +
  61036. + if (len != NULL) {
  61037. + assert("zam-438", *len != 0);
  61038. + assert("zam-442", *start + *len <= reiser4_block_count(sb));
  61039. + }
  61040. +}
  61041. +
         +/* Assert that both bitmap jnodes of @bnode have pages and are loaded. */
  61042. +static void check_bnode_loaded(const struct bitmap_node *bnode)
  61043. +{
  61044. + assert("zam-485", bnode != NULL);
  61045. + assert("zam-483", jnode_page(bnode->wjnode) != NULL);
  61046. + assert("zam-484", jnode_page(bnode->cjnode) != NULL);
  61047. + assert("nikita-2820", jnode_is_loaded(bnode->wjnode));
  61048. + assert("nikita-2821", jnode_is_loaded(bnode->cjnode));
  61049. +}
  61050. +
  61051. +#else
  61052. +
  61053. +# define check_block_range(start, len) do { /* nothing */} while(0)
  61054. +# define check_bnode_loaded(bnode) do { /* nothing */} while(0)
  61055. +
  61056. +#endif
  61057. +
  61058. +/* modify bnode->first_zero_bit (if we free bits before); caller must
  61059. + hold bnode->mutex (the structure has no spin lock) */
  61060. +static inline void
  61061. +adjust_first_zero_bit(struct bitmap_node *bnode, bmap_off_t offset)
  61062. +{
  61063. + if (offset < bnode->first_zero_bit)
  61064. + bnode->first_zero_bit = offset;
  61065. +}
  61066. +
  61067. +/* return a physical disk address for logical bitmap number @bmap */
  61068. +/* FIXME-VS: this is somehow related to disk layout? */
  61069. +/* ZAM-FIXME-HANS: your answer is? Use not more than one function dereference
  61070. + * per block allocation so that performance is not affected. Probably this
  61071. + * whole file should be considered part of the disk layout plugin, and other
  61072. + * disk layouts can use other defines and efficiency will not be significantly
  61073. + * affected. */
  61074. +
  61075. +#define REISER4_FIRST_BITMAP_BLOCK \
  61076. + ((REISER4_MASTER_OFFSET / PAGE_SIZE) + 2)
  61077. +
  61078. +/* Audited by: green(2002.06.12) */
  61079. +static void
  61080. +get_bitmap_blocknr(struct super_block *super, bmap_nr_t bmap,
  61081. + reiser4_block_nr * bnr)
  61082. +{
  61083. +
  61084. + assert("zam-390", bmap < get_nr_bmap(super));
  61085. +
  61086. +#ifdef CONFIG_REISER4_BADBLOCKS
  61087. +#define BITMAP_PLUGIN_DISKMAP_ID ((0xc0e1<<16) | (0xe0ff))
  61088. + /* Check if the diskmap have this already, first. */
  61089. + if (reiser4_get_diskmap_value(BITMAP_PLUGIN_DISKMAP_ID, bmap, bnr) == 0)
  61090. + return; /* Found it in diskmap */
  61091. +#endif
  61092. + /* FIXME_ZAM: before discussing of disk layouts and disk format
  61093. + plugins I implement bitmap location scheme which is close to scheme
  61094. + used in reiser 3.6 */
         + /* bitmap N (N > 0) sits at the first block of the region it maps;
         + * bitmap 0 cannot (master/super area), so it gets a fixed slot */
  61095. + if (bmap == 0) {
  61096. + *bnr = REISER4_FIRST_BITMAP_BLOCK;
  61097. + } else {
  61098. + *bnr = bmap * bmap_bit_count(super->s_blocksize);
  61099. + }
  61100. +}
  61101. +
  61102. +/* construct a fake block number for shadow bitmap (WORKING BITMAP) block;
         + * the status bits mark it as a fake (never written in place) blocknr */
  61103. +/* Audited by: green(2002.06.12) */
  61104. +static void get_working_bitmap_blocknr(bmap_nr_t bmap, reiser4_block_nr * bnr)
  61105. +{
  61106. + *bnr =
  61107. + (reiser4_block_nr) ((bmap & ~REISER4_BLOCKNR_STATUS_BIT_MASK) |
  61108. + REISER4_BITMAP_BLOCKS_STATUS_VALUE);
  61109. +}
  61110. +
  61111. +/* bnode structure initialization: zero everything, set up the mutex and
         + the "not loaded yet" state; jnodes are attached lazily on first use */
  61112. +static void
  61113. +init_bnode(struct bitmap_node *bnode,
  61114. + struct super_block *super UNUSED_ARG, bmap_nr_t bmap UNUSED_ARG)
  61115. +{
  61116. + memset(bnode, 0, sizeof(struct bitmap_node));
  61117. +
  61118. + mutex_init(&bnode->mutex);
  61119. + atomic_set(&bnode->loaded, 0);
  61120. +}
  61121. +
         +/* Drop a bitmap jnode: release its data, flag it HEARD_BANSHEE
         + * (presumably marking it for destruction -- confirm against jnode
         + * lifecycle), and drop our reference. */
  61122. +static void release(jnode * node)
  61123. +{
  61124. + jrelse(node);
  61125. + JF_SET(node, JNODE_HEARD_BANSHEE);
  61126. + jput(node);
  61127. +}
  61128. +
  61129. +/* This function is for internal bitmap.c use because it assumes that jnode
  61130. + is under full control of this thread */
  61131. +static void done_bnode(struct bitmap_node *bnode)
  61132. +{
  61133. + if (bnode) {
  61134. + atomic_set(&bnode->loaded, 0);
  61135. + if (bnode->wjnode != NULL)
  61136. + release(bnode->wjnode);
  61137. + if (bnode->cjnode != NULL)
  61138. + release(bnode->cjnode);
  61139. + bnode->wjnode = bnode->cjnode = NULL;
  61140. + }
  61141. +}
  61142. +
  61143. +/* Allocate the commit and working jnodes of @bnode, assign their block
         + numbers and read the commit bitmap from disk. On success both jnodes
         + are referenced and loaded; on failure both out-pointers are NULL.
         + Called only by load_and_lock_bnode(). */
  61144. +static int prepare_bnode(struct bitmap_node *bnode, jnode **cjnode_ret,
  61145. + jnode **wjnode_ret)
  61146. +{
  61147. + struct super_block *super;
  61148. + jnode *cjnode;
  61149. + jnode *wjnode;
  61150. + bmap_nr_t bmap;
  61151. + int ret;
  61152. +
  61153. + super = reiser4_get_current_sb();
  61154. +
  61155. + *wjnode_ret = wjnode = bnew();
  61156. + if (wjnode == NULL) {
  61157. + *cjnode_ret = NULL;
  61158. + return RETERR(-ENOMEM);
  61159. + }
  61160. +
         + /* NOTE(review): on this failure wjnode is returned but never
         + * loaded or released by the caller -- verify it is not leaked */
  61161. + *cjnode_ret = cjnode = bnew();
  61162. + if (cjnode == NULL)
  61163. + return RETERR(-ENOMEM);
  61164. +
         + /* bnode's index in the bitmap array is the bitmap number */
  61165. + bmap = bnode - get_bnode(super, 0);
  61166. +
  61167. + get_working_bitmap_blocknr(bmap, &wjnode->blocknr);
  61168. + get_bitmap_blocknr(super, bmap, &cjnode->blocknr);
  61169. +
  61170. + jref(cjnode);
  61171. + jref(wjnode);
  61172. +
  61173. + /* load commit bitmap */
  61174. + ret = jload_gfp(cjnode, GFP_NOFS, 1);
  61175. +
  61176. + if (ret)
  61177. + goto error;
  61178. +
  61179. + /* allocate memory for working bitmap block. Note that for
  61180. + * bitmaps jinit_new() doesn't actually modify node content,
  61181. + * so parallel calls to this are ok. */
  61182. + ret = jinit_new(wjnode, GFP_NOFS);
  61183. +
  61184. + if (ret != 0) {
  61185. + jrelse(cjnode);
  61186. + goto error;
  61187. + }
  61188. +
  61189. + return 0;
  61190. +
  61191. + error:
  61192. + jput(cjnode);
  61193. + jput(wjnode);
  61194. + *wjnode_ret = *cjnode_ret = NULL;
  61195. + return ret;
  61196. +
  61197. +}
  61198. +
  61199. +/* Check the bnode data on read: verify the commit-bitmap checksum
         + (@blksize is the full block size) and the invariant that bit 0 --
         + covering the superblock area -- is marked busy. */
  61200. +static int check_struct_bnode(struct bitmap_node *bnode, __u32 blksize)
  61201. +{
  61202. + void *data;
  61203. + int ret;
  61204. +
  61205. + /* Check CRC */
  61206. + ret = bnode_check_adler32(bnode, blksize);
  61207. +
  61208. + if (ret) {
  61209. + return ret;
  61210. + }
  61211. +
  61212. + data = jdata(bnode->cjnode) + CHECKSUM_SIZE;
  61213. +
  61214. + /* Check the very first bit -- it must be busy. */
  61215. + if (!reiser4_test_bit(0, data)) {
  61216. + warning("vpf-1362", "The allocator block %llu is not marked "
  61217. + "as used.", (unsigned long long)bnode->cjnode->blocknr);
  61218. +
  61219. + return -EINVAL;
  61220. + }
  61221. +
  61222. + return 0;
  61223. +}
  61224. +
  61225. +/* load bitmap blocks "on-demand": ensure @bnode's commit and working
         + bitmaps are in memory and return with bnode->mutex held. Returns 0 on
         + success, negative error code (mutex released) on failure. */
  61226. +static int load_and_lock_bnode(struct bitmap_node *bnode)
  61227. +{
  61228. + int ret;
  61229. +
  61230. + jnode *cjnode;
  61231. + jnode *wjnode;
  61232. +
  61233. + assert("nikita-3040", reiser4_schedulable());
  61234. +
  61235. +/* The "loaded" flag only ever goes 0 -> 1 (bitmaps are never unloaded),
  61236. + * so a lock-free read here can only produce a harmless false negative;
  61237. + * if dynamic unloading were ever added this fast path would need to
  61238. + * synchronize with the mutex. */
  61239. + if (atomic_read(&bnode->loaded)) {
  61240. + /* bitmap is already loaded, nothing to do */
  61241. + check_bnode_loaded(bnode);
  61242. + mutex_lock(&bnode->mutex);
  61243. + assert("nikita-2827", atomic_read(&bnode->loaded));
  61244. + return 0;
  61245. + }
  61246. +
         + /* jnodes are prepared before taking the mutex to keep the I/O
         + * outside the lock; a concurrent loader may win the race below */
  61247. + ret = prepare_bnode(bnode, &cjnode, &wjnode);
  61248. + if (ret)
  61249. + return ret;
  61250. +
  61251. + mutex_lock(&bnode->mutex);
  61252. +
  61253. + if (!atomic_read(&bnode->loaded)) {
  61254. + assert("nikita-2822", cjnode != NULL);
  61255. + assert("nikita-2823", wjnode != NULL);
  61256. + assert("nikita-2824", jnode_is_loaded(cjnode));
  61257. + assert("nikita-2825", jnode_is_loaded(wjnode));
  61258. +
  61259. + bnode->wjnode = wjnode;
  61260. + bnode->cjnode = cjnode;
  61261. +
  61262. + ret = check_struct_bnode(bnode, current_blocksize);
  61263. + if (unlikely(ret != 0))
  61264. + goto error;
  61265. +
  61266. + atomic_set(&bnode->loaded, 1);
  61267. + /* working bitmap is initialized by on-disk
  61268. + * commit bitmap. This should be performed
  61269. + * under mutex. */
  61270. + memcpy(bnode_working_data(bnode),
  61271. + bnode_commit_data(bnode),
  61272. + bmap_size(current_blocksize));
  61273. + } else
  61274. + /* race: someone already loaded bitmap
  61275. + * while we were busy initializing data.
         + * NOTE(review): the locally prepared cjnode/wjnode do not
         + * appear to be released on this path -- verify against the
         + * jnode lifecycle that they are not leaked. */
  61276. + check_bnode_loaded(bnode);
  61277. + return 0;
  61278. +
  61279. + error:
  61280. + release(wjnode);
  61281. + release(cjnode);
  61282. + bnode->wjnode = NULL;
  61283. + bnode->cjnode = NULL;
  61284. + mutex_unlock(&bnode->mutex);
  61285. + return ret;
  61286. +}
  61286. +
         +/* Counterpart of load_and_lock_bnode(): drop the long-term lock. */
  61287. +static void release_and_unlock_bnode(struct bitmap_node *bnode)
  61288. +{
  61289. + check_bnode_loaded(bnode);
  61290. + mutex_unlock(&bnode->mutex);
  61291. +}
  61292. +
  61293. +/* This function does all block allocation work but only for one bitmap
  61294. + block.*/
  61295. +/* FIXME_ZAM: It does not allow us to allocate block ranges across bitmap
  61296. + block responsibility zone boundaries. This had no sense in v3.6 but may
  61297. + have it in v4.x */
  61298. +/* Search forward within one bitmap block for a run of at least @min_len
         + (at most @max_len) free bits starting at or after *@offset and before
         + @max_offset; mark the run used in the working bitmap, store its start
         + in *@offset and return its length (0 when nothing fits, negative on
         + load error). */
  61299. +static int
  61300. +search_one_bitmap_forward(bmap_nr_t bmap, bmap_off_t * offset,
  61301. + bmap_off_t max_offset, int min_len, int max_len)
  61302. +{
  61303. + struct super_block *super = get_current_context()->super;
  61304. + struct bitmap_node *bnode = get_bnode(super, bmap);
  61305. +
  61306. + char *data;
  61307. +
  61308. + bmap_off_t search_end;
  61309. + bmap_off_t start;
  61310. + bmap_off_t end;
  61311. +
  61312. + int set_first_zero_bit = 0;
  61313. +
  61314. + int ret;
  61315. +
  61316. + assert("zam-364", min_len > 0);
  61317. + assert("zam-365", max_len >= min_len);
  61318. + assert("zam-366", *offset <= max_offset);
  61319. +
  61320. + ret = load_and_lock_bnode(bnode);
  61321. +
  61322. + if (ret)
  61323. + return ret;
  61324. +
  61325. + data = bnode_working_data(bnode);
  61326. +
  61327. + start = *offset;
  61328. +
         + /* skip_busy hint: everything below first_zero_bit is known busy */
  61329. + if (bnode->first_zero_bit >= start) {
  61330. + start = bnode->first_zero_bit;
  61331. + set_first_zero_bit = 1;
  61332. + }
  61333. +
  61334. + while (start + min_len < max_offset) {
  61335. +
  61336. + start =
  61337. + reiser4_find_next_zero_bit((long *)data, max_offset, start);
  61338. + if (set_first_zero_bit) {
  61339. + bnode->first_zero_bit = start;
  61340. + set_first_zero_bit = 0;
  61341. + }
  61342. + if (start >= max_offset)
  61343. + break;
  61344. +
  61345. + search_end = LIMIT(start + max_len, max_offset);
  61346. + end =
  61347. + reiser4_find_next_set_bit((long *)data, search_end, start);
  61348. + if (end >= start + min_len) {
  61349. + /* we can't trust find_next_set_bit result if set bit
  61350. + was not found, result may be bigger than
  61351. + max_offset */
  61352. + if (end > search_end)
  61353. + end = search_end;
  61354. +
  61355. + ret = end - start;
  61356. + *offset = start;
  61357. +
  61358. + reiser4_set_bits(data, start, end);
  61359. +
  61360. + /* FIXME: we may advance first_zero_bit if [start,
  61361. + end] region overlaps the first_zero_bit point */
  61362. +
  61363. + break;
  61364. + }
  61365. +
  61366. + start = end + 1;
  61367. + }
  61368. +
  61369. + release_and_unlock_bnode(bnode);
  61370. +
  61371. + return ret;
  61372. +}
  61373. +
  61374. +static int
  61375. +search_one_bitmap_backward(bmap_nr_t bmap, bmap_off_t * start_offset,
  61376. + bmap_off_t end_offset, int min_len, int max_len)
  61377. +{
  61378. + struct super_block *super = get_current_context()->super;
  61379. + struct bitmap_node *bnode = get_bnode(super, bmap);
  61380. + char *data;
  61381. + bmap_off_t start;
  61382. + int ret;
  61383. +
  61384. + assert("zam-958", min_len > 0);
  61385. + assert("zam-959", max_len >= min_len);
  61386. + assert("zam-960", *start_offset >= end_offset);
  61387. +
  61388. + ret = load_and_lock_bnode(bnode);
  61389. + if (ret)
  61390. + return ret;
  61391. +
  61392. + data = bnode_working_data(bnode);
  61393. + start = *start_offset;
  61394. +
  61395. + while (1) {
  61396. + bmap_off_t end, search_end;
  61397. +
  61398. + /* Find the beginning of the zero filled region */
  61399. + if (reiser4_find_last_zero_bit(&start, data, end_offset, start))
  61400. + break;
  61401. + /* Is there more than `min_len' bits from `start' to
  61402. + * `end_offset'? */
  61403. + if (start < end_offset + min_len - 1)
  61404. + break;
  61405. +
  61406. + /* Do not search to `end_offset' if we need to find less than
  61407. + * `max_len' zero bits. */
  61408. + if (end_offset + max_len - 1 < start)
  61409. + search_end = start - max_len + 1;
  61410. + else
  61411. + search_end = end_offset;
  61412. +
  61413. + if (reiser4_find_last_set_bit(&end, data, search_end, start))
  61414. + end = search_end;
  61415. + else
  61416. + end++;
  61417. +
  61418. + if (end + min_len <= start + 1) {
  61419. + if (end < search_end)
  61420. + end = search_end;
  61421. + ret = start - end + 1;
  61422. + *start_offset = end; /* `end' is lowest offset */
  61423. + assert("zam-987",
  61424. + reiser4_find_next_set_bit(data, start + 1,
  61425. + end) >= start + 1);
  61426. + reiser4_set_bits(data, end, start + 1);
  61427. + break;
  61428. + }
  61429. +
  61430. + if (end <= end_offset)
  61431. + /* left search boundary reached. */
  61432. + break;
  61433. + start = end - 1;
  61434. + }
  61435. +
  61436. + release_and_unlock_bnode(bnode);
  61437. + return ret;
  61438. +}
  61439. +
  61440. +/* allocate contiguous range of blocks in bitmap */
  61441. +static int bitmap_alloc_forward(reiser4_block_nr * start,
  61442. + const reiser4_block_nr * end, int min_len,
  61443. + int max_len)
  61444. +{
  61445. + bmap_nr_t bmap, end_bmap;
  61446. + bmap_off_t offset, end_offset;
  61447. + int len;
  61448. +
  61449. + reiser4_block_nr tmp;
  61450. +
  61451. + struct super_block *super = get_current_context()->super;
  61452. + const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize);
  61453. +
  61454. + parse_blocknr(start, &bmap, &offset);
  61455. +
  61456. + tmp = *end - 1;
  61457. + parse_blocknr(&tmp, &end_bmap, &end_offset);
  61458. + ++end_offset;
  61459. +
  61460. + assert("zam-358", end_bmap >= bmap);
  61461. + assert("zam-359", ergo(end_bmap == bmap, end_offset >= offset));
  61462. +
  61463. + for (; bmap < end_bmap; bmap++, offset = 0) {
  61464. + len =
  61465. + search_one_bitmap_forward(bmap, &offset, max_offset,
  61466. + min_len, max_len);
  61467. + if (len != 0)
  61468. + goto out;
  61469. + }
  61470. +
  61471. + len =
  61472. + search_one_bitmap_forward(bmap, &offset, end_offset, min_len,
  61473. + max_len);
  61474. + out:
  61475. + *start = bmap * max_offset + offset;
  61476. + return len;
  61477. +}
  61478. +
  61479. +/* allocate contiguous range of blocks in bitmap (from @start to @end in
  61480. + * backward direction) */
  61481. +static int bitmap_alloc_backward(reiser4_block_nr * start,
  61482. + const reiser4_block_nr * end, int min_len,
  61483. + int max_len)
  61484. +{
  61485. + bmap_nr_t bmap, end_bmap;
  61486. + bmap_off_t offset, end_offset;
  61487. + int len;
  61488. + struct super_block *super = get_current_context()->super;
  61489. + const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize);
  61490. +
  61491. + parse_blocknr(start, &bmap, &offset);
  61492. + parse_blocknr(end, &end_bmap, &end_offset);
  61493. +
  61494. + assert("zam-961", end_bmap <= bmap);
  61495. + assert("zam-962", ergo(end_bmap == bmap, end_offset <= offset));
  61496. +
  61497. + for (; bmap > end_bmap; bmap--, offset = max_offset - 1) {
  61498. + len =
  61499. + search_one_bitmap_backward(bmap, &offset, 0, min_len,
  61500. + max_len);
  61501. + if (len != 0)
  61502. + goto out;
  61503. + }
  61504. +
  61505. + len =
  61506. + search_one_bitmap_backward(bmap, &offset, end_offset, min_len,
  61507. + max_len);
  61508. + out:
  61509. + *start = bmap * max_offset + offset;
  61510. + return len;
  61511. +}
  61512. +
  61513. +/* plugin->u.space_allocator.alloc_blocks() */
  61514. +static int alloc_blocks_forward(reiser4_blocknr_hint *hint, int needed,
  61515. + reiser4_block_nr *start, reiser4_block_nr *len)
  61516. +{
  61517. + struct super_block *super = get_current_context()->super;
  61518. + int actual_len;
  61519. +
  61520. + reiser4_block_nr search_start;
  61521. + reiser4_block_nr search_end;
  61522. +
  61523. + assert("zam-398", super != NULL);
  61524. + assert("zam-412", hint != NULL);
  61525. + assert("zam-397", hint->blk <= reiser4_block_count(super));
  61526. +
  61527. + if (hint->max_dist == 0)
  61528. + search_end = reiser4_block_count(super);
  61529. + else
  61530. + search_end =
  61531. + LIMIT(hint->blk + hint->max_dist,
  61532. + reiser4_block_count(super));
  61533. +
  61534. + /* We use @hint -> blk as a search start and search from it to the end
  61535. + of the disk or in given region if @hint -> max_dist is not zero */
  61536. + search_start = hint->blk;
  61537. +
  61538. + actual_len =
  61539. + bitmap_alloc_forward(&search_start, &search_end, 1, needed);
  61540. +
  61541. + /* There is only one bitmap search if max_dist was specified or first
  61542. + pass was from the beginning of the bitmap. We also do one pass for
  61543. + scanning bitmap in backward direction. */
  61544. + if (!(actual_len != 0 || hint->max_dist != 0 || search_start == 0)) {
  61545. + /* next step is a scanning from 0 to search_start */
  61546. + search_end = search_start;
  61547. + search_start = 0;
  61548. + actual_len =
  61549. + bitmap_alloc_forward(&search_start, &search_end, 1, needed);
  61550. + }
  61551. + if (actual_len == 0)
  61552. + return RETERR(-ENOSPC);
  61553. + if (actual_len < 0)
  61554. + return RETERR(actual_len);
  61555. + *len = actual_len;
  61556. + *start = search_start;
  61557. + return 0;
  61558. +}
  61559. +
  61560. +static int alloc_blocks_backward(reiser4_blocknr_hint * hint, int needed,
  61561. + reiser4_block_nr * start,
  61562. + reiser4_block_nr * len)
  61563. +{
  61564. + reiser4_block_nr search_start;
  61565. + reiser4_block_nr search_end;
  61566. + int actual_len;
  61567. +
  61568. + ON_DEBUG(struct super_block *super = reiser4_get_current_sb());
  61569. +
  61570. + assert("zam-969", super != NULL);
  61571. + assert("zam-970", hint != NULL);
  61572. + assert("zam-971", hint->blk <= reiser4_block_count(super));
  61573. +
  61574. + search_start = hint->blk;
  61575. + if (hint->max_dist == 0 || search_start <= hint->max_dist)
  61576. + search_end = 0;
  61577. + else
  61578. + search_end = search_start - hint->max_dist;
  61579. +
  61580. + actual_len =
  61581. + bitmap_alloc_backward(&search_start, &search_end, 1, needed);
  61582. + if (actual_len == 0)
  61583. + return RETERR(-ENOSPC);
  61584. + if (actual_len < 0)
  61585. + return RETERR(actual_len);
  61586. + *len = actual_len;
  61587. + *start = search_start;
  61588. + return 0;
  61589. +}
  61590. +
  61591. +/* plugin->u.space_allocator.alloc_blocks() */
  61592. +int reiser4_alloc_blocks_bitmap(reiser4_space_allocator * allocator,
  61593. + reiser4_blocknr_hint * hint, int needed,
  61594. + reiser4_block_nr * start, reiser4_block_nr * len)
  61595. +{
  61596. + if (hint->backward)
  61597. + return alloc_blocks_backward(hint, needed, start, len);
  61598. + return alloc_blocks_forward(hint, needed, start, len);
  61599. +}
  61600. +
  61601. +/* plugin->u.space_allocator.dealloc_blocks(). */
  61602. +/* It just frees blocks in WORKING BITMAP. Usually formatted and unformatted
  61603. + nodes deletion is deferred until transaction commit. However, deallocation
  61604. + of temporary objects like wandered blocks and transaction commit records
  61605. + requires immediate node deletion from WORKING BITMAP.*/
  61606. +void reiser4_dealloc_blocks_bitmap(reiser4_space_allocator * allocator,
  61607. + reiser4_block_nr start, reiser4_block_nr len)
  61608. +{
  61609. + struct super_block *super = reiser4_get_current_sb();
  61610. +
  61611. + bmap_nr_t bmap;
  61612. + bmap_off_t offset;
  61613. +
  61614. + struct bitmap_node *bnode;
  61615. + int ret;
  61616. +
  61617. + assert("zam-468", len != 0);
  61618. + check_block_range(&start, &len);
  61619. +
  61620. + parse_blocknr(&start, &bmap, &offset);
  61621. +
  61622. + assert("zam-469", offset + len <= bmap_bit_count(super->s_blocksize));
  61623. +
  61624. + bnode = get_bnode(super, bmap);
  61625. +
  61626. + assert("zam-470", bnode != NULL);
  61627. +
  61628. + ret = load_and_lock_bnode(bnode);
  61629. + assert("zam-481", ret == 0);
  61630. +
  61631. + reiser4_clear_bits(bnode_working_data(bnode), offset,
  61632. + (bmap_off_t) (offset + len));
  61633. +
  61634. + adjust_first_zero_bit(bnode, offset);
  61635. +
  61636. + release_and_unlock_bnode(bnode);
  61637. +}
  61638. +
  61639. +static int check_blocks_one_bitmap(bmap_nr_t bmap, bmap_off_t start_offset,
  61640. + bmap_off_t end_offset, int desired)
  61641. +{
  61642. + struct super_block *super = reiser4_get_current_sb();
  61643. + struct bitmap_node *bnode = get_bnode(super, bmap);
  61644. + int ret;
  61645. +
  61646. + assert("nikita-2215", bnode != NULL);
  61647. +
  61648. + ret = load_and_lock_bnode(bnode);
  61649. + assert("zam-626", ret == 0);
  61650. +
  61651. + assert("nikita-2216", jnode_is_loaded(bnode->wjnode));
  61652. +
  61653. + if (desired) {
  61654. + ret = reiser4_find_next_zero_bit(bnode_working_data(bnode),
  61655. + end_offset, start_offset)
  61656. + >= end_offset;
  61657. + } else {
  61658. + ret = reiser4_find_next_set_bit(bnode_working_data(bnode),
  61659. + end_offset, start_offset)
  61660. + >= end_offset;
  61661. + }
  61662. +
  61663. + release_and_unlock_bnode(bnode);
  61664. +
  61665. + return ret;
  61666. +}
  61667. +
  61668. +/* plugin->u.space_allocator.check_blocks(). */
  61669. +int reiser4_check_blocks_bitmap(const reiser4_block_nr * start,
  61670. + const reiser4_block_nr * len, int desired)
  61671. +{
  61672. + struct super_block *super = reiser4_get_current_sb();
  61673. +
  61674. + reiser4_block_nr end;
  61675. + bmap_nr_t bmap, end_bmap;
  61676. + bmap_off_t offset, end_offset;
  61677. + const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize);
  61678. +
  61679. + assert("intelfx-9", start != NULL);
  61680. + assert("intelfx-10", ergo(len != NULL, *len > 0));
  61681. +
  61682. + if (len != NULL) {
  61683. + check_block_range(start, len);
  61684. + end = *start + *len - 1;
  61685. + } else {
  61686. + /* on next line, end is used as temporary len for check_block_range() */
  61687. + end = 1; check_block_range(start, &end);
  61688. + end = *start;
  61689. + }
  61690. +
  61691. + parse_blocknr(start, &bmap, &offset);
  61692. +
  61693. + if (end == *start) {
  61694. + end_bmap = bmap;
  61695. + end_offset = offset;
  61696. + } else {
  61697. + parse_blocknr(&end, &end_bmap, &end_offset);
  61698. + }
  61699. + ++end_offset;
  61700. +
  61701. + assert("intelfx-4", end_bmap >= bmap);
  61702. + assert("intelfx-5", ergo(end_bmap == bmap, end_offset >= offset));
  61703. +
  61704. + for (; bmap < end_bmap; bmap++, offset = 0) {
  61705. + if (!check_blocks_one_bitmap(bmap, offset, max_offset, desired)) {
  61706. + return 0;
  61707. + }
  61708. + }
  61709. + return check_blocks_one_bitmap(bmap, offset, end_offset, desired);
  61710. +}
  61711. +
  61712. +/* conditional insertion of @node into atom's overwrite set if it was not there */
  61713. +static void cond_add_to_overwrite_set(txn_atom * atom, jnode * node)
  61714. +{
  61715. + assert("zam-546", atom != NULL);
  61716. + assert("zam-547", atom->stage == ASTAGE_PRE_COMMIT);
  61717. + assert("zam-548", node != NULL);
  61718. +
  61719. + spin_lock_atom(atom);
  61720. + spin_lock_jnode(node);
  61721. +
  61722. + if (node->atom == NULL) {
  61723. + JF_SET(node, JNODE_OVRWR);
  61724. + insert_into_atom_ovrwr_list(atom, node);
  61725. + } else {
  61726. + assert("zam-549", node->atom == atom);
  61727. + }
  61728. +
  61729. + spin_unlock_jnode(node);
  61730. + spin_unlock_atom(atom);
  61731. +}
  61732. +
  61733. +/* an actor which applies delete set to COMMIT bitmap pages and link modified
  61734. + pages in a single-linked list */
  61735. +static int
  61736. +apply_dset_to_commit_bmap(txn_atom * atom, const reiser4_block_nr * start,
  61737. + const reiser4_block_nr * len, void *data)
  61738. +{
  61739. +
  61740. + bmap_nr_t bmap;
  61741. + bmap_off_t offset;
  61742. + int ret;
  61743. +
  61744. + long long *blocks_freed_p = data;
  61745. +
  61746. + struct bitmap_node *bnode;
  61747. +
  61748. + struct super_block *sb = reiser4_get_current_sb();
  61749. +
  61750. + check_block_range(start, len);
  61751. +
  61752. + parse_blocknr(start, &bmap, &offset);
  61753. +
  61754. + /* FIXME-ZAM: we assume that all block ranges are allocated by this
  61755. + bitmap-based allocator and each block range can't go over a zone of
  61756. + responsibility of one bitmap block; same assumption is used in
  61757. + other journal hooks in bitmap code. */
  61758. + bnode = get_bnode(sb, bmap);
  61759. + assert("zam-448", bnode != NULL);
  61760. +
  61761. + /* it is safe to unlock atom which is in ASTAGE_PRE_COMMIT */
  61762. + assert("zam-767", atom->stage == ASTAGE_PRE_COMMIT);
  61763. + ret = load_and_lock_bnode(bnode);
  61764. + if (ret)
  61765. + return ret;
  61766. +
  61767. + /* put bnode into atom's overwrite set */
  61768. + cond_add_to_overwrite_set(atom, bnode->cjnode);
  61769. +
  61770. + data = bnode_commit_data(bnode);
  61771. +
  61772. + ret = bnode_check_crc(bnode);
  61773. + if (ret != 0)
  61774. + goto out;
  61775. +
  61776. + if (len != NULL) {
  61777. + /* FIXME-ZAM: a check that all bits are set should be there */
  61778. + assert("zam-443",
  61779. + offset + *len <= bmap_bit_count(sb->s_blocksize));
  61780. + reiser4_clear_bits(data, offset, (bmap_off_t) (offset + *len));
  61781. +
  61782. + (*blocks_freed_p) += *len;
  61783. + } else {
  61784. + reiser4_clear_bit(offset, data);
  61785. + (*blocks_freed_p)++;
  61786. + }
  61787. +
  61788. + bnode_set_commit_crc(bnode, bnode_calc_crc(bnode, sb->s_blocksize));
  61789. + ret = 0;
  61790. + out:
  61791. + release_and_unlock_bnode(bnode);
  61792. + return ret;
  61793. +}
  61794. +
  61795. +/* plugin->u.space_allocator.pre_commit_hook(). */
  61796. +/* It just applies transaction changes to fs-wide COMMIT BITMAP, hoping the
  61797. + rest is done by transaction manager (allocate wandered locations for COMMIT
  61798. + BITMAP blocks, copy COMMIT BITMAP blocks data). */
  61799. +/* Only one instance of this function can be running at one given time, because
  61800. + only one transaction can be committed at a time, therefore it is safe to access
  61801. + some global variables without any locking */
  61802. +
  61803. +int reiser4_pre_commit_hook_bitmap(void)
  61804. +{
  61805. + struct super_block *super = reiser4_get_current_sb();
  61806. + txn_atom *atom;
  61807. +
  61808. + long long blocks_freed = 0;
  61809. +
  61810. + atom = get_current_atom_locked();
  61811. + assert("zam-876", atom->stage == ASTAGE_PRE_COMMIT);
  61812. + spin_unlock_atom(atom);
  61813. +
  61814. + { /* scan atom's captured list and find all freshly allocated nodes,
  61815. + * mark corresponding bits in COMMIT BITMAP as used */
  61816. + struct list_head *head = ATOM_CLEAN_LIST(atom);
  61817. + jnode *node = list_entry(head->next, jnode, capture_link);
  61818. +
  61819. + while (head != &node->capture_link) {
  61820. + /* we detect freshly allocated jnodes */
  61821. + if (JF_ISSET(node, JNODE_RELOC)) {
  61822. + int ret;
  61823. + bmap_nr_t bmap;
  61824. +
  61825. + bmap_off_t offset;
  61826. + bmap_off_t index;
  61827. + struct bitmap_node *bn;
  61828. + __u32 size = bmap_size(super->s_blocksize);
  61829. + __u32 crc;
  61830. + char byte;
  61831. +
  61832. + assert("zam-559", !JF_ISSET(node, JNODE_OVRWR));
  61833. + assert("zam-460",
  61834. + !reiser4_blocknr_is_fake(&node->blocknr));
  61835. +
  61836. + parse_blocknr(&node->blocknr, &bmap, &offset);
  61837. + bn = get_bnode(super, bmap);
  61838. +
  61839. + index = offset >> 3;
  61840. + assert("vpf-276", index < size);
  61841. +
  61842. + ret = bnode_check_crc(bn);
  61843. + if (ret != 0)
  61844. + return ret;
  61845. +
  61846. + check_bnode_loaded(bn);
  61847. + load_and_lock_bnode(bn);
  61848. +
  61849. + byte = *(bnode_commit_data(bn) + index);
  61850. + reiser4_set_bit(offset, bnode_commit_data(bn));
  61851. +
  61852. + crc = adler32_recalc(bnode_commit_crc(bn), byte,
  61853. + *(bnode_commit_data(bn) +
  61854. + index),
  61855. + size - index);
  61856. + bnode_set_commit_crc(bn, crc);
  61857. +
  61858. + release_and_unlock_bnode(bn);
  61859. +
  61860. + ret = bnode_check_crc(bn);
  61861. + if (ret != 0)
  61862. + return ret;
  61863. +
  61864. + /* working of this depends on how it inserts
  61865. + new j-node into clean list, because we are
  61866. + scanning the same list now. It is OK, if
  61867. + insertion is done to the list front */
  61868. + cond_add_to_overwrite_set(atom, bn->cjnode);
  61869. + }
  61870. +
  61871. + node = list_entry(node->capture_link.next, jnode, capture_link);
  61872. + }
  61873. + }
  61874. +
  61875. + atom_dset_deferred_apply(atom, apply_dset_to_commit_bmap, &blocks_freed, 0);
  61876. +
  61877. + blocks_freed -= atom->nr_blocks_allocated;
  61878. +
  61879. + {
  61880. + reiser4_super_info_data *sbinfo;
  61881. +
  61882. + sbinfo = get_super_private(super);
  61883. +
  61884. + spin_lock_reiser4_super(sbinfo);
  61885. + sbinfo->blocks_free_committed += blocks_freed;
  61886. + spin_unlock_reiser4_super(sbinfo);
  61887. + }
  61888. +
  61889. + return 0;
  61890. +}
  61891. +
  61892. +/* plugin->u.space_allocator.init_allocator
  61893. + constructor of reiser4_space_allocator object. It is called on fs mount */
  61894. +int reiser4_init_allocator_bitmap(reiser4_space_allocator * allocator,
  61895. + struct super_block *super, void *arg)
  61896. +{
  61897. + struct bitmap_allocator_data *data = NULL;
  61898. + bmap_nr_t bitmap_blocks_nr;
  61899. + bmap_nr_t i;
  61900. +
  61901. + assert("nikita-3039", reiser4_schedulable());
  61902. +
  61903. + /* getting memory for bitmap allocator private data holder */
  61904. + data =
  61905. + kmalloc(sizeof(struct bitmap_allocator_data),
  61906. + reiser4_ctx_gfp_mask_get());
  61907. +
  61908. + if (data == NULL)
  61909. + return RETERR(-ENOMEM);
  61910. +
  61911. + /* allocation and initialization for the array of bnodes */
  61912. + bitmap_blocks_nr = get_nr_bmap(super);
  61913. +
  61914. + /* FIXME-ZAM: it is not clear what to do with huge number of bitmaps
  61915. + which is bigger than 2^32 (= 8 * 4096 * 4096 * 2^32 bytes = 5.76e+17,
  61916. + may I never meet someone who still uses the ia32 architecture when
  61917. + storage devices of that size enter the market, and wants to use ia32
  61918. + with that storage device, much less reiser4. ;-) -Hans). Kmalloc is not possible and,
  61919. + probably, another dynamic data structure should replace a static
  61920. + array of bnodes. */
  61921. + /*data->bitmap = reiser4_kmalloc((size_t) (sizeof (struct bitmap_node) * bitmap_blocks_nr), GFP_KERNEL); */
  61922. + data->bitmap = reiser4_vmalloc(sizeof(struct bitmap_node) * bitmap_blocks_nr);
  61923. + if (data->bitmap == NULL) {
  61924. + kfree(data);
  61925. + return RETERR(-ENOMEM);
  61926. + }
  61927. +
  61928. + for (i = 0; i < bitmap_blocks_nr; i++)
  61929. + init_bnode(data->bitmap + i, super, i);
  61930. +
  61931. + allocator->u.generic = data;
  61932. +
  61933. +#if REISER4_DEBUG
  61934. + get_super_private(super)->min_blocks_used += bitmap_blocks_nr;
  61935. +#endif
  61936. +
  61937. + /* Load all bitmap blocks at mount time. */
  61938. + if (!test_bit
  61939. + (REISER4_DONT_LOAD_BITMAP, &get_super_private(super)->fs_flags)) {
  61940. + __u64 start_time, elapsed_time;
  61941. + struct bitmap_node *bnode;
  61942. + int ret;
  61943. +
  61944. + if (REISER4_DEBUG)
  61945. + printk(KERN_INFO "loading reiser4 bitmap...");
  61946. + start_time = jiffies;
  61947. +
  61948. + for (i = 0; i < bitmap_blocks_nr; i++) {
  61949. + bnode = data->bitmap + i;
  61950. + ret = load_and_lock_bnode(bnode);
  61951. + if (ret) {
  61952. + reiser4_destroy_allocator_bitmap(allocator,
  61953. + super);
  61954. + return ret;
  61955. + }
  61956. + release_and_unlock_bnode(bnode);
  61957. + }
  61958. +
  61959. + elapsed_time = jiffies - start_time;
  61960. + if (REISER4_DEBUG)
  61961. + printk("...done (%llu jiffies)\n",
  61962. + (unsigned long long)elapsed_time);
  61963. + }
  61964. +
  61965. + return 0;
  61966. +}
  61967. +
  61968. +/* plugin->u.space_allocator.destroy_allocator
  61969. + destructor. It is called on fs unmount */
  61970. +int reiser4_destroy_allocator_bitmap(reiser4_space_allocator * allocator,
  61971. + struct super_block *super)
  61972. +{
  61973. + bmap_nr_t bitmap_blocks_nr;
  61974. + bmap_nr_t i;
  61975. +
  61976. + struct bitmap_allocator_data *data = allocator->u.generic;
  61977. +
  61978. + assert("zam-414", data != NULL);
  61979. + assert("zam-376", data->bitmap != NULL);
  61980. +
  61981. + bitmap_blocks_nr = get_nr_bmap(super);
  61982. +
  61983. + for (i = 0; i < bitmap_blocks_nr; i++) {
  61984. + struct bitmap_node *bnode = data->bitmap + i;
  61985. +
  61986. + mutex_lock(&bnode->mutex);
  61987. +
  61988. +#if REISER4_DEBUG
  61989. + if (atomic_read(&bnode->loaded)) {
  61990. + jnode *wj = bnode->wjnode;
  61991. + jnode *cj = bnode->cjnode;
  61992. +
  61993. + assert("zam-480", jnode_page(cj) != NULL);
  61994. + assert("zam-633", jnode_page(wj) != NULL);
  61995. +
  61996. + assert("zam-634",
  61997. + memcmp(jdata(wj), jdata(cj),
  61998. + bmap_size(super->s_blocksize)) == 0);
  61999. +
  62000. + }
  62001. +#endif
  62002. + done_bnode(bnode);
  62003. + mutex_unlock(&bnode->mutex);
  62004. + }
  62005. +
  62006. + vfree(data->bitmap);
  62007. + kfree(data);
  62008. +
  62009. + allocator->u.generic = NULL;
  62010. +
  62011. + return 0;
  62012. +}
  62013. +
  62014. +/*
  62015. + * Local variables:
  62016. + * c-indentation-style: "K&R"
  62017. + * mode-name: "LC"
  62018. + * c-basic-offset: 8
  62019. + * tab-width: 8
  62020. + * fill-column: 79
  62021. + * scroll-step: 1
  62022. + * End:
  62023. + */
  62024. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/space/bitmap.h linux-5.16.14/fs/reiser4/plugin/space/bitmap.h
  62025. --- linux-5.16.14.orig/fs/reiser4/plugin/space/bitmap.h 1970-01-01 01:00:00.000000000 +0100
  62026. +++ linux-5.16.14/fs/reiser4/plugin/space/bitmap.h 2022-03-12 13:26:19.685892809 +0100
  62027. @@ -0,0 +1,47 @@
  62028. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  62029. +
  62030. +#if !defined (__REISER4_PLUGIN_SPACE_BITMAP_H__)
  62031. +#define __REISER4_PLUGIN_SPACE_BITMAP_H__
  62032. +
  62033. +#include "../../dformat.h"
  62034. +#include "../../block_alloc.h"
  62035. +
  62036. +#include <linux/types.h> /* for __u?? */
  62037. +#include <linux/fs.h> /* for struct super_block */
  62038. +/* EDWARD-FIXME-HANS: write something as informative as the below for every .h file lacking it. */
  62039. +/* declarations of functions implementing methods of space allocator plugin for
  62040. + bitmap based allocator. The functions themselves are in bitmap.c */
  62041. +extern int reiser4_init_allocator_bitmap(reiser4_space_allocator *,
  62042. + struct super_block *, void *);
  62043. +extern int reiser4_destroy_allocator_bitmap(reiser4_space_allocator *,
  62044. + struct super_block *);
  62045. +extern int reiser4_alloc_blocks_bitmap(reiser4_space_allocator *,
  62046. + reiser4_blocknr_hint *, int needed,
  62047. + reiser4_block_nr * start,
  62048. + reiser4_block_nr * len);
  62049. +extern int reiser4_check_blocks_bitmap(const reiser4_block_nr *,
  62050. + const reiser4_block_nr *, int);
  62051. +extern void reiser4_dealloc_blocks_bitmap(reiser4_space_allocator *,
  62052. + reiser4_block_nr,
  62053. + reiser4_block_nr);
  62054. +extern int reiser4_pre_commit_hook_bitmap(void);
  62055. +
  62056. +#define reiser4_post_commit_hook_bitmap() do{}while(0)
  62057. +#define reiser4_post_write_back_hook_bitmap() do{}while(0)
  62058. +#define reiser4_print_info_bitmap(pref, al) do{}while(0)
  62059. +
  62060. +typedef __u64 bmap_nr_t;
  62061. +typedef __u32 bmap_off_t;
  62062. +
  62063. +#endif /* __REISER4_PLUGIN_SPACE_BITMAP_H__ */
  62064. +
  62065. +/* Make Linus happy.
  62066. + Local variables:
  62067. + c-indentation-style: "K&R"
  62068. + mode-name: "LC"
  62069. + c-basic-offset: 8
  62070. + tab-width: 8
  62071. + fill-column: 120
  62072. + scroll-step: 1
  62073. + End:
  62074. +*/
  62075. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/space/Makefile linux-5.16.14/fs/reiser4/plugin/space/Makefile
  62076. --- linux-5.16.14.orig/fs/reiser4/plugin/space/Makefile 1970-01-01 01:00:00.000000000 +0100
  62077. +++ linux-5.16.14/fs/reiser4/plugin/space/Makefile 2022-03-12 13:26:19.685892809 +0100
  62078. @@ -0,0 +1,6 @@
  62079. +
  62080. +MODULE := space_plugins
  62081. +
  62082. +obj-$(CONFIG_REISER4_FS) := $(MODULE).o
  62083. +
  62084. +$(MODULE)-objs += bitmap.o
  62085. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/space/space_allocator.h linux-5.16.14/fs/reiser4/plugin/space/space_allocator.h
  62086. --- linux-5.16.14.orig/fs/reiser4/plugin/space/space_allocator.h 1970-01-01 01:00:00.000000000 +0100
  62087. +++ linux-5.16.14/fs/reiser4/plugin/space/space_allocator.h 2022-03-12 13:26:19.685892809 +0100
  62088. @@ -0,0 +1,80 @@
  62089. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  62090. +
  62091. +#ifndef __SPACE_ALLOCATOR_H__
  62092. +#define __SPACE_ALLOCATOR_H__
  62093. +
  62094. +#include "../../forward.h"
  62095. +#include "bitmap.h"
  62096. +/* NIKITA-FIXME-HANS: surely this could use a comment. Something about how bitmap is the only space allocator for now,
  62097. + * but... */
  62098. +#define DEF_SPACE_ALLOCATOR(allocator) \
  62099. + \
  62100. +static inline int sa_init_allocator (reiser4_space_allocator * al, struct super_block *s, void * opaque) \
  62101. +{ \
  62102. + return reiser4_init_allocator_##allocator (al, s, opaque); \
  62103. +} \
  62104. + \
  62105. +static inline void sa_destroy_allocator (reiser4_space_allocator *al, struct super_block *s) \
  62106. +{ \
  62107. + reiser4_destroy_allocator_##allocator (al, s); \
  62108. +} \
  62109. + \
  62110. +static inline int sa_alloc_blocks (reiser4_space_allocator *al, reiser4_blocknr_hint * hint, \
  62111. + int needed, reiser4_block_nr * start, reiser4_block_nr * len) \
  62112. +{ \
  62113. + return reiser4_alloc_blocks_##allocator (al, hint, needed, start, len); \
  62114. +} \
  62115. +static inline void sa_dealloc_blocks (reiser4_space_allocator * al, reiser4_block_nr start, reiser4_block_nr len) \
  62116. +{ \
  62117. + reiser4_dealloc_blocks_##allocator (al, start, len); \
  62118. +} \
  62119. + \
  62120. +static inline int sa_check_blocks (const reiser4_block_nr * start, const reiser4_block_nr * end, int desired) \
  62121. +{ \
  62122. + return reiser4_check_blocks_##allocator (start, end, desired); \
  62123. +} \
  62124. + \
  62125. +static inline void sa_pre_commit_hook (void) \
  62126. +{ \
  62127. + reiser4_pre_commit_hook_##allocator (); \
  62128. +} \
  62129. + \
  62130. +static inline void sa_post_commit_hook (void) \
  62131. +{ \
  62132. + reiser4_post_commit_hook_##allocator (); \
  62133. +} \
  62134. + \
  62135. +static inline void sa_post_write_back_hook (void) \
  62136. +{ \
  62137. + reiser4_post_write_back_hook_##allocator(); \
  62138. +} \
  62139. + \
  62140. +static inline void sa_print_info(const char * prefix, reiser4_space_allocator * al) \
  62141. +{ \
  62142. + reiser4_print_info_##allocator (prefix, al); \
  62143. +}
  62144. +
  62145. +DEF_SPACE_ALLOCATOR(bitmap)
  62146. +
  62147. +/* this object is part of reiser4 private in-core super block */
  62148. +struct reiser4_space_allocator {
  62149. + union {
  62150. + /* space allocators might use this pointer to reference their
  62151. + * data. */
  62152. + void *generic;
  62153. + } u;
  62154. +};
  62155. +
  62156. +/* __SPACE_ALLOCATOR_H__ */
  62157. +#endif
  62158. +
  62159. +/* Make Linus happy.
  62160. + Local variables:
  62161. + c-indentation-style: "K&R"
  62162. + mode-name: "LC"
  62163. + c-basic-offset: 8
  62164. + tab-width: 8
  62165. + fill-column: 120
  62166. + scroll-step: 1
  62167. + End:
  62168. +*/
  62169. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/tail_policy.c linux-5.16.14/fs/reiser4/plugin/tail_policy.c
  62170. --- linux-5.16.14.orig/fs/reiser4/plugin/tail_policy.c 1970-01-01 01:00:00.000000000 +0100
  62171. +++ linux-5.16.14/fs/reiser4/plugin/tail_policy.c 2022-03-12 13:26:19.685892809 +0100
  62172. @@ -0,0 +1,113 @@
  62173. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  62174. + * reiser4/README */
  62175. +
  62176. +/* Formatting policy plugins */
  62177. +
  62178. +/*
  62179. + * Formatting policy plugin is used by object plugin (of regular file) to
  62180. + * convert file between two representations.
  62181. + *
  62182. + * Currently following policies are implemented:
  62183. + * never store file in formatted nodes
  62184. + * always store file in formatted nodes
  62185. + * store file in formatted nodes if file is smaller than 4 blocks (default)
  62186. + */
  62187. +
  62188. +#include "../tree.h"
  62189. +#include "../inode.h"
  62190. +#include "../super.h"
  62191. +#include "object.h"
  62192. +#include "plugin.h"
  62193. +#include "node/node.h"
  62194. +#include "plugin_header.h"
  62195. +
  62196. +#include <linux/pagemap.h>
  62197. +#include <linux/fs.h> /* For struct inode */
  62198. +
  62199. +/**
  62200. + * have_formatting_never -
  62201. + * @inode:
  62202. + * @size:
  62203. + *
  62204. + *
  62205. + */
  62206. +/* Never store file's tail as direct item */
  62207. +/* Audited by: green(2002.06.12) */
  62208. +static int have_formatting_never(const struct inode *inode UNUSED_ARG
  62209. + /* inode to operate on */ ,
  62210. + loff_t size UNUSED_ARG/* new object size */)
  62211. +{
  62212. + return 0;
  62213. +}
  62214. +
  62215. +/* Always store file's tail as direct item */
  62216. +/* Audited by: green(2002.06.12) */
  62217. +static int
  62218. +have_formatting_always(const struct inode *inode UNUSED_ARG
  62219. + /* inode to operate on */ ,
  62220. + loff_t size UNUSED_ARG/* new object size */)
  62221. +{
  62222. + return 1;
  62223. +}
  62224. +
  62225. +/* This function tests whether the file denoted by @inode should be stored
  62226. + as tails only or as extents only. */
  62227. +static int
  62228. +have_formatting_default(const struct inode *inode UNUSED_ARG
  62229. + /* inode to operate on */ ,
  62230. + loff_t size/* new object size */)
  62231. +{
  62232. + assert("umka-1253", inode != NULL);
  62233. +
  62234. + if (size > inode->i_sb->s_blocksize * 4)
  62235. + return 0;
  62236. +
  62237. + return 1;
  62238. +}
  62239. +
  62240. +/* tail plugins */
  62241. +formatting_plugin formatting_plugins[LAST_TAIL_FORMATTING_ID] = {
  62242. + [NEVER_TAILS_FORMATTING_ID] = {
  62243. + .h = {
  62244. + .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
  62245. + .id = NEVER_TAILS_FORMATTING_ID,
  62246. + .pops = NULL,
  62247. + .label = "never",
  62248. + .desc = "Never store file's tail",
  62249. + .linkage = {NULL, NULL}
  62250. + },
  62251. + .have_tail = have_formatting_never
  62252. + },
  62253. + [ALWAYS_TAILS_FORMATTING_ID] = {
  62254. + .h = {
  62255. + .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
  62256. + .id = ALWAYS_TAILS_FORMATTING_ID,
  62257. + .pops = NULL,
  62258. + .label = "always",
  62259. + .desc = "Always store file's tail",
  62260. + .linkage = {NULL, NULL}
  62261. + },
  62262. + .have_tail = have_formatting_always
  62263. + },
  62264. + [SMALL_FILE_FORMATTING_ID] = {
  62265. + .h = {
  62266. + .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
  62267. + .id = SMALL_FILE_FORMATTING_ID,
  62268. + .pops = NULL,
  62269. + .label = "4blocks",
  62270. + .desc = "store files shorter than 4 blocks in tail items",
  62271. + .linkage = {NULL, NULL}
  62272. + },
  62273. + .have_tail = have_formatting_default
  62274. + }
  62275. +};
  62276. +
  62277. +/*
  62278. + * Local variables:
  62279. + * c-indentation-style: "K&R"
  62280. + * mode-name: "LC"
  62281. + * c-basic-offset: 8
  62282. + * tab-width: 8
  62283. + * fill-column: 79
  62284. + * End:
  62285. + */
  62286. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/plugin/txmod.c linux-5.16.14/fs/reiser4/plugin/txmod.c
  62287. --- linux-5.16.14.orig/fs/reiser4/plugin/txmod.c 1970-01-01 01:00:00.000000000 +0100
  62288. +++ linux-5.16.14/fs/reiser4/plugin/txmod.c 2022-03-12 13:26:19.686892811 +0100
  62289. @@ -0,0 +1,1238 @@
  62290. +#include "../forward.h"
  62291. +#include "../debug.h"
  62292. +#include "../coord.h"
  62293. +#include "../plugin/plugin.h"
  62294. +#include "../jnode.h"
  62295. +#include "../znode.h"
  62296. +#include "../block_alloc.h"
  62297. +#include "../reiser4.h"
  62298. +#include "../flush.h"
  62299. +
  62300. +/*
  62301. + * This file contains implementation of different transaction models.
  62302. + *
  62303. + * Transaction model is a high-level block allocator, which assigns block
  62304. + * numbers to dirty nodes, and, thereby, decides, how those nodes will be
  62305. + * committed.
  62306. + *
  62307. + * Every dirty node of reiser4 atom can be committed by either of the
  62308. + * following two ways:
  62309. + * 1) via journal;
  62310. + * 2) using "write-anywhere" technique.
  62311. + *
  62312. + * If the allocator doesn't change on-disk location of a node, then
  62313. + * this node will be committed using journalling technique (overwrite).
  62314. + * Otherwise, it will be committed via write-anywhere technique (relocate):
  62315. + *
  62316. + * relocate <---- allocate ----> overwrite
  62317. + *
  62318. + * So, in our interpretation the 2 traditional "classic" strategies in
  62319. + * committing transactions (journalling and "write-anywhere") are just two
  62320. + * boundary cases: 1) when all nodes are overwritten, and 2) when all nodes
  62321. + * are relocated.
  62322. + *
  62323. + * Besides those 2 boundary cases we can implement in reiser4 the infinite
  62324. + * set of their various combinations, so that user can choose what is really
  62325. + * suitable for his needs.
  62326. + */
  62327. +
  62328. +/* jnode_make_wander_nolock <- find_flush_start_jnode (special case for znode-above-root)
  62329. + <- jnode_make_wander */
  62330. +void jnode_make_wander_nolock(jnode * node);
  62331. +
  62332. +/* jnode_make_wander <- txmod.forward_alloc_formatted */
  62333. +void jnode_make_wander(jnode * node);
  62334. +
  62335. +/* jnode_make_reloc_nolock <- znode_make_reloc
  62336. + <- unformatted_make_reloc */
  62337. +static void jnode_make_reloc_nolock(flush_queue_t * fq, jnode * node);
  62338. +
  62339. +
  62340. +
  62341. + /* Handle formatted nodes in forward context */
  62342. +
  62343. +
  62344. +/**
  62345. + * txmod.forward_alloc_formatted <- allocate_znode <- alloc_pos_and_ancestors <- jnode_flush
  62346. + * <- alloc_one_ancestor <- alloc_pos_and_ancestors <- jnode_flush
  62347. + * <- alloc_one_ancestor (recursive)
  62348. + * <- lock_parent_and_allocate_znode <- squalloc_upper_levels <- check_parents_and_squalloc_upper_levels <- squalloc_upper_levels (recursive)
  62349. + * <- handle_pos_on_formatted
  62350. + * <- handle_pos_on_formatted
  62351. + * <- handle_pos_end_of_twig
  62352. + * <- handle_pos_to_leaf
  62353. + */
  62354. +void znode_make_reloc(znode * z, flush_queue_t * fq);
  62355. +
  62356. +
  62357. + /* Handle unformatted nodes */
  62358. +
  62359. +
  62360. +/* unformatted_make_reloc <- assign_real_blocknrs <- txmod.forward_alloc_unformatted
  62361. + <- txmod.squeeze_alloc_unformatted
  62362. +*/
  62363. +void unformatted_make_reloc(jnode *node, flush_queue_t *fq);
  62364. +
  62365. +static void forward_overwrite_unformatted(flush_pos_t *flush_pos, oid_t oid,
  62366. + unsigned long index, reiser4_block_nr width);
  62367. +
  62368. +/* mark_jnode_overwrite <- forward_overwrite_unformatted <- txmod.forward_alloc_unformatted
  62369. + squeeze_overwrite_unformatted <- txmod.squeeze_alloc_unformatted
  62370. +*/
  62371. +static void mark_jnode_overwrite(struct list_head *jnodes, jnode *node);
  62372. +
  62373. +int split_allocated_extent(coord_t *coord, reiser4_block_nr pos_in_unit);
  62374. +int allocated_extent_slum_size(flush_pos_t *flush_pos, oid_t oid,
  62375. + unsigned long index, unsigned long count);
  62376. +void allocate_blocks_unformatted(reiser4_blocknr_hint *preceder,
  62377. + reiser4_block_nr wanted_count,
  62378. + reiser4_block_nr *first_allocated,
  62379. + reiser4_block_nr *allocated,
  62380. + block_stage_t block_stage);
  62381. +void assign_real_blocknrs(flush_pos_t *flush_pos, oid_t oid,
  62382. + unsigned long index, reiser4_block_nr count,
  62383. + reiser4_block_nr first);
  62384. +int convert_extent(coord_t *coord, reiser4_extent *replace);
  62385. +int put_unit_to_end(znode *node,
  62386. + const reiser4_key *key, reiser4_extent *copy_ext);
  62387. +
  62388. +/*
  62389. + * txmod.forward_alloc_unformatted <- handle_pos_on_twig
  62390. + * txmod.squeeze_alloc_unformatted <- squeeze_right_twig
  62391. + */
  62392. +
  62393. +/* Common functions */
  62394. +
  62395. +/**
  62396. + * Mark node JNODE_OVRWR and put it on atom->overwrite_nodes list.
  62397. + * Atom lock and jnode lock should be taken before calling this
  62398. + * function.
  62399. + */
  62400. +void jnode_make_wander_nolock(jnode * node)
  62401. +{
  62402. + txn_atom *atom;
  62403. +
  62404. + assert("nikita-2432", !JF_ISSET(node, JNODE_RELOC));
  62405. + assert("nikita-3153", JF_ISSET(node, JNODE_DIRTY));
  62406. + assert("zam-897", !JF_ISSET(node, JNODE_FLUSH_QUEUED));
  62407. + assert("nikita-3367", !reiser4_blocknr_is_fake(jnode_get_block(node)));
  62408. +
  62409. + atom = node->atom;
  62410. +
  62411. + assert("zam-895", atom != NULL);
  62412. + assert("zam-894", atom_is_protected(atom));
  62413. +
  62414. + JF_SET(node, JNODE_OVRWR);
  62415. + /* move node to atom's overwrite list */
  62416. + list_move_tail(&node->capture_link, ATOM_OVRWR_LIST(atom));
  62417. + ON_DEBUG(count_jnode(atom, node, DIRTY_LIST, OVRWR_LIST, 1));
  62418. +}
  62419. +
  62420. +/*
  62421. + * Same as jnode_make_wander_nolock, but all necessary locks
  62422. + * are taken inside this function.
  62423. + */
  62424. +void jnode_make_wander(jnode * node)
  62425. +{
  62426. + txn_atom *atom;
  62427. +
  62428. + spin_lock_jnode(node);
  62429. + atom = jnode_get_atom(node);
  62430. + assert("zam-913", atom != NULL);
  62431. + assert("zam-914", !JF_ISSET(node, JNODE_RELOC));
  62432. +
  62433. + jnode_make_wander_nolock(node);
  62434. + spin_unlock_atom(atom);
  62435. + spin_unlock_jnode(node);
  62436. +}
  62437. +
  62438. +/* this just sets RELOC bit */
  62439. +static void jnode_make_reloc_nolock(flush_queue_t * fq, jnode * node)
  62440. +{
  62441. + assert_spin_locked(&(node->guard));
  62442. + assert("zam-916", JF_ISSET(node, JNODE_DIRTY));
  62443. + assert("zam-917", !JF_ISSET(node, JNODE_RELOC));
  62444. + assert("zam-918", !JF_ISSET(node, JNODE_OVRWR));
  62445. + assert("zam-920", !JF_ISSET(node, JNODE_FLUSH_QUEUED));
  62446. + assert("nikita-3367", !reiser4_blocknr_is_fake(jnode_get_block(node)));
  62447. + jnode_set_reloc(node);
  62448. +}
  62449. +
  62450. +/*
  62451. + * Mark znode RELOC and put it on flush queue
  62452. + */
  62453. +void znode_make_reloc(znode * z, flush_queue_t * fq)
  62454. +{
  62455. + jnode *node;
  62456. + txn_atom *atom;
  62457. +
  62458. + node = ZJNODE(z);
  62459. + spin_lock_jnode(node);
  62460. +
  62461. + atom = jnode_get_atom(node);
  62462. + assert("zam-919", atom != NULL);
  62463. +
  62464. + jnode_make_reloc_nolock(fq, node);
  62465. + queue_jnode(fq, node);
  62466. +
  62467. + spin_unlock_atom(atom);
  62468. + spin_unlock_jnode(node);
  62469. +}
  62470. +
  62471. +/* Mark unformatted node RELOC and put it on flush queue */
  62472. +void unformatted_make_reloc(jnode *node, flush_queue_t *fq)
  62473. +{
  62474. + assert("vs-1479", jnode_is_unformatted(node));
  62475. +
  62476. + jnode_make_reloc_nolock(fq, node);
  62477. + queue_jnode(fq, node);
  62478. +}
  62479. +
  62480. +/**
  62481. + * mark_jnode_overwrite - assign node to overwrite set
  62482. + * @jnodes: overwrite set list head
  62483. + * @node: jnode to belong to overwrite set
  62484. + *
  62485. + * Sets OVRWR jnode state bit and puts @node to the end of list head @jnodes
  62486. + * which is an accumulator for nodes before they get to overwrite set list of
  62487. + * atom.
  62488. + */
  62489. +static void mark_jnode_overwrite(struct list_head *jnodes, jnode *node)
  62490. +{
  62491. + spin_lock_jnode(node);
  62492. +
  62493. + assert("zam-917", !JF_ISSET(node, JNODE_RELOC));
  62494. + assert("zam-918", !JF_ISSET(node, JNODE_OVRWR));
  62495. +
  62496. + JF_SET(node, JNODE_OVRWR);
  62497. + list_move_tail(&node->capture_link, jnodes);
  62498. + ON_DEBUG(count_jnode(node->atom, node, DIRTY_LIST, OVRWR_LIST, 0));
  62499. +
  62500. + spin_unlock_jnode(node);
  62501. +}
  62502. +
  62503. +static int forward_relocate_unformatted(flush_pos_t *flush_pos,
  62504. + reiser4_extent *ext,
  62505. + extent_state state,
  62506. + oid_t oid, __u64 index,
  62507. + __u64 width, int *exit)
  62508. +{
  62509. + int result;
  62510. + coord_t *coord;
  62511. + reiser4_extent replace_ext;
  62512. + reiser4_block_nr protected;
  62513. + reiser4_block_nr start;
  62514. + reiser4_block_nr first_allocated;
  62515. + __u64 allocated;
  62516. + block_stage_t block_stage;
  62517. +
  62518. + *exit = 0;
  62519. + coord = &flush_pos->coord;
  62520. + start = extent_get_start(ext);
  62521. +
  62522. + if (flush_pos->pos_in_unit) {
  62523. + /*
  62524. + * split extent unit into two ones
  62525. + */
  62526. + result = split_allocated_extent(coord,
  62527. + flush_pos->pos_in_unit);
  62528. + flush_pos->pos_in_unit = 0;
  62529. + *exit = 1;
  62530. + return result;
  62531. + }
  62532. + /*
  62533. + * limit number of nodes to allocate
  62534. + */
  62535. + if (flush_pos->nr_to_write < width)
  62536. + width = flush_pos->nr_to_write;
  62537. +
  62538. + if (state == ALLOCATED_EXTENT) {
  62539. + /*
  62540. + * all protected nodes are not flushprepped, therefore
  62541. + * they are counted as flush_reserved
  62542. + */
  62543. + block_stage = BLOCK_FLUSH_RESERVED;
  62544. + protected = allocated_extent_slum_size(flush_pos, oid,
  62545. + index, width);
  62546. + if (protected == 0) {
  62547. + flush_pos->state = POS_INVALID;
  62548. + flush_pos->pos_in_unit = 0;
  62549. + *exit = 1;
  62550. + return 0;
  62551. + }
  62552. + } else {
  62553. + block_stage = BLOCK_UNALLOCATED;
  62554. + protected = width;
  62555. + }
  62556. + /*
  62557. + * look at previous unit if possible. If it is allocated, make
  62558. + * preceder more precise
  62559. + */
  62560. + if (coord->unit_pos &&
  62561. + (state_of_extent(ext - 1) == ALLOCATED_EXTENT))
  62562. + reiser4_pos_hint(flush_pos)->blk =
  62563. + extent_get_start(ext - 1) +
  62564. + extent_get_width(ext - 1);
  62565. + /*
  62566. + * allocate new block numbers for protected nodes
  62567. + */
  62568. + allocate_blocks_unformatted(reiser4_pos_hint(flush_pos),
  62569. + protected,
  62570. + &first_allocated, &allocated,
  62571. + block_stage);
  62572. +
  62573. + if (state == ALLOCATED_EXTENT)
  62574. + /*
  62575. + * on relocating - free nodes which are going to be
  62576. + * relocated
  62577. + */
  62578. + reiser4_dealloc_blocks(&start, &allocated, 0, BA_DEFER);
  62579. +
  62580. + /* assign new block numbers to protected nodes */
  62581. + assign_real_blocknrs(flush_pos, oid, index, allocated, first_allocated);
  62582. +
  62583. + /* prepare extent which will replace current one */
  62584. + reiser4_set_extent(&replace_ext, first_allocated, allocated);
  62585. +
  62586. + /* adjust extent item */
  62587. + result = convert_extent(coord, &replace_ext);
  62588. + if (result != 0 && result != -ENOMEM) {
  62589. + warning("vs-1461",
  62590. + "Failed to allocate extent. Should not happen\n");
  62591. + *exit = 1;
  62592. + return result;
  62593. + }
  62594. + /*
  62595. + * break flush: we prepared for flushing as many blocks as we
  62596. + * were asked for
  62597. + */
  62598. + if (flush_pos->nr_to_write == allocated)
  62599. + flush_pos->state = POS_INVALID;
  62600. + return 0;
  62601. +}
  62602. +
  62603. +static squeeze_result squeeze_relocate_unformatted(znode *left,
  62604. + const coord_t *coord,
  62605. + flush_pos_t *flush_pos,
  62606. + reiser4_key *key,
  62607. + reiser4_key *stop_key)
  62608. +{
  62609. + int result;
  62610. + reiser4_extent *ext;
  62611. + __u64 index;
  62612. + __u64 width;
  62613. + reiser4_block_nr start;
  62614. + extent_state state;
  62615. + oid_t oid;
  62616. + reiser4_block_nr first_allocated;
  62617. + __u64 allocated;
  62618. + __u64 protected;
  62619. + reiser4_extent copy_extent;
  62620. + block_stage_t block_stage;
  62621. +
  62622. + assert("edward-1610", flush_pos->pos_in_unit == 0);
  62623. + assert("edward-1611", coord_is_leftmost_unit(coord));
  62624. + assert("edward-1612", item_is_extent(coord));
  62625. +
  62626. + ext = extent_by_coord(coord);
  62627. + index = extent_unit_index(coord);
  62628. + start = extent_get_start(ext);
  62629. + width = extent_get_width(ext);
  62630. + state = state_of_extent(ext);
  62631. + unit_key_by_coord(coord, key);
  62632. + oid = get_key_objectid(key);
  62633. +
  62634. + assert("edward-1613", state != HOLE_EXTENT);
  62635. +
  62636. + if (state == ALLOCATED_EXTENT) {
  62637. + /*
  62638. + * all protected nodes are not flushprepped,
  62639. + * therefore they are counted as flush_reserved
  62640. + */
  62641. + block_stage = BLOCK_FLUSH_RESERVED;
  62642. + protected = allocated_extent_slum_size(flush_pos, oid,
  62643. + index, width);
  62644. + if (protected == 0) {
  62645. + flush_pos->state = POS_INVALID;
  62646. + flush_pos->pos_in_unit = 0;
  62647. + return 0;
  62648. + }
  62649. + } else {
  62650. + block_stage = BLOCK_UNALLOCATED;
  62651. + protected = width;
  62652. + }
  62653. + /*
  62654. + * look at previous unit if possible. If it is allocated, make
  62655. + * preceder more precise
  62656. + */
  62657. + if (coord->unit_pos &&
  62658. + (state_of_extent(ext - 1) == ALLOCATED_EXTENT))
  62659. + reiser4_pos_hint(flush_pos)->blk =
  62660. + extent_get_start(ext - 1) +
  62661. + extent_get_width(ext - 1);
  62662. + /*
  62663. + * allocate new block numbers for protected nodes
  62664. + */
  62665. + allocate_blocks_unformatted(reiser4_pos_hint(flush_pos),
  62666. + protected,
  62667. + &first_allocated, &allocated,
  62668. + block_stage);
  62669. + /*
  62670. + * prepare extent which will be copied to left
  62671. + */
  62672. + reiser4_set_extent(&copy_extent, first_allocated, allocated);
  62673. + result = put_unit_to_end(left, key, &copy_extent);
  62674. +
  62675. + if (result == -E_NODE_FULL) {
  62676. + /*
  62677. + * free blocks which were just allocated
  62678. + */
  62679. + reiser4_dealloc_blocks(&first_allocated, &allocated,
  62680. + (state == ALLOCATED_EXTENT)
  62681. + ? BLOCK_FLUSH_RESERVED
  62682. + : BLOCK_UNALLOCATED,
  62683. + BA_PERMANENT);
  62684. + /*
  62685. + * rewind the preceder
  62686. + */
  62687. + flush_pos->preceder.blk = first_allocated;
  62688. + check_preceder(flush_pos->preceder.blk);
  62689. + return SQUEEZE_TARGET_FULL;
  62690. + }
  62691. + if (state == ALLOCATED_EXTENT) {
  62692. + /*
  62693. + * free nodes which were relocated
  62694. + */
  62695. + reiser4_dealloc_blocks(&start, &allocated, 0, BA_DEFER);
  62696. + }
  62697. + /*
  62698. + * assign new block numbers to protected nodes
  62699. + */
  62700. + assign_real_blocknrs(flush_pos, oid, index, allocated,
  62701. + first_allocated);
  62702. + set_key_offset(key,
  62703. + get_key_offset(key) +
  62704. + (allocated << current_blocksize_bits));
  62705. + return SQUEEZE_CONTINUE;
  62706. +}
  62707. +
  62708. +/**
  62709. + * forward_overwrite_unformatted - put bunch of jnodes to overwrite set
  62710. + * @flush_pos: flush position
  62711. + * @oid: objectid of file jnodes belong to
  62712. + * @index: starting index
  62713. + * @width: extent width
  62714. + *
  62715. + * Puts nodes of one extent (file objectid @oid, extent width @width) to atom's
  62716. + * overwrite set. Starting from the one with index @index. If end of slum is
  62717. + * detected (node is not found or flushprepped) - stop iterating and set flush
  62718. + * position's state to POS_INVALID.
  62719. + */
  62720. +static void forward_overwrite_unformatted(flush_pos_t *flush_pos, oid_t oid,
  62721. + unsigned long index,
  62722. + reiser4_block_nr width)
  62723. +{
  62724. + unsigned long i;
  62725. + reiser4_tree *tree;
  62726. + jnode *node;
  62727. + txn_atom *atom;
  62728. + LIST_HEAD(jnodes);
  62729. +
  62730. + tree = current_tree;
  62731. +
  62732. + atom = atom_locked_by_fq(reiser4_pos_fq(flush_pos));
  62733. + assert("vs-1478", atom);
  62734. +
  62735. + for (i = flush_pos->pos_in_unit; i < width; i++, index++) {
  62736. + node = jlookup(tree, oid, index);
  62737. + if (!node) {
  62738. + flush_pos->state = POS_INVALID;
  62739. + break;
  62740. + }
  62741. + if (jnode_check_flushprepped(node)) {
  62742. + flush_pos->state = POS_INVALID;
  62743. + atomic_dec(&node->x_count);
  62744. + break;
  62745. + }
  62746. + if (node->atom != atom) {
  62747. + flush_pos->state = POS_INVALID;
  62748. + atomic_dec(&node->x_count);
  62749. + break;
  62750. + }
  62751. + mark_jnode_overwrite(&jnodes, node);
  62752. + atomic_dec(&node->x_count);
  62753. + }
  62754. +
  62755. + list_splice_init(&jnodes, ATOM_OVRWR_LIST(atom)->prev);
  62756. + spin_unlock_atom(atom);
  62757. +}
  62758. +
  62759. +static squeeze_result squeeze_overwrite_unformatted(znode *left,
  62760. + const coord_t *coord,
  62761. + flush_pos_t *flush_pos,
  62762. + reiser4_key *key,
  62763. + reiser4_key *stop_key)
  62764. +{
  62765. + int result;
  62766. + reiser4_extent *ext;
  62767. + __u64 index;
  62768. + __u64 width;
  62769. + reiser4_block_nr start;
  62770. + extent_state state;
  62771. + oid_t oid;
  62772. + reiser4_extent copy_extent;
  62773. +
  62774. + assert("vs-1457", flush_pos->pos_in_unit == 0);
  62775. + assert("vs-1467", coord_is_leftmost_unit(coord));
  62776. + assert("vs-1467", item_is_extent(coord));
  62777. +
  62778. + ext = extent_by_coord(coord);
  62779. + index = extent_unit_index(coord);
  62780. + start = extent_get_start(ext);
  62781. + width = extent_get_width(ext);
  62782. + state = state_of_extent(ext);
  62783. + unit_key_by_coord(coord, key);
  62784. + oid = get_key_objectid(key);
  62785. + /*
  62786. + * try to copy unit as it is to left neighbor
  62787. + * and make all first not flushprepped nodes
  62788. + * overwrite nodes
  62789. + */
  62790. + reiser4_set_extent(&copy_extent, start, width);
  62791. +
  62792. + result = put_unit_to_end(left, key, &copy_extent);
  62793. + if (result == -E_NODE_FULL)
  62794. + return SQUEEZE_TARGET_FULL;
  62795. +
  62796. + if (state != HOLE_EXTENT)
  62797. + forward_overwrite_unformatted(flush_pos, oid, index, width);
  62798. +
  62799. + set_key_offset(key,
  62800. + get_key_offset(key) + (width << current_blocksize_bits));
  62801. + return SQUEEZE_CONTINUE;
  62802. +}
  62803. +
  62804. +/************************ HYBRID TRANSACTION MODEL ****************************/
  62805. +
  62806. +/**
  62807. + * This is the default transaction model suggested by Josh MacDonald and
  62808. + * Hans Reiser. This was the single hardcoded transaction mode till Feb 2014
  62809. + * when Edward introduced pure Journalling and pure Write-Anywhere.
  62810. + *
  62811. + * In this mode all relocate-overwrite decisions are result of attempts to
  62812. + * defragment atom's locality.
  62813. + */
  62814. +
  62815. +/* REVERSE PARENT-FIRST RELOCATION POLICIES */
  62816. +
  62817. +/* This implements the is-it-close-enough-to-its-preceder? test for relocation
  62818. + in the reverse parent-first relocate context. Here all we know is the
  62819. + preceder and the block number. Since we are going in reverse, the preceder
  62820. + may still be relocated as well, so we can't ask the block allocator "is there
  62821. + a closer block available to relocate?" here. In the _forward_ parent-first
  62822. + relocate context (not here) we actually call the block allocator to try and
  62823. + find a closer location.
  62824. +*/
  62825. +static int reverse_try_defragment_if_close(const reiser4_block_nr * pblk,
  62826. + const reiser4_block_nr * nblk)
  62827. +{
  62828. + reiser4_block_nr dist;
  62829. +
  62830. + assert("jmacd-7710", *pblk != 0 && *nblk != 0);
  62831. + assert("jmacd-7711", !reiser4_blocknr_is_fake(pblk));
  62832. + assert("jmacd-7712", !reiser4_blocknr_is_fake(nblk));
  62833. +
  62834. + /* Distance is the absolute value. */
  62835. + dist = (*pblk > *nblk) ? (*pblk - *nblk) : (*nblk - *pblk);
  62836. +
  62837. + /* If the block is less than FLUSH_RELOCATE_DISTANCE blocks away from
  62838. + its preceder block, do not relocate. */
  62839. + if (dist <= get_current_super_private()->flush.relocate_distance)
  62840. + return 0;
  62841. +
  62842. + return 1;
  62843. +}
  62844. +
  62845. +/**
  62846. + * This function is a predicate that tests for relocation. Always called in the
  62847. + * reverse-parent-first context, when we are asking whether the current node
  62848. + * should be relocated in order to expand the flush by dirtying the parent level
  62849. + * (and thus proceeding to flush that level). When traversing in the forward
  62850. + * parent-first direction (not here), relocation decisions are handled in two
  62851. + * places: allocate_znode() and extent_needs_allocation().
  62852. + */
  62853. +static int reverse_alloc_formatted_hybrid(jnode * node,
  62854. + const coord_t *parent_coord,
  62855. + flush_pos_t *pos)
  62856. +{
  62857. + reiser4_block_nr pblk = 0;
  62858. + reiser4_block_nr nblk = 0;
  62859. +
  62860. + assert("jmacd-8989", !jnode_is_root(node));
  62861. + /*
  62862. + * This function is called only from the
  62863. + * reverse_relocate_check_dirty_parent() and only if the parent
  62864. + * node is clean. This implies that the parent has the real (i.e., not
  62865. + * fake) block number, and, so does the child, because otherwise the
  62866. + * parent would be dirty.
  62867. + */
  62868. +
  62869. + /* New nodes are treated as if they are being relocated. */
  62870. + if (JF_ISSET(node, JNODE_CREATED) ||
  62871. + (pos->leaf_relocate && jnode_get_level(node) == LEAF_LEVEL))
  62872. + return 1;
  62873. +
  62874. + /* Find the preceder. FIXME(B): When the child is an unformatted,
  62875. + previously existing node, the coord may be leftmost even though the
  62876. + child is not the parent-first preceder of the parent. If the first
  62877. + dirty node appears somewhere in the middle of the first extent unit,
  62878. + this preceder calculation is wrong.
  62879. + Needs more logic in here. */
  62880. + if (coord_is_leftmost_unit(parent_coord)) {
  62881. + pblk = *znode_get_block(parent_coord->node);
  62882. + } else {
  62883. + pblk = pos->preceder.blk;
  62884. + }
  62885. + check_preceder(pblk);
  62886. +
  62887. + /* If (pblk == 0) then the preceder isn't allocated or isn't known:
  62888. + relocate. */
  62889. + if (pblk == 0)
  62890. + return 1;
  62891. +
  62892. + nblk = *jnode_get_block(node);
  62893. +
  62894. + if (reiser4_blocknr_is_fake(&nblk))
  62895. + /* child is unallocated, mark parent dirty */
  62896. + return 1;
  62897. +
  62898. + return reverse_try_defragment_if_close(&pblk, &nblk);
  62899. +}
  62900. +
  62901. +/**
  62902. + * A subroutine of forward_alloc_formatted_hybrid(), this is called first to see
  62903. + * if there is a close position to relocate to. It may return ENOSPC if there is
  62904. + * no close position. If there is no close position it may not relocate. This
  62905. + * takes care of updating the parent node with the relocated block address.
  62906. + *
  62907. + * was allocate_znode_update()
  62908. + */
  62909. +static int forward_try_defragment_locality(znode * node,
  62910. + const coord_t *parent_coord,
  62911. + flush_pos_t *pos)
  62912. +{
  62913. + int ret;
  62914. + reiser4_block_nr blk;
  62915. + lock_handle uber_lock;
  62916. + int flush_reserved_used = 0;
  62917. + int grabbed;
  62918. + reiser4_context *ctx;
  62919. + reiser4_super_info_data *sbinfo;
  62920. +
  62921. + init_lh(&uber_lock);
  62922. +
  62923. + ctx = get_current_context();
  62924. + sbinfo = get_super_private(ctx->super);
  62925. +
  62926. + grabbed = ctx->grabbed_blocks;
  62927. +
  62928. + ret = zload(node);
  62929. + if (ret)
  62930. + return ret;
  62931. +
  62932. + if (ZF_ISSET(node, JNODE_CREATED)) {
  62933. + assert("zam-816", reiser4_blocknr_is_fake(znode_get_block(node)));
  62934. + pos->preceder.block_stage = BLOCK_UNALLOCATED;
  62935. + } else {
  62936. + pos->preceder.block_stage = BLOCK_GRABBED;
  62937. +
  62938. + /* The disk space for relocating the @node is already reserved
  62939. + * in "flush reserved" counter if @node is leaf, otherwise we
  62940. + * grab space using BA_RESERVED (means grab space from whole
  62941. + * disk not from only 95%). */
  62942. + if (znode_get_level(node) == LEAF_LEVEL) {
  62943. + /*
  62944. + * earlier (during do_jnode_make_dirty()) we decided
  62945. + * that @node can possibly go into overwrite set and
  62946. + * reserved block for its wandering location.
  62947. + */
  62948. + txn_atom *atom = get_current_atom_locked();
  62949. + assert("nikita-3449",
  62950. + ZF_ISSET(node, JNODE_FLUSH_RESERVED));
  62951. + flush_reserved2grabbed(atom, (__u64) 1);
  62952. + spin_unlock_atom(atom);
  62953. + /*
  62954. + * we are trying to move node into relocate
  62955. + * set. Allocation of relocated position "uses"
  62956. + * reserved block.
  62957. + */
  62958. + ZF_CLR(node, JNODE_FLUSH_RESERVED);
  62959. + flush_reserved_used = 1;
  62960. + } else {
  62961. + ret = reiser4_grab_space_force((__u64) 1, BA_RESERVED);
  62962. + if (ret != 0)
  62963. + goto exit;
  62964. + }
  62965. + }
  62966. +
  62967. + /* We might not use 5% of reserved disk space here and flush will not
  62968. + pack tightly. */
  62969. + ret = reiser4_alloc_block(&pos->preceder, &blk,
  62970. + BA_FORMATTED | BA_PERMANENT);
  62971. + if (ret)
  62972. + goto exit;
  62973. +
  62974. + if (!ZF_ISSET(node, JNODE_CREATED) &&
  62975. + (ret = reiser4_dealloc_block(znode_get_block(node), 0,
  62976. + BA_DEFER | BA_FORMATTED)))
  62977. + goto exit;
  62978. +
  62979. + if (likely(!znode_is_root(node))) {
  62980. + item_plugin *iplug;
  62981. +
  62982. + iplug = item_plugin_by_coord(parent_coord);
  62983. + assert("nikita-2954", iplug->f.update != NULL);
  62984. + iplug->f.update(parent_coord, &blk);
  62985. +
  62986. + znode_make_dirty(parent_coord->node);
  62987. +
  62988. + } else {
  62989. + reiser4_tree *tree = znode_get_tree(node);
  62990. + znode *uber;
  62991. +
  62992. + /* We take a longterm lock on the fake node in order to change
  62993. + the root block number. This may cause atom fusion. */
  62994. + ret = get_uber_znode(tree, ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI,
  62995. + &uber_lock);
  62996. + /* The fake node cannot be deleted, and we must have priority
  62997. + here, and may not be confused with ENOSPC. */
  62998. + assert("jmacd-74412",
  62999. + ret != -EINVAL && ret != -E_DEADLOCK && ret != -ENOSPC);
  63000. +
  63001. + if (ret)
  63002. + goto exit;
  63003. +
  63004. + uber = uber_lock.node;
  63005. +
  63006. + write_lock_tree(tree);
  63007. + tree->root_block = blk;
  63008. + write_unlock_tree(tree);
  63009. +
  63010. + znode_make_dirty(uber);
  63011. + }
  63012. + ret = znode_rehash(node, &blk);
  63013. +exit:
  63014. + if (ret) {
  63015. + /* Get flush reserved block back if something fails, because
  63016. + * callers assume that on error block wasn't relocated and its
  63017. + * flush reserved block wasn't used. */
  63018. + if (flush_reserved_used) {
  63019. + /*
  63020. + * ok, we failed to move node into relocate
  63021. + * set. Restore status quo.
  63022. + */
  63023. + grabbed2flush_reserved((__u64) 1);
  63024. + ZF_SET(node, JNODE_FLUSH_RESERVED);
  63025. + }
  63026. + }
  63027. + zrelse(node);
  63028. + done_lh(&uber_lock);
  63029. + grabbed2free_mark(grabbed);
  63030. + return ret;
  63031. +}
  63032. +
  63033. +/*
  63034. + * Make the final relocate/wander decision during
  63035. + * forward parent-first squalloc for a formatted node
  63036. + */
static int forward_alloc_formatted_hybrid(znode * node,
					  const coord_t *parent_coord,
					  flush_pos_t *pos)
{
	int ret;
	reiser4_super_info_data *sbinfo = get_current_super_private();
	/**
	 * FIXME(D): We have the node write-locked and should have checked for !
	 * allocated() somewhere before reaching this point, but there can be a
	 * race, so this assertion is bogus.
	 */
	assert("edward-1614", znode_is_loaded(node));
	assert("jmacd-7987", !jnode_check_flushprepped(ZJNODE(node)));
	assert("jmacd-7988", znode_is_write_locked(node));
	assert("jmacd-7989", coord_is_invalid(parent_coord)
	       || znode_is_write_locked(parent_coord->node));

	/*
	 * Unconditional relocation cases: node marked for repacking, node
	 * created in this transaction, the root node, or a leaf when the
	 * leaf_relocate policy is in force.
	 */
	if (ZF_ISSET(node, JNODE_REPACK) || ZF_ISSET(node, JNODE_CREATED) ||
	    znode_is_root(node) ||
	    /*
	     * We have enough nodes to relocate no matter what.
	     */
	    (pos->leaf_relocate != 0 && znode_get_level(node) == LEAF_LEVEL)) {
		/*
		 * No need to decide with new nodes, they are treated the same
		 * as relocate. If the root node is dirty, relocate.
		 */
		if (pos->preceder.blk == 0) {
			/*
			 * preceder is unknown and we have decided to relocate
			 * node -- using of default value for search start is
			 * better than search from block #0.
			 */
			get_blocknr_hint_default(&pos->preceder.blk);
			check_preceder(pos->preceder.blk);
		}
		/*
		 * jump into the innermost else-branch below: best-effort
		 * relocation without a max_dist constraint
		 */
		goto best_reloc;

	} else if (pos->preceder.blk == 0) {
		/* If we don't know the preceder, leave it where it is. */
		jnode_make_wander(ZJNODE(node));
	} else {
		/* Make a decision based on block distance. */
		reiser4_block_nr dist;
		reiser4_block_nr nblk = *znode_get_block(node);

		assert("jmacd-6172", !reiser4_blocknr_is_fake(&nblk));
		assert("jmacd-6173", !reiser4_blocknr_is_fake(&pos->preceder.blk));
		assert("jmacd-6174", pos->preceder.blk != 0);

		if (pos->preceder.blk == nblk - 1) {
			/* Ideal. */
			jnode_make_wander(ZJNODE(node));
		} else {

			/* absolute distance between current block and the
			 * preceder, in either direction */
			dist =
			    (nblk <
			     pos->preceder.blk) ? (pos->preceder.blk -
						   nblk) : (nblk -
							    pos->preceder.blk);

			/* See if we can find a closer block
			   (forward direction only). */
			pos->preceder.max_dist =
			    min((reiser4_block_nr) sbinfo->flush.
				relocate_distance, dist);
			pos->preceder.level = znode_get_level(node);

			ret = forward_try_defragment_locality(node,
							      parent_coord,
							      pos);
			/* reset the constraint for subsequent allocations */
			pos->preceder.max_dist = 0;

			/* -ENOSPC here only means "nothing closer within
			 * max_dist"; other errors are fatal */
			if (ret && (ret != -ENOSPC))
				return ret;

			if (ret == 0) {
				/* Got a better allocation. */
				znode_make_reloc(node, pos->fq);
			} else if (dist < sbinfo->flush.relocate_distance) {
				/* The present allocation is good enough. */
				jnode_make_wander(ZJNODE(node));
			} else {
				/*
				 * Otherwise, try to relocate to the best
				 * position.
				 */
best_reloc:
				ret = forward_try_defragment_locality(node,
								      parent_coord,
								      pos);
				if (ret != 0)
					return ret;
				/*
				 * set JNODE_RELOC bit _after_ node gets
				 * allocated
				 */
				znode_make_reloc(node, pos->fq);
			}
		}
	}
	/*
	 * This is the new preceder
	 */
	pos->preceder.blk = *znode_get_block(node);
	check_preceder(pos->preceder.blk);
	pos->alloc_cnt += 1;

	assert("jmacd-4277", !reiser4_blocknr_is_fake(&pos->preceder.blk));

	return 0;
}
  63149. +
  63150. +static int forward_alloc_unformatted_hybrid(flush_pos_t *flush_pos)
  63151. +{
  63152. + coord_t *coord;
  63153. + reiser4_extent *ext;
  63154. + oid_t oid;
  63155. + __u64 index;
  63156. + __u64 width;
  63157. + extent_state state;
  63158. + reiser4_key key;
  63159. +
  63160. + assert("vs-1468", flush_pos->state == POS_ON_EPOINT);
  63161. + assert("vs-1469", coord_is_existing_unit(&flush_pos->coord)
  63162. + && item_is_extent(&flush_pos->coord));
  63163. +
  63164. + coord = &flush_pos->coord;
  63165. +
  63166. + ext = extent_by_coord(coord);
  63167. + state = state_of_extent(ext);
  63168. + if (state == HOLE_EXTENT) {
  63169. + flush_pos->state = POS_INVALID;
  63170. + return 0;
  63171. + }
  63172. + item_key_by_coord(coord, &key);
  63173. + oid = get_key_objectid(&key);
  63174. + index = extent_unit_index(coord) + flush_pos->pos_in_unit;
  63175. + width = extent_get_width(ext);
  63176. +
  63177. + assert("vs-1457", width > flush_pos->pos_in_unit);
  63178. +
  63179. + if (flush_pos->leaf_relocate || state == UNALLOCATED_EXTENT) {
  63180. + int exit;
  63181. + int result;
  63182. + result = forward_relocate_unformatted(flush_pos, ext, state,
  63183. + oid,
  63184. + index, width, &exit);
  63185. + if (exit)
  63186. + return result;
  63187. + } else
  63188. + forward_overwrite_unformatted(flush_pos, oid, index, width);
  63189. +
  63190. + flush_pos->pos_in_unit = 0;
  63191. + return 0;
  63192. +}
  63193. +
  63194. +static squeeze_result squeeze_alloc_unformatted_hybrid(znode *left,
  63195. + const coord_t *coord,
  63196. + flush_pos_t *flush_pos,
  63197. + reiser4_key *stop_key)
  63198. +{
  63199. + squeeze_result ret;
  63200. + reiser4_key key;
  63201. + reiser4_extent *ext;
  63202. + extent_state state;
  63203. +
  63204. + ext = extent_by_coord(coord);
  63205. + state = state_of_extent(ext);
  63206. +
  63207. + if ((flush_pos->leaf_relocate && state == ALLOCATED_EXTENT) ||
  63208. + (state == UNALLOCATED_EXTENT))
  63209. + /*
  63210. + * relocate
  63211. + */
  63212. + ret = squeeze_relocate_unformatted(left, coord,
  63213. + flush_pos, &key, stop_key);
  63214. + else
  63215. + /*
  63216. + * (state == ALLOCATED_EXTENT && !flush_pos->leaf_relocate) ||
  63217. + * state == HOLE_EXTENT - overwrite
  63218. + */
  63219. + ret = squeeze_overwrite_unformatted(left, coord,
  63220. + flush_pos, &key, stop_key);
  63221. + if (ret == SQUEEZE_CONTINUE)
  63222. + *stop_key = key;
  63223. + return ret;
  63224. +}
  63225. +
  63226. +/*********************** JOURNAL TRANSACTION MODEL ****************************/
  63227. +
  63228. +static int forward_alloc_formatted_journal(znode * node,
  63229. + const coord_t *parent_coord,
  63230. + flush_pos_t *pos)
  63231. +{
  63232. + int ret;
  63233. +
  63234. + if (ZF_ISSET(node, JNODE_CREATED)) {
  63235. + if (pos->preceder.blk == 0) {
  63236. + /*
  63237. + * preceder is unknown and we have decided to relocate
  63238. + * node -- using of default value for search start is
  63239. + * better than search from block #0.
  63240. + */
  63241. + get_blocknr_hint_default(&pos->preceder.blk);
  63242. + check_preceder(pos->preceder.blk);
  63243. + }
  63244. + ret = forward_try_defragment_locality(node,
  63245. + parent_coord,
  63246. + pos);
  63247. + if (ret != 0) {
  63248. + warning("edward-1615",
  63249. + "forward defrag failed (%d)", ret);
  63250. + return ret;
  63251. + }
  63252. + /*
  63253. + * set JNODE_RELOC bit _after_ node gets
  63254. + * allocated
  63255. + */
  63256. + znode_make_reloc(node, pos->fq);
  63257. + }
  63258. + else
  63259. + jnode_make_wander(ZJNODE(node));
  63260. + /*
  63261. + * This is the new preceder
  63262. + */
  63263. + pos->preceder.blk = *znode_get_block(node);
  63264. + check_preceder(pos->preceder.blk);
  63265. + pos->alloc_cnt += 1;
  63266. +
  63267. + assert("edward-1616", !reiser4_blocknr_is_fake(&pos->preceder.blk));
  63268. + return 0;
  63269. +}
  63270. +
  63271. +static int forward_alloc_unformatted_journal(flush_pos_t *flush_pos)
  63272. +{
  63273. +
  63274. + coord_t *coord;
  63275. + reiser4_extent *ext;
  63276. + oid_t oid;
  63277. + __u64 index;
  63278. + __u64 width;
  63279. + extent_state state;
  63280. + reiser4_key key;
  63281. +
  63282. + assert("edward-1617", flush_pos->state == POS_ON_EPOINT);
  63283. + assert("edward-1618", coord_is_existing_unit(&flush_pos->coord)
  63284. + && item_is_extent(&flush_pos->coord));
  63285. +
  63286. + coord = &flush_pos->coord;
  63287. +
  63288. + ext = extent_by_coord(coord);
  63289. + state = state_of_extent(ext);
  63290. + if (state == HOLE_EXTENT) {
  63291. + flush_pos->state = POS_INVALID;
  63292. + return 0;
  63293. + }
  63294. + item_key_by_coord(coord, &key);
  63295. + oid = get_key_objectid(&key);
  63296. + index = extent_unit_index(coord) + flush_pos->pos_in_unit;
  63297. + width = extent_get_width(ext);
  63298. +
  63299. + assert("edward-1619", width > flush_pos->pos_in_unit);
  63300. +
  63301. + if (state == UNALLOCATED_EXTENT) {
  63302. + int exit;
  63303. + int result;
  63304. + result = forward_relocate_unformatted(flush_pos, ext, state,
  63305. + oid,
  63306. + index, width, &exit);
  63307. + if (exit)
  63308. + return result;
  63309. + }
  63310. + else
  63311. + /*
  63312. + * state == ALLOCATED_EXTENT
  63313. + * keep old allocation
  63314. + */
  63315. + forward_overwrite_unformatted(flush_pos, oid, index, width);
  63316. +
  63317. + flush_pos->pos_in_unit = 0;
  63318. + return 0;
  63319. +}
  63320. +
  63321. +static squeeze_result squeeze_alloc_unformatted_journal(znode *left,
  63322. + const coord_t *coord,
  63323. + flush_pos_t *flush_pos,
  63324. + reiser4_key *stop_key)
  63325. +{
  63326. + squeeze_result ret;
  63327. + reiser4_key key;
  63328. + reiser4_extent *ext;
  63329. + extent_state state;
  63330. +
  63331. + ext = extent_by_coord(coord);
  63332. + state = state_of_extent(ext);
  63333. +
  63334. + if (state == UNALLOCATED_EXTENT)
  63335. + ret = squeeze_relocate_unformatted(left, coord,
  63336. + flush_pos, &key, stop_key);
  63337. + else
  63338. + /*
  63339. + * state == ALLOCATED_EXTENT || state == HOLE_EXTENT
  63340. + */
  63341. + ret = squeeze_overwrite_unformatted(left, coord,
  63342. + flush_pos, &key, stop_key);
  63343. + if (ret == SQUEEZE_CONTINUE)
  63344. + *stop_key = key;
  63345. + return ret;
  63346. +}
  63347. +
  63348. +/********************** WA (Write-Anywhere) TRANSACTION MODEL ***************/
  63349. +
/*
 * Write-anywhere model: every formatted node is relocated; there is no
 * wander path at all. Updates pos->preceder to the node's new block.
 * Returns 0 on success, negative error code otherwise.
 */
static int forward_alloc_formatted_wa(znode * node,
				      const coord_t *parent_coord,
				      flush_pos_t *pos)
{
	int ret;

	assert("edward-1620", znode_is_loaded(node));
	assert("edward-1621", !jnode_check_flushprepped(ZJNODE(node)));
	assert("edward-1622", znode_is_write_locked(node));
	assert("edward-1623", coord_is_invalid(parent_coord)
	       || znode_is_write_locked(parent_coord->node));

	if (pos->preceder.blk == 0) {
		/*
		 * preceder is unknown and we have decided to relocate
		 * node -- using of default value for search start is
		 * better than search from block #0.
		 */
		get_blocknr_hint_default(&pos->preceder.blk);
		check_preceder(pos->preceder.blk);
	}
	ret = forward_try_defragment_locality(node, parent_coord, pos);
	/* -ENOSPC only means "no block in the preferred locality"; any
	 * other error is fatal */
	if (ret && (ret != -ENOSPC)) {
		warning("edward-1624",
			"forward defrag failed (%d)", ret);
		return ret;
	}
	if (ret == 0)
		znode_make_reloc(node, pos->fq);
	else {
		/*
		 * NOTE(review): this -ENOSPC retry is textually identical to
		 * the first attempt -- no hint parameter is changed in
		 * between. Presumably it relies on allocator state left by
		 * the failed attempt (e.g. a cleared max_dist); confirm
		 * against forward_try_defragment_locality().
		 */
		ret = forward_try_defragment_locality(node, parent_coord, pos);
		if (ret) {
			warning("edward-1625",
				"forward defrag failed (%d)", ret);
			return ret;
		}
		/* set JNODE_RELOC bit _after_ node gets allocated */
		znode_make_reloc(node, pos->fq);
	}
	/*
	 * This is the new preceder
	 */
	pos->preceder.blk = *znode_get_block(node);
	check_preceder(pos->preceder.blk);
	pos->alloc_cnt += 1;

	assert("edward-1626", !reiser4_blocknr_is_fake(&pos->preceder.blk));
	return 0;
}
  63399. +
  63400. +static int forward_alloc_unformatted_wa(flush_pos_t *flush_pos)
  63401. +{
  63402. + int exit;
  63403. + int result;
  63404. +
  63405. + coord_t *coord;
  63406. + reiser4_extent *ext;
  63407. + oid_t oid;
  63408. + __u64 index;
  63409. + __u64 width;
  63410. + extent_state state;
  63411. + reiser4_key key;
  63412. +
  63413. + assert("edward-1627", flush_pos->state == POS_ON_EPOINT);
  63414. + assert("edward-1628", coord_is_existing_unit(&flush_pos->coord)
  63415. + && item_is_extent(&flush_pos->coord));
  63416. +
  63417. + coord = &flush_pos->coord;
  63418. +
  63419. + ext = extent_by_coord(coord);
  63420. + state = state_of_extent(ext);
  63421. + if (state == HOLE_EXTENT) {
  63422. + flush_pos->state = POS_INVALID;
  63423. + return 0;
  63424. + }
  63425. +
  63426. + item_key_by_coord(coord, &key);
  63427. + oid = get_key_objectid(&key);
  63428. + index = extent_unit_index(coord) + flush_pos->pos_in_unit;
  63429. + width = extent_get_width(ext);
  63430. +
  63431. + assert("edward-1629", width > flush_pos->pos_in_unit);
  63432. + assert("edward-1630",
  63433. + state == ALLOCATED_EXTENT || state == UNALLOCATED_EXTENT);
  63434. + /*
  63435. + * always relocate
  63436. + */
  63437. + result = forward_relocate_unformatted(flush_pos, ext, state, oid,
  63438. + index, width, &exit);
  63439. + if (exit)
  63440. + return result;
  63441. + flush_pos->pos_in_unit = 0;
  63442. + return 0;
  63443. +}
  63444. +
  63445. +static squeeze_result squeeze_alloc_unformatted_wa(znode *left,
  63446. + const coord_t *coord,
  63447. + flush_pos_t *flush_pos,
  63448. + reiser4_key *stop_key)
  63449. +{
  63450. + squeeze_result ret;
  63451. + reiser4_key key;
  63452. + reiser4_extent *ext;
  63453. + extent_state state;
  63454. +
  63455. + ext = extent_by_coord(coord);
  63456. + state = state_of_extent(ext);
  63457. +
  63458. + if (state == HOLE_EXTENT)
  63459. + /*
  63460. + * hole extents are handled in squeeze_overwrite
  63461. + */
  63462. + ret = squeeze_overwrite_unformatted(left, coord,
  63463. + flush_pos, &key, stop_key);
  63464. + else
  63465. + ret = squeeze_relocate_unformatted(left, coord,
  63466. + flush_pos, &key, stop_key);
  63467. + if (ret == SQUEEZE_CONTINUE)
  63468. + *stop_key = key;
  63469. + return ret;
  63470. +}
  63471. +
  63472. +/******************************************************************************/
  63473. +
/* registry of transaction-model plugins, indexed by txmod id */
txmod_plugin txmod_plugins[LAST_TXMOD_ID] = {
	/* default model: relocate or wander per node, based on block
	   distance to the preceder */
	[HYBRID_TXMOD_ID] = {
		.h = {
			.type_id = REISER4_TXMOD_PLUGIN_TYPE,
			.id = HYBRID_TXMOD_ID,
			.pops = NULL,
			.label = "hybrid",
			.desc = "Hybrid Transaction Model",
			.linkage = {NULL, NULL}
		},
		.forward_alloc_formatted = forward_alloc_formatted_hybrid,
		.reverse_alloc_formatted = reverse_alloc_formatted_hybrid,
		.forward_alloc_unformatted = forward_alloc_unformatted_hybrid,
		.squeeze_alloc_unformatted = squeeze_alloc_unformatted_hybrid
	},
	/* pure journalling: only brand-new (unallocated) nodes are
	   relocated; everything else goes to the overwrite set */
	[JOURNAL_TXMOD_ID] = {
		.h = {
			.type_id = REISER4_TXMOD_PLUGIN_TYPE,
			.id = JOURNAL_TXMOD_ID,
			.pops = NULL,
			.label = "journal",
			.desc = "Journalling Transaction Model",
			.linkage = {NULL, NULL}
		},
		.forward_alloc_formatted = forward_alloc_formatted_journal,
		/* no reverse allocation in this model */
		.reverse_alloc_formatted = NULL,
		.forward_alloc_unformatted = forward_alloc_unformatted_journal,
		.squeeze_alloc_unformatted = squeeze_alloc_unformatted_journal
	},
	/* write-anywhere: every dirty node is relocated unconditionally */
	[WA_TXMOD_ID] = {
		.h = {
			.type_id = REISER4_TXMOD_PLUGIN_TYPE,
			.id = WA_TXMOD_ID,
			.pops = NULL,
			.label = "wa",
			.desc = "Write-Anywhere Transaction Model",
			.linkage = {NULL, NULL}
		},
		.forward_alloc_formatted = forward_alloc_formatted_wa,
		/* no reverse allocation in this model */
		.reverse_alloc_formatted = NULL,
		.forward_alloc_unformatted = forward_alloc_unformatted_wa,
		.squeeze_alloc_unformatted = squeeze_alloc_unformatted_wa
	}
};
  63518. +
  63519. +/*
  63520. + * Local variables:
  63521. + * c-indentation-style: "K&R"
  63522. + * mode-name: "LC"
  63523. + * c-basic-offset: 8
  63524. + * tab-width: 8
  63525. + * fill-column: 79
  63526. + * End:
  63527. + */
  63528. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/pool.c linux-5.16.14/fs/reiser4/pool.c
  63529. --- linux-5.16.14.orig/fs/reiser4/pool.c 1970-01-01 01:00:00.000000000 +0100
  63530. +++ linux-5.16.14/fs/reiser4/pool.c 2022-03-12 13:26:19.686892811 +0100
  63531. @@ -0,0 +1,231 @@
  63532. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  63533. + * reiser4/README */
  63534. +
  63535. +/* Fast pool allocation.
  63536. +
  63537. + There are situations when some sub-system normally asks memory allocator
  63538. + for only few objects, but under some circumstances could require much
  63539. + more. Typical and actually motivating example is tree balancing. It needs
  63540. + to keep track of nodes that were involved into it, and it is well-known
  63541. + that in reasonable packed balanced tree most (92.938121%) percent of all
  63542. + balancings end up after working with only few nodes (3.141592 on
  63543. + average). But in rare cases balancing can involve much more nodes
  63544. + (3*tree_height+1 in extremal situation).
  63545. +
  63546. + On the one hand, we don't want to resort to dynamic allocation (slab,
  63547. + malloc(), etc.) to allocate data structures required to keep track of
  63548. + nodes during balancing. On the other hand, we cannot statically allocate
  63549. + required amount of space on the stack, because first: it is useless wastage
  63550. + of precious resource, and second: this amount is unknown in advance (tree
  63551. + height can change).
  63552. +
  63553. +   Pools, implemented in this file, are a solution to this problem:
  63554. +
  63555. + - some configurable amount of objects is statically preallocated on the
  63556. + stack
  63557. +
  63558. +   - if this preallocated pool is exhausted and more objects are requested
  63559. + they are allocated dynamically.
  63560. +
  63561. + Pools encapsulate distinction between statically and dynamically allocated
  63562. + objects. Both allocation and recycling look exactly the same.
  63563. +
  63564. + To keep track of dynamically allocated objects, pool adds its own linkage
  63565. + to each object.
  63566. +
  63567. + NOTE-NIKITA This linkage also contains some balancing-specific data. This
  63568. + is not perfect. On the other hand, balancing is currently the only client
  63569. + of pool code.
  63570. +
  63571. + NOTE-NIKITA Another desirable feature is to rewrite all pool manipulation
  63572. + functions in the style of tslist/tshash, i.e., make them unreadable, but
  63573. + type-safe.
  63574. +
  63575. +*/
  63576. +
  63577. +#include "debug.h"
  63578. +#include "pool.h"
  63579. +#include "super.h"
  63580. +
  63581. +#include <linux/types.h>
  63582. +#include <linux/err.h>
  63583. +
  63584. +/* initialize new pool object @h */
  63585. +static void reiser4_init_pool_obj(struct reiser4_pool_header *h)
  63586. +{
  63587. + INIT_LIST_HEAD(&h->usage_linkage);
  63588. + INIT_LIST_HEAD(&h->level_linkage);
  63589. + INIT_LIST_HEAD(&h->extra_linkage);
  63590. +}
  63591. +
  63592. +/* initialize new pool */
  63593. +void reiser4_init_pool(struct reiser4_pool *pool /* pool to initialize */ ,
  63594. + size_t obj_size /* size of objects in @pool */ ,
  63595. + int num_of_objs /* number of preallocated objects */ ,
  63596. + char *data/* area for preallocated objects */)
  63597. +{
  63598. + struct reiser4_pool_header *h;
  63599. + int i;
  63600. +
  63601. + assert("nikita-955", pool != NULL);
  63602. + assert("nikita-1044", obj_size > 0);
  63603. + assert("nikita-956", num_of_objs >= 0);
  63604. + assert("nikita-957", data != NULL);
  63605. +
  63606. + memset(pool, 0, sizeof *pool);
  63607. + pool->obj_size = obj_size;
  63608. + pool->data = data;
  63609. + INIT_LIST_HEAD(&pool->free);
  63610. + INIT_LIST_HEAD(&pool->used);
  63611. + INIT_LIST_HEAD(&pool->extra);
  63612. + memset(data, 0, obj_size * num_of_objs);
  63613. + for (i = 0; i < num_of_objs; ++i) {
  63614. + h = (struct reiser4_pool_header *) (data + i * obj_size);
  63615. + reiser4_init_pool_obj(h);
  63616. + /* add pool header to the end of pool's free list */
  63617. + list_add_tail(&h->usage_linkage, &pool->free);
  63618. + }
  63619. +}
  63620. +
/* release pool resources

   Release all resources acquired by this pool, specifically, dynamically
   allocated objects.

   NOTE(review): the body is intentionally empty -- dynamically allocated
   objects are kfree'd individually in reiser4_pool_free(), so by the time
   the pool is done there should be nothing left to release. Confirm no
   extra objects can outlive their pool.
*/
void reiser4_done_pool(struct reiser4_pool *pool UNUSED_ARG)
{
}
  63630. +
  63631. +/* allocate carry object from @pool
  63632. +
  63633. + First, try to get preallocated object. If this fails, resort to dynamic
  63634. + allocation.
  63635. +
  63636. +*/
  63637. +static void *reiser4_pool_alloc(struct reiser4_pool *pool)
  63638. +{
  63639. + struct reiser4_pool_header *result;
  63640. +
  63641. + assert("nikita-959", pool != NULL);
  63642. +
  63643. + if (!list_empty(&pool->free)) {
  63644. + struct list_head *linkage;
  63645. +
  63646. + linkage = pool->free.next;
  63647. + list_del(linkage);
  63648. + INIT_LIST_HEAD(linkage);
  63649. + result = list_entry(linkage, struct reiser4_pool_header,
  63650. + usage_linkage);
  63651. + BUG_ON(!list_empty(&result->level_linkage) ||
  63652. + !list_empty(&result->extra_linkage));
  63653. + } else {
  63654. + /* pool is empty. Extra allocations don't deserve dedicated
  63655. + slab to be served from, as they are expected to be rare. */
  63656. + result = kmalloc(pool->obj_size, reiser4_ctx_gfp_mask_get());
  63657. + if (result != 0) {
  63658. + reiser4_init_pool_obj(result);
  63659. + list_add(&result->extra_linkage, &pool->extra);
  63660. + } else
  63661. + return ERR_PTR(RETERR(-ENOMEM));
  63662. + BUG_ON(!list_empty(&result->usage_linkage) ||
  63663. + !list_empty(&result->level_linkage));
  63664. + }
  63665. + ++pool->objs;
  63666. + list_add(&result->usage_linkage, &pool->used);
  63667. + memset(result + 1, 0, pool->obj_size - sizeof *result);
  63668. + return result;
  63669. +}
  63670. +
  63671. +/* return object back to the pool */
  63672. +void reiser4_pool_free(struct reiser4_pool *pool,
  63673. + struct reiser4_pool_header *h)
  63674. +{
  63675. + assert("nikita-961", h != NULL);
  63676. + assert("nikita-962", pool != NULL);
  63677. +
  63678. + --pool->objs;
  63679. + assert("nikita-963", pool->objs >= 0);
  63680. +
  63681. + list_del_init(&h->usage_linkage);
  63682. + list_del_init(&h->level_linkage);
  63683. +
  63684. + if (list_empty(&h->extra_linkage))
  63685. + /*
  63686. + * pool header is not an extra one. Push it onto free list
  63687. + * using usage_linkage
  63688. + */
  63689. + list_add(&h->usage_linkage, &pool->free);
  63690. + else {
  63691. + /* remove pool header from pool's extra list and kfree it */
  63692. + list_del(&h->extra_linkage);
  63693. + kfree(h);
  63694. + }
  63695. +}
  63696. +
  63697. +/* add new object to the carry level list
  63698. +
  63699. + Carry level is FIFO most of the time, but not always. Complications arise
  63700. + when make_space() function tries to go to the left neighbor and thus adds
  63701. + carry node before existing nodes, and also, when updating delimiting keys
  63702. + after moving data between two nodes, we want left node to be locked before
  63703. + right node.
  63704. +
  63705. + Latter case is confusing at the first glance. Problem is that COP_UPDATE
  63706. + operation that updates delimiting keys is sometimes called with two nodes
  63707. + (when data are moved between two nodes) and sometimes with only one node
  63708. + (when leftmost item is deleted in a node). In any case operation is
  63709. + supplied with at least node whose left delimiting key is to be updated
  63710. + (that is "right" node).
  63711. +
  63712. + @pool - from which to allocate new object;
  63713. + @list - where to add object;
  63714. + @reference - after (or before) which existing object to add
  63715. +*/
  63716. +struct reiser4_pool_header *reiser4_add_obj(struct reiser4_pool *pool,
  63717. + struct list_head *list,
  63718. + pool_ordering order,
  63719. + struct reiser4_pool_header *reference)
  63720. +{
  63721. + struct reiser4_pool_header *result;
  63722. +
  63723. + assert("nikita-972", pool != NULL);
  63724. +
  63725. + result = reiser4_pool_alloc(pool);
  63726. + if (IS_ERR(result))
  63727. + return result;
  63728. +
  63729. + assert("nikita-973", result != NULL);
  63730. +
  63731. + switch (order) {
  63732. + case POOLO_BEFORE:
  63733. + __list_add(&result->level_linkage,
  63734. + reference->level_linkage.prev,
  63735. + &reference->level_linkage);
  63736. + break;
  63737. + case POOLO_AFTER:
  63738. + __list_add(&result->level_linkage,
  63739. + &reference->level_linkage,
  63740. + reference->level_linkage.next);
  63741. + break;
  63742. + case POOLO_LAST:
  63743. + list_add_tail(&result->level_linkage, list);
  63744. + break;
  63745. + case POOLO_FIRST:
  63746. + list_add(&result->level_linkage, list);
  63747. + break;
  63748. + default:
  63749. + wrong_return_value("nikita-927", "order");
  63750. + }
  63751. + return result;
  63752. +}
  63753. +
  63754. +/* Make Linus happy.
  63755. + Local variables:
  63756. + c-indentation-style: "K&R"
  63757. + mode-name: "LC"
  63758. + c-basic-offset: 8
  63759. + tab-width: 8
  63760. + fill-column: 120
  63761. + End:
  63762. +*/
  63763. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/pool.h linux-5.16.14/fs/reiser4/pool.h
  63764. --- linux-5.16.14.orig/fs/reiser4/pool.h 1970-01-01 01:00:00.000000000 +0100
  63765. +++ linux-5.16.14/fs/reiser4/pool.h 2022-03-12 13:26:19.686892811 +0100
  63766. @@ -0,0 +1,57 @@
  63767. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  63768. + * reiser4/README */
  63769. +
  63770. +/* Fast pool allocation */
  63771. +
  63772. +#ifndef __REISER4_POOL_H__
  63773. +#define __REISER4_POOL_H__
  63774. +
  63775. +#include <linux/types.h>
  63776. +
/* a fast pool: fixed-size objects served first from a caller-supplied
   preallocated array and, once that is exhausted, from kmalloc */
struct reiser4_pool {
	size_t obj_size;	/* size in bytes of a single object */
	int objs;		/* number of objects currently handed out */
	char *data;		/* caller-supplied preallocated area */
	struct list_head free;	/* preallocated objects not in use */
	struct list_head used;	/* objects currently in use */
	struct list_head extra;	/* dynamically allocated overflow objects */
};

/* linkage embedded at the start of every pooled object */
struct reiser4_pool_header {
	/* object is either on free or "used" lists */
	struct list_head usage_linkage;
	/* object's position on its client's (carry level) list */
	struct list_head level_linkage;
	/* non-empty only for dynamically allocated objects; lets
	   reiser4_pool_free() tell them from preallocated ones */
	struct list_head extra_linkage;
};

/* where reiser4_add_obj() places a new object on the level list */
typedef enum {
	POOLO_BEFORE,	/* insert before the reference object */
	POOLO_AFTER,	/* insert after the reference object */
	POOLO_LAST,	/* append at the tail of the list */
	POOLO_FIRST	/* insert at the head of the list */
} pool_ordering;
  63799. +
  63800. +/* pool manipulation functions */
  63801. +
  63802. +extern void reiser4_init_pool(struct reiser4_pool *pool, size_t obj_size,
  63803. + int num_of_objs, char *data);
  63804. +extern void reiser4_done_pool(struct reiser4_pool *pool);
  63805. +extern void reiser4_pool_free(struct reiser4_pool *pool,
  63806. + struct reiser4_pool_header *h);
  63807. +struct reiser4_pool_header *reiser4_add_obj(struct reiser4_pool *pool,
  63808. + struct list_head *list,
  63809. + pool_ordering order,
  63810. + struct reiser4_pool_header *reference);
  63811. +
  63812. +/* __REISER4_POOL_H__ */
  63813. +#endif
  63814. +
  63815. +/* Make Linus happy.
  63816. + Local variables:
  63817. + c-indentation-style: "K&R"
  63818. + mode-name: "LC"
  63819. + c-basic-offset: 8
  63820. + tab-width: 8
  63821. + fill-column: 120
  63822. + End:
  63823. +*/
  63824. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/readahead.c linux-5.16.14/fs/reiser4/readahead.c
  63825. --- linux-5.16.14.orig/fs/reiser4/readahead.c 1970-01-01 01:00:00.000000000 +0100
  63826. +++ linux-5.16.14/fs/reiser4/readahead.c 2022-03-12 13:26:19.686892811 +0100
  63827. @@ -0,0 +1,140 @@
  63828. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  63829. + * reiser4/README */
  63830. +
  63831. +#include "forward.h"
  63832. +#include "tree.h"
  63833. +#include "tree_walk.h"
  63834. +#include "super.h"
  63835. +#include "inode.h"
  63836. +#include "key.h"
  63837. +#include "znode.h"
  63838. +
  63839. +#include <linux/swap.h> /* for totalram_pages */
  63840. +
/* reset @rai: with the stop key at its minimum, readahead past the
 * current node is effectively disabled until a caller raises it */
void reiser4_init_ra_info(ra_info_t *rai)
{
	rai->key_to_stop = *reiser4_min_key();
}
  63845. +
/* global formatted node readahead parameter. It can be set by mount option
 * -o readahead:NUM:1 */
static inline int ra_adjacent_only(int flags)
{
	/* nonzero iff readahead is restricted to physically adjacent blocks */
	return flags & RA_ADJACENT_ONLY;
}
  63852. +
  63853. +/* this is used by formatted_readahead to decide whether read for right neighbor
  63854. + * of node is to be issued. It returns 1 if right neighbor's first key is less
  63855. + * or equal to readahead's stop key */
  63856. +static int should_readahead_neighbor(znode * node, ra_info_t *info)
  63857. +{
  63858. + int result;
  63859. +
  63860. + read_lock_dk(znode_get_tree(node));
  63861. + result = keyle(znode_get_rd_key(node), &info->key_to_stop);
  63862. + read_unlock_dk(znode_get_tree(node));
  63863. + return result;
  63864. +}
  63865. +
  63866. +#define LOW_MEM_PERCENTAGE (5)
  63867. +
  63868. +static int low_on_memory(void)
  63869. +{
  63870. + unsigned int freepages;
  63871. +
  63872. + freepages = nr_free_pages();
  63873. + return freepages < (totalram_pages() * LOW_MEM_PERCENTAGE / 100);
  63874. +}
  63875. +
/* start read for @node and for a few of its right neighbors */
void formatted_readahead(znode * node, ra_info_t *info)
{
	struct formatted_ra_params *ra_params;
	znode *cur;
	int i;
	int grn_flags;
	lock_handle next_lh;

	/* do nothing if node block number has not been assigned to node (which
	 * means it is still in cache). */
	if (reiser4_blocknr_is_fake(znode_get_block(node)))
		return;

	ra_params = get_current_super_ra_params();

	/* start i/o for @node itself if its page is not yet present */
	if (znode_page(node) == NULL)
		jstartio(ZJNODE(node));

	/* the neighbor-walk below is done only on the leaf level */
	if (znode_get_level(node) != LEAF_LEVEL)
		return;

	/* don't waste memory for read-ahead when low on memory */
	if (low_on_memory())
		return;

	/* We can have locked nodes on upper tree levels, in this situation lock
	   priorities do not help to resolve deadlocks, we have to use TRY_LOCK
	   here. */
	grn_flags = (GN_CAN_USE_UPPER_LEVELS | GN_TRY_LOCK);

	/* walk right, holding a reference on the current node, up to
	 * ra_params->max neighbors */
	i = 0;
	cur = zref(node);
	init_lh(&next_lh);
	while (i < ra_params->max) {
		const reiser4_block_nr * nextblk;

		/* stop once the neighbor's keys are past the stop key */
		if (!should_readahead_neighbor(cur, info))
			break;

		if (reiser4_get_right_neighbor
		    (&next_lh, cur, ZNODE_READ_LOCK, grn_flags))
			break;

		/* stop at unallocated neighbors, or (in adjacent-only
		 * mode) at the first non-contiguous block number */
		nextblk = znode_get_block(next_lh.node);
		if (reiser4_blocknr_is_fake(nextblk) ||
		    (ra_adjacent_only(ra_params->flags)
		     && *nextblk != *znode_get_block(cur) + 1))
			break;

		/* advance: move our reference to the neighbor and release
		 * the lock before starting i/o */
		zput(cur);
		cur = zref(next_lh.node);
		done_lh(&next_lh);
		if (znode_page(cur) == NULL)
			jstartio(ZJNODE(cur));
		else
			/* Do not scan read-ahead window if pages already
			 * allocated (and i/o already started). */
			break;

		i++;
	}
	zput(cur);
	done_lh(&next_lh);
}
  63941. +
  63942. +void reiser4_readdir_readahead_init(struct inode *dir, tap_t *tap)
  63943. +{
  63944. + reiser4_key *stop_key;
  63945. +
  63946. + assert("nikita-3542", dir != NULL);
  63947. + assert("nikita-3543", tap != NULL);
  63948. +
  63949. + stop_key = &tap->ra_info.key_to_stop;
  63950. + /* initialize readdir readahead information: include into readahead
  63951. + * stat data of all files of the directory */
  63952. + set_key_locality(stop_key, get_inode_oid(dir));
  63953. + set_key_type(stop_key, KEY_SD_MINOR);
  63954. + set_key_ordering(stop_key, get_key_ordering(reiser4_max_key()));
  63955. + set_key_objectid(stop_key, get_key_objectid(reiser4_max_key()));
  63956. + set_key_offset(stop_key, get_key_offset(reiser4_max_key()));
  63957. +}
  63958. +
  63959. +/*
  63960. + Local variables:
  63961. + c-indentation-style: "K&R"
  63962. + mode-name: "LC"
  63963. + c-basic-offset: 8
  63964. + tab-width: 8
  63965. + fill-column: 80
  63966. + End:
  63967. +*/
  63968. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/readahead.h linux-5.16.14/fs/reiser4/readahead.h
  63969. --- linux-5.16.14.orig/fs/reiser4/readahead.h 1970-01-01 01:00:00.000000000 +0100
  63970. +++ linux-5.16.14/fs/reiser4/readahead.h 2022-03-12 13:26:19.686892811 +0100
  63971. @@ -0,0 +1,42 @@
  63972. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  63973. + * reiser4/README */
  63974. +
  63975. +#ifndef __READAHEAD_H__
  63976. +#define __READAHEAD_H__
  63977. +
  63978. +#include "key.h"
  63979. +
  63980. +typedef enum {
  63981. + RA_ADJACENT_ONLY = 1, /* only requests nodes which are adjacent.
  63982. + Default is NO (not only adjacent) */
  63983. +} ra_global_flags;
  63984. +
  63985. +/* reiser4 super block has a field of this type.
  63986. + It controls readahead during tree traversals */
  63987. +struct formatted_ra_params {
  63988. + unsigned long max; /* request not more than this amount of nodes.
  63989. + Default is totalram_pages() / 4 */
  63990. + int flags;
  63991. +};
  63992. +
  63993. +typedef struct {
  63994. + reiser4_key key_to_stop;
  63995. +} ra_info_t;
  63996. +
  63997. +void formatted_readahead(znode * , ra_info_t *);
  63998. +void reiser4_init_ra_info(ra_info_t *rai);
  63999. +
  64000. +extern void reiser4_readdir_readahead_init(struct inode *dir, tap_t *tap);
  64001. +
  64002. +/* __READAHEAD_H__ */
  64003. +#endif
  64004. +
  64005. +/*
  64006. + Local variables:
  64007. + c-indentation-style: "K&R"
  64008. + mode-name: "LC"
  64009. + c-basic-offset: 8
  64010. + tab-width: 8
  64011. + fill-column: 120
  64012. + End:
  64013. +*/
  64014. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/README linux-5.16.14/fs/reiser4/README
  64015. --- linux-5.16.14.orig/fs/reiser4/README 1970-01-01 01:00:00.000000000 +0100
  64016. +++ linux-5.16.14/fs/reiser4/README 2022-03-12 13:26:19.638892695 +0100
  64017. @@ -0,0 +1,128 @@
  64018. +[LICENSING]
  64019. +
  64020. +Reiser4 is hereby licensed under the GNU General
  64021. +Public License version 2.
  64022. +
  64023. +Source code files that contain the phrase "licensing governed by
  64024. +reiser4/README" are "governed files" throughout this file. Governed
  64025. +files are licensed under the GPL. The portions of them owned by Hans
  64026. +Reiser, or authorized to be licensed by him, have been in the past,
  64027. +and likely will be in the future, licensed to other parties under
  64028. +other licenses. If you add your code to governed files, and don't
  64029. +want it to be owned by Hans Reiser, put your copyright label on that
  64030. +code so the poor blight and his customers can keep things straight.
  64031. +All portions of governed files not labeled otherwise are owned by Hans
  64032. +Reiser, and by adding your code to it, widely distributing it to
  64033. +others or sending us a patch, and leaving the sentence in stating that
  64034. +licensing is governed by the statement in this file, you accept this.
  64035. +It will be a kindness if you identify whether Hans Reiser is allowed
  64036. +to license code labeled as owned by you on your behalf other than
  64037. +under the GPL, because he wants to know if it is okay to do so and put
  64038. +a check in the mail to you (for non-trivial improvements) when he
  64039. +makes his next sale. He makes no guarantees as to the amount if any,
  64040. +though he feels motivated to motivate contributors, and you can surely
  64041. +discuss this with him before or after contributing. You have the
  64042. +right to decline to allow him to license your code contribution other
  64043. +than under the GPL.
  64044. +
  64045. +Further licensing options are available for commercial and/or other
  64046. +interests directly from Hans Reiser: reiser@namesys.com. If you interpret
  64047. +the GPL as not allowing those additional licensing options, you read
  64048. +it wrongly, and Richard Stallman agrees with me, when carefully read
  64049. +you can see that those restrictions on additional terms do not apply
  64050. +to the owner of the copyright, and my interpretation of this shall
  64051. +govern for this license.
  64052. +
  64053. +[END LICENSING]
  64054. +
  64055. +Reiser4 is a file system based on dancing tree algorithms, and is
  64056. +described at http://www.namesys.com
  64057. +
  64058. +mkfs.reiser4 and other utilities are on our webpage or wherever your
  64059. +Linux provider put them. You really want to be running the latest
  64060. +version off the website if you use fsck.
  64061. +
  64062. +Yes, if you update your reiser4 kernel module you do have to
  64063. +recompile your kernel, most of the time. The errors you get will be
  64064. +quite cryptic if you forget to do so.
  64065. +
  64066. +Hideous Commercial Pitch: Spread your development costs across other OS
  64067. +vendors. Select from the best in the world, not the best in your
  64068. +building, by buying from third party OS component suppliers. Leverage
  64069. +the software component development power of the internet. Be the most
  64070. +aggressive in taking advantage of the commercial possibilities of
  64071. +decentralized internet development, and add value through your branded
  64072. +integration that you sell as an operating system. Let your competitors
  64073. +be the ones to compete against the entire internet by themselves. Be
  64074. +hip, get with the new economic trend, before your competitors do. Send
  64075. +email to reiser@namesys.com
  64076. +
  64077. +Hans Reiser was the primary architect of Reiser4, but a whole team
  64078. +chipped their ideas in. He invested everything he had into Namesys
  64079. +for 5.5 dark years of no money before Reiser3 finally started to work well
  64080. +enough to bring in money. He owns the copyright.
  64081. +
  64082. +DARPA was the primary sponsor of Reiser4. DARPA does not endorse
  64083. +Reiser4, it merely sponsors it. DARPA is, in solely Hans's personal
  64084. +opinion, unique in its willingness to invest into things more
  64085. +theoretical than the VC community can readily understand, and more
  64086. +longterm than allows them to be sure that they will be the ones to
  64087. +extract the economic benefits from. DARPA also integrated us into a
  64088. +security community that transformed our security worldview.
  64089. +
  64090. +Vladimir Saveliev is our lead programmer, with us from the beginning,
  64091. +and he worked long hours writing the cleanest code. This is why he is
  64092. +now the lead programmer after years of commitment to our work. He
  64093. +always made the effort to be the best he could be, and to make his
  64094. +code the best that it could be. What resulted was quite remarkable. I
  64095. +don't think that money can ever motivate someone to work the way he
  64096. +did, he is one of the most selfless men I know.
  64097. +
  64098. +Alexander Lyamin was our sysadmin, and helped to educate us in
  64099. +security issues. Moscow State University and IMT were very generous
  64100. +in the internet access they provided us, and in lots of other little
  64101. +ways that a generous institution can be.
  64102. +
  64103. +Alexander Zarochentcev (sometimes known as zam, or sasha), wrote the
  64104. +locking code, the block allocator, and finished the flushing code.
  64105. +His code is always crystal clean and well structured.
  64106. +
  64107. +Nikita Danilov wrote the core of the balancing code, the core of the
  64108. +plugins code, and the directory code. He worked a steady pace of long
  64109. +hours that produced a whole lot of well abstracted code. He is our
  64110. +senior computer scientist.
  64111. +
  64112. +Vladimir Demidov wrote the parser. Writing an in kernel parser is
  64113. +something very few persons have the skills for, and it is thanks to
  64114. +him that we can say that the parser is really not so big compared to
  64115. +various bits of our other code, and making a parser work in the kernel
  64116. +was not so complicated as everyone would imagine mainly because it was
  64117. +him doing it...
  64118. +
  64119. +Joshua McDonald wrote the transaction manager, and the flush code.
  64120. +The flush code unexpectedly turned out be extremely hairy for reasons
  64121. +you can read about on our web page, and he did a great job on an
  64122. +extremely difficult task.
  64123. +
  64124. +Nina Reiser handled our accounting, government relations, and much
  64125. +more.
  64126. +
  64127. +Ramon Reiser developed our website.
  64128. +
  64129. +Beverly Palmer drew our graphics.
  64130. +
  64131. +Vitaly Fertman developed librepair, userspace plugins repair code, fsck
  64132. +and worked with Umka on developing libreiser4 and userspace plugins.
  64133. +
  64134. +Yury Umanets (aka Umka) developed libreiser4, userspace plugins and
  64135. +userspace tools (reiser4progs).
  64136. +
  64137. +Oleg Drokin (aka Green) is the release manager who fixes everything.
  64138. +It is so nice to have someone like that on the team. He (plus Chris
  64139. +and Jeff) make it possible for the entire rest of the Namesys team to
  64140. +focus on Reiser4, and he fixed a whole lot of Reiser4 bugs also. It
  64141. +is just amazing to watch his talent for spotting bugs in action.
  64142. +
  64143. +Edward Shishkin wrote cryptcompress file plugin (which manages files
  64144. +built of encrypted and(or) compressed bodies) and other plugins related
  64145. +to transparent encryption and compression support.
  64146. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/reiser4.h linux-5.16.14/fs/reiser4/reiser4.h
  64147. --- linux-5.16.14.orig/fs/reiser4/reiser4.h 1970-01-01 01:00:00.000000000 +0100
  64148. +++ linux-5.16.14/fs/reiser4/reiser4.h 2022-03-12 13:26:19.686892811 +0100
  64149. @@ -0,0 +1,261 @@
  64150. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  64151. + * reiser4/README */
  64152. +
  64153. +
  64154. +/* definitions of common constants used by reiser4 */
  64155. +
  64156. +#if !defined( __REISER4_H__ )
  64157. +#define __REISER4_H__
  64158. +
  64159. +#include <asm/param.h> /* for HZ */
  64160. +#include <linux/errno.h>
  64161. +#include <linux/types.h>
  64162. +#include <linux/fs.h>
  64163. +#include <linux/hardirq.h>
  64164. +#include <linux/sched.h>
  64165. +
  64166. +/*
  64167. + * reiser4 compilation options.
  64168. + */
  64169. +
  64170. +#if defined(CONFIG_REISER4_DEBUG)
  64171. +/* turn on assertion checks */
  64172. +#define REISER4_DEBUG (1)
  64173. +#else
  64174. +#define REISER4_DEBUG (0)
  64175. +#endif
  64176. +
  64177. +#define REISER4_SHA256 (0)
  64178. +
  64179. +/*
  64180. + * Turn on large keys mode. In this mode (which is the default), reiser4 key has 4
  64181. + * 8-byte components. In the old "small key" mode, it's 3 8-byte
  64182. + * components. Additional component, referred to as "ordering" is used to
  64183. + * order items from which given object is composed of. As such, ordering is
  64184. + * placed between locality and objectid. For directory item ordering contains
  64185. + * initial prefix of the file name this item is for. This sorts all directory
  64186. + * items within given directory lexicographically (but see
  64187. + * fibration.[ch]). For file body and stat-data, ordering contains initial
  64188. + * prefix of the name file was initially created with. In the common case
  64189. + * (files with single name) this allows to order file bodies and stat-datas in
  64190. + * the same order as their respective directory entries, thus speeding up
  64191. + * readdir.
  64192. + *
  64193. + * Note, that kernel can only mount file system with the same key size as one
  64194. + * it is compiled for, so flipping this option may render your data
  64195. + * inaccessible.
  64196. + */
  64197. +#define REISER4_LARGE_KEY (1)
  64198. +/*#define REISER4_LARGE_KEY (0)*/
  64199. +
  64200. +/*#define GUESS_EXISTS 1*/
  64201. +
  64202. +/*
  64203. + * PLEASE update fs/reiser4/kattr.c:show_options() when adding new compilation
  64204. + * option
  64205. + */
  64206. +
  64207. +#define REISER4_SUPER_MAGIC_STRING "ReIsEr4"
  64208. +extern const int REISER4_MAGIC_OFFSET; /* offset to magic string from the
  64209. + * beginning of device */
  64210. +
  64211. +/* here go tunable parameters that are not worth special entry in kernel
  64212. + configuration */
  64213. +
  64214. +/* default number of slots in coord-by-key caches */
  64215. +#define CBK_CACHE_SLOTS (16)
  64216. +/* how many elementary tree operations to carry on the next level */
  64217. +#define CARRIES_POOL_SIZE (5)
  64218. +/* size of pool of preallocated nodes for carry process. */
  64219. +#define NODES_LOCKED_POOL_SIZE (5)
  64220. +
  64221. +#define REISER4_NEW_NODE_FLAGS (COPI_LOAD_LEFT | COPI_LOAD_RIGHT | COPI_GO_LEFT)
  64222. +#define REISER4_NEW_EXTENT_FLAGS (COPI_LOAD_LEFT | COPI_LOAD_RIGHT | COPI_GO_LEFT)
  64223. +#define REISER4_PASTE_FLAGS (COPI_GO_LEFT)
  64224. +#define REISER4_INSERT_FLAGS (COPI_GO_LEFT)
  64225. +
  64226. +/* we are supporting reservation of disk space on uid basis */
  64227. +#define REISER4_SUPPORT_UID_SPACE_RESERVATION (0)
  64228. +/* we are supporting reservation of disk space for groups */
  64229. +#define REISER4_SUPPORT_GID_SPACE_RESERVATION (0)
  64230. +/* we are supporting reservation of disk space for root */
  64231. +#define REISER4_SUPPORT_ROOT_SPACE_RESERVATION (0)
  64232. +/* we use rapid flush mode, see flush.c for comments. */
  64233. +#define REISER4_USE_RAPID_FLUSH (1)
  64234. +
  64235. +/*
  64236. + * set this to 0 if you don't want to use wait-for-flush in ->writepage().
  64237. + */
  64238. +#define REISER4_USE_ENTD (1)
  64239. +
  64240. +/* key allocation is Plan-A */
  64241. +#define REISER4_PLANA_KEY_ALLOCATION (1)
  64242. +/* key allocation follows good old 3.x scheme */
  64243. +#define REISER4_3_5_KEY_ALLOCATION (0)
  64244. +
  64245. +/* size of hash-table for znodes */
  64246. +#define REISER4_ZNODE_HASH_TABLE_SIZE (1 << 13)
  64247. +
  64248. +/* number of buckets in lnode hash-table */
  64249. +#define LNODE_HTABLE_BUCKETS (1024)
  64250. +
  64251. +/* some ridiculously high maximal limit on height of znode tree. This
  64252. + is used in declaration of various per level arrays and
  64253. + to allocate statistics gathering array for per-level stats. */
  64254. +#define REISER4_MAX_ZTREE_HEIGHT (8)
  64255. +
  64256. +#define REISER4_PANIC_MSG_BUFFER_SIZE (1024)
  64257. +
  64258. +/* If array contains less than REISER4_SEQ_SEARCH_BREAK elements then,
  64259. + sequential search is on average faster than binary. This is because
  64260. + of better optimization and because sequential search is more CPU
  64261. + cache friendly. This number (25) was found by experiments on dual AMD
  64262. + Athlon(tm), 1400MHz.
  64263. +
  64264. + NOTE: testing in kernel has shown that binary search is more effective than
  64265. + implied by results of the user level benchmarking. Probably because in the
  64266. + node keys are separated by other data. So value was adjusted after few
  64267. + tests. More thorough tuning is needed.
  64268. +*/
  64269. +#define REISER4_SEQ_SEARCH_BREAK (3)
  64270. +static_assert(REISER4_SEQ_SEARCH_BREAK > 2);
  64271. +
  64272. +/* don't allow tree to be lower than this */
  64273. +#define REISER4_MIN_TREE_HEIGHT (TWIG_LEVEL)
  64274. +
  64275. +/* NOTE NIKITA this is no longer used: maximal atom size is auto-adjusted to
  64276. + * available memory. */
  64277. +/* Default value of maximal atom size. Can be overwritten by
  64278. + tmgr.atom_max_size mount option. By default infinity. */
  64279. +#define REISER4_ATOM_MAX_SIZE ((unsigned)(~0))
  64280. +
  64281. +/* Default value of maximal atom age (in jiffies). After reaching this age
  64282. + atom will be forced to commit, either synchronously or asynchronously. Can
  64283. + be overwritten by tmgr.atom_max_age mount option. */
  64284. +#define REISER4_ATOM_MAX_AGE (600 * HZ)
  64285. +
  64286. +/* sleeping period for ktxnmrgd */
  64287. +#define REISER4_TXNMGR_TIMEOUT (5 * HZ)
  64288. +
  64289. +/* timeout to wait for ent thread in writepage. Default: 3 milliseconds. */
  64290. +#define REISER4_ENTD_TIMEOUT (3 * HZ / 1000)
  64291. +
  64292. +/* start complaining after that many restarts in coord_by_key().
  64293. +
  64294. + This either means incredibly heavy contention for this part of a tree, or
  64295. + some corruption or bug.
  64296. +*/
  64297. +#define REISER4_CBK_ITERATIONS_LIMIT (100)
  64298. +
  64299. +/* return -EIO after that many iterations in coord_by_key().
  64300. +
  64301. + I have witnessed more than 800 iterations (in 30 thread test) before cbk
  64302. + finished. --nikita
  64303. +*/
  64304. +#define REISER4_MAX_CBK_ITERATIONS 500000
  64305. +
  64306. +/* put a per-inode limit on maximal number of directory entries with identical
  64307. + keys in hashed directory.
  64308. +
  64309. + Disable this until inheritance interfaces stabilize: we need some way to
  64310. + set per directory limit.
  64311. +*/
  64312. +#define REISER4_USE_COLLISION_LIMIT (0)
  64313. +
  64314. +/* If flush finds more than FLUSH_RELOCATE_THRESHOLD adjacent dirty leaf-level
  64315. + blocks it will force them to be relocated. */
  64316. +#define FLUSH_RELOCATE_THRESHOLD 64
  64317. +/* If flush can find a block allocation closer than at most
  64318. + FLUSH_RELOCATE_DISTANCE from the preceder it will relocate to that position.
  64319. + */
  64320. +#define FLUSH_RELOCATE_DISTANCE 64
  64321. +
  64322. +/* If we have written this much or more blocks before encountering busy jnode
  64323. + in flush list - abort flushing hoping that next time we get called
  64324. + this jnode will be clean already, and we will save some seeks. */
  64325. +#define FLUSH_WRITTEN_THRESHOLD 50
  64326. +
  64327. +/* The maximum number of nodes to scan left on a level during flush. */
  64328. +#define FLUSH_SCAN_MAXNODES 10000
  64329. +
  64330. +/* per-atom limit of flushers */
  64331. +#define ATOM_MAX_FLUSHERS (1)
  64332. +
  64333. +/* default tracing buffer size */
  64334. +#define REISER4_TRACE_BUF_SIZE (1 << 15)
  64335. +
  64336. +/* what size units of IO we would like cp, etc., to use, in writing to
  64337. + reiser4. In bytes.
  64338. +
  64339. + Can be overwritten by optimal_io_size mount option.
  64340. +*/
  64341. +#define REISER4_OPTIMAL_IO_SIZE (64 * 1024)
  64342. +
  64343. +/* see comments in inode.c:oid_to_uino() */
  64344. +#define REISER4_UINO_SHIFT (1 << 30)
  64345. +
  64346. +/* Mark function argument as unused to avoid compiler warnings. */
  64347. +#define UNUSED_ARG __attribute__((unused))
  64348. +
  64349. +#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
  64350. +#define NONNULL __attribute__((nonnull))
  64351. +#else
  64352. +#define NONNULL
  64353. +#endif
  64354. +
  64355. +/* master super block offset in bytes.*/
  64356. +#define REISER4_MASTER_OFFSET 65536
  64357. +
  64358. +/* size of VFS block */
  64359. +#define VFS_BLKSIZE 512
  64360. +/* number of bits in size of VFS block (512==2^9) */
  64361. +#define VFS_BLKSIZE_BITS 9
  64362. +
  64363. +#define REISER4_I reiser4_inode_data
  64364. +
  64365. +/* implication */
  64366. +#define ergo(antecedent, consequent) (!(antecedent) || (consequent))
  64367. +/* logical equivalence */
  64368. +#define equi(p1, p2) (ergo((p1), (p2)) && ergo((p2), (p1)))
  64369. +
  64370. +#define sizeof_array(x) ((int) (sizeof(x) / sizeof(x[0])))
  64371. +
  64372. +#define NOT_YET (0)
  64373. +
  64374. +/** Reiser4 specific error codes **/
  64375. +
  64376. +#define REISER4_ERROR_CODE_BASE 10000
  64377. +
  64378. +/* Neighbor is not available (side neighbor or parent) */
  64379. +#define E_NO_NEIGHBOR (REISER4_ERROR_CODE_BASE)
  64380. +
  64381. +/* Node was not found in cache */
  64382. +#define E_NOT_IN_CACHE (REISER4_ERROR_CODE_BASE + 1)
  64383. +
  64384. +/* node has no free space enough for completion of balancing operation */
  64385. +#define E_NODE_FULL (REISER4_ERROR_CODE_BASE + 2)
  64386. +
  64387. +/* repeat operation */
  64388. +#define E_REPEAT (REISER4_ERROR_CODE_BASE + 3)
  64389. +
  64390. +/* deadlock happens */
  64391. +#define E_DEADLOCK (REISER4_ERROR_CODE_BASE + 4)
  64392. +
  64393. +/* operation cannot be performed, because it would block and non-blocking mode
  64394. + * was requested. */
  64395. +#define E_BLOCK (REISER4_ERROR_CODE_BASE + 5)
  64396. +
  64397. +/* wait some event (depends on context), then repeat */
  64398. +#define E_WAIT (REISER4_ERROR_CODE_BASE + 6)
  64399. +
  64400. +#endif /* __REISER4_H__ */
  64401. +
  64402. +/* Make Linus happy.
  64403. + Local variables:
  64404. + c-indentation-style: "K&R"
  64405. + mode-name: "LC"
  64406. + c-basic-offset: 8
  64407. + tab-width: 8
  64408. + fill-column: 120
  64409. + End:
  64410. +*/
  64411. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/safe_link.c linux-5.16.14/fs/reiser4/safe_link.c
  64412. --- linux-5.16.14.orig/fs/reiser4/safe_link.c 1970-01-01 01:00:00.000000000 +0100
  64413. +++ linux-5.16.14/fs/reiser4/safe_link.c 2022-03-12 13:26:19.686892811 +0100
  64414. @@ -0,0 +1,354 @@
  64415. +/* Copyright 2003, 2004 by Hans Reiser, licensing governed by
  64416. + * reiser4/README */
  64417. +
  64418. +/* Safe-links. */
  64419. +
  64420. +/*
  64421. + * Safe-links are used to maintain file system consistency during operations
  64422. + * that spawn multiple transactions. For example:
  64423. + *
  64424. + * 1. Unlink. UNIX supports "open-but-unlinked" files, that is files
  64425. + * without user-visible names in the file system, but still opened by some
  64426. + * active process. What happens here is that unlink proper (i.e., removal
  64427. + * of the last file name) and file deletion (truncate of file body to zero
  64428. + * and deletion of stat-data, that happens when last file descriptor is
  64429. + * closed), may belong to different transactions T1 and T2. If a crash
  64430. + * happens after T1 commit, but before T2 commit, on-disk file system has
  64431. + * a file without name, that is, disk space leak.
  64432. + *
  64433. + * 2. Truncate. Truncate of large file may spawn multiple transactions. If
  64434. + * system crashes while truncate was in-progress, file is left partially
  64435. + * truncated, which violates "atomicity guarantees" of reiser4, viz. that
  64436. + * every system call is atomic.
  64437. + *
  64438. + * Safe-links address both above cases. Basically, safe-link is a way post
  64439. + * some operation to be executed during commit of some other transaction than
  64440. + * current one. (Another way to look at the safe-link is to interpret it as a
  64441. + * logical logging.)
  64442. + *
  64443. + * Specifically, at the beginning of unlink a safe-link is inserted into the
  64444. + * tree. This safe-link is normally removed by file deletion code (during
  64445. + * transaction T2 in the above terms). Truncate also inserts safe-link that is
  64446. + * normally removed when truncate operation is finished.
  64447. + *
  64448. + * This means, that in the case of "clean umount" there are no safe-links in
  64449. + * the tree. If safe-links are observed during mount, it means that (a) system
  64450. + * was terminated abnormally, and (b) safe-links correspond to the "pending"
  64451. + * (i.e., not finished) operations that were in-progress during system
  64452. + * termination. Each safe-link records enough information to complete
  64453. + * corresponding operation, and mount simply "replays" them (hence, the
  64454. + * analogy with the logical logging).
  64455. + *
  64456. + * Safe-links are implemented as blackbox items (see
  64457. + * plugin/item/blackbox.[ch]).
  64458. + *
  64459. + * For the reference: ext3 also has similar mechanism, it's called "an orphan
  64460. + * list" there.
  64461. + */
  64462. +
  64463. +#include "safe_link.h"
  64464. +#include "debug.h"
  64465. +#include "inode.h"
  64466. +
  64467. +#include "plugin/item/blackbox.h"
  64468. +
  64469. +#include <linux/fs.h>
  64470. +
  64471. +/*
  64472. + * On-disk format of safe-link.
  64473. + */
  64474. +typedef struct safelink {
  64475. + reiser4_key sdkey; /* key of stat-data for the file safe-link is
  64476. + * for */
  64477. + d64 size; /* size to which file should be truncated */
  64478. +} safelink_t;
  64479. +
  64480. +/*
  64481. + * locality where safe-link items are stored. Next to the objectid of root
  64482. + * directory.
  64483. + */
  64484. +static oid_t safe_link_locality(reiser4_tree * tree)
  64485. +{
  64486. + return get_key_objectid(get_super_private(tree->super)->df_plug->
  64487. + root_dir_key(tree->super)) + 1;
  64488. +}
  64489. +
  64490. +/*
  64491. + Construct a key for the safe-link. Key has the following format:
  64492. +
  64493. +| 60 | 4 | 64 | 4 | 60 | 64 |
  64494. ++---------------+---+------------------+---+---------------+------------------+
  64495. +| locality | 0 | 0 | 0 | objectid | link type |
  64496. ++---------------+---+------------------+---+---------------+------------------+
  64497. +| | | | |
  64498. +| 8 bytes | 8 bytes | 8 bytes | 8 bytes |
  64499. +
  64500. + This is in large keys format. In small keys format second 8 byte chunk is
  64501. + out. Locality is a constant returned by safe_link_locality(). objectid is
  64502. + an oid of a file on which operation protected by this safe-link is
  64503. + performed. link-type is used to distinguish safe-links for different
  64504. + operations.
  64505. +
  64506. + */
  64507. +static reiser4_key *build_link_key(reiser4_tree * tree, oid_t oid,
  64508. + reiser4_safe_link_t link, reiser4_key * key)
  64509. +{
  64510. + reiser4_key_init(key);
  64511. + set_key_locality(key, safe_link_locality(tree));
  64512. + set_key_objectid(key, oid);
  64513. + set_key_offset(key, link);
  64514. + return key;
  64515. +}
  64516. +
  64517. +/*
  64518. + * how much disk space is necessary to insert and remove (in the
  64519. + * error-handling path) safe-link.
  64520. + */
  64521. +static __u64 safe_link_tograb(reiser4_tree * tree)
  64522. +{
  64523. + return
  64524. + /* insert safe link */
  64525. + estimate_one_insert_item(tree) +
  64526. + /* remove safe link */
  64527. + estimate_one_item_removal(tree) +
  64528. + /* drill to the leaf level during insertion */
  64529. + 1 + estimate_one_insert_item(tree) +
  64530. + /*
  64531. + * possible update of existing safe-link. Actually, if
  64532. + * safe-link existed already (we failed to remove it), then no
  64533. + * insertion is necessary, so this term is already "covered",
  64534. + * but for simplicity let's leave it.
  64535. + */
  64536. + 1;
  64537. +}
  64538. +
  64539. +/*
  64540. + * grab enough disk space to insert and remove (in the error-handling path)
  64541. + * safe-link.
  64542. + */
  64543. +int safe_link_grab(reiser4_tree * tree, reiser4_ba_flags_t flags)
  64544. +{
  64545. + int result;
  64546. +
  64547. + grab_space_enable();
  64548. + /* The sbinfo->delete_mutex can be taken here.
  64549. + * safe_link_release() should be called before leaving reiser4
  64550. + * context. */
  64551. + result =
  64552. + reiser4_grab_reserved(tree->super, safe_link_tograb(tree), flags);
  64553. + grab_space_enable();
  64554. + return result;
  64555. +}
  64556. +
  64557. +/*
  64558. + * release unused disk space reserved by safe_link_grab().
  64559. + */
  64560. +void safe_link_release(reiser4_tree * tree)
  64561. +{
  64562. + reiser4_release_reserved(tree->super);
  64563. +}
  64564. +
  64565. +/*
  64566. + * insert into tree safe-link for operation @link on inode @inode.
  64567. + */
  64568. +int safe_link_add(struct inode *inode, reiser4_safe_link_t link)
  64569. +{
  64570. + reiser4_key key;
  64571. + safelink_t sl;
  64572. + int length;
  64573. + int result;
  64574. + reiser4_tree *tree;
  64575. +
  64576. + build_sd_key(inode, &sl.sdkey);
  64577. + length = sizeof sl.sdkey;
  64578. +
  64579. + if (link == SAFE_TRUNCATE) {
  64580. + /*
  64581. + * for truncate we have to store final file length also,
  64582. + * expand item.
  64583. + */
  64584. + length += sizeof(sl.size);
  64585. + put_unaligned(cpu_to_le64(inode->i_size), &sl.size);
  64586. + }
  64587. + tree = reiser4_tree_by_inode(inode);
  64588. + build_link_key(tree, get_inode_oid(inode), link, &key);
  64589. +
  64590. + result = store_black_box(tree, &key, &sl, length);
  64591. + if (result == -EEXIST)
  64592. + result = update_black_box(tree, &key, &sl, length);
  64593. + return result;
  64594. +}
  64595. +
  64596. +/*
  64597. + * remove safe-link corresponding to the operation @link on inode @inode from
  64598. + * the tree.
  64599. + */
  64600. +int safe_link_del(reiser4_tree * tree, oid_t oid, reiser4_safe_link_t link)
  64601. +{
  64602. + reiser4_key key;
  64603. +
  64604. + return kill_black_box(tree, build_link_key(tree, oid, link, &key));
  64605. +}
  64606. +
  64607. +/*
  64608. + * in-memory structure to keep information extracted from safe-link. This is
  64609. + * used to iterate over all safe-links.
  64610. + */
  64611. +struct safe_link_context {
  64612. + reiser4_tree *tree; /* internal tree */
  64613. + reiser4_key key; /* safe-link key */
  64614. + reiser4_key sdkey; /* key of object stat-data */
  64615. + reiser4_safe_link_t link; /* safe-link type */
  64616. + oid_t oid; /* object oid */
  64617. + __u64 size; /* final size for truncate */
  64618. +};
  64619. +
  64620. +/*
  64621. + * start iterating over all safe-links.
  64622. + */
  64623. +static void safe_link_iter_begin(reiser4_tree * tree,
  64624. + struct safe_link_context *ctx)
  64625. +{
  64626. + ctx->tree = tree;
  64627. + reiser4_key_init(&ctx->key);
  64628. + set_key_locality(&ctx->key, safe_link_locality(tree));
  64629. + set_key_objectid(&ctx->key, get_key_objectid(reiser4_max_key()));
  64630. + set_key_offset(&ctx->key, get_key_offset(reiser4_max_key()));
  64631. +}
  64632. +
/*
 * return next safe-link.
 *
 * Loads the black-box item for ctx->key and unpacks it into @ctx: object
 * id and link type come from the key itself (see build_link_key() usage in
 * safe_link_del()), the stat-data key -- and, for truncate links, the final
 * file size -- from the item body. Returns 0 on success; a non-zero result
 * (presumably -ENOENT when no item remains -- see caller) ends iteration.
 */
static int safe_link_iter_next(struct safe_link_context *ctx)
{
	int result;
	safelink_t sl;

	result = load_black_box(ctx->tree, &ctx->key, &sl, sizeof sl, 0);
	if (result == 0) {
		ctx->oid = get_key_objectid(&ctx->key);
		ctx->link = get_key_offset(&ctx->key);
		ctx->sdkey = sl.sdkey;
		if (ctx->link == SAFE_TRUNCATE)
			/* on-disk size is little-endian and possibly
			 * unaligned */
			ctx->size = le64_to_cpu(get_unaligned(&sl.size));
	}
	return result;
}
  64651. +
  64652. +/*
  64653. + * check are there any more safe-links left in the tree.
  64654. + */
  64655. +static int safe_link_iter_finished(struct safe_link_context *ctx)
  64656. +{
  64657. + return get_key_locality(&ctx->key) != safe_link_locality(ctx->tree);
  64658. +}
  64659. +
  64660. +/*
  64661. + * finish safe-link iteration.
  64662. + */
  64663. +static void safe_link_iter_end(struct safe_link_context *ctx)
  64664. +{
  64665. + /* nothing special */
  64666. +}
  64667. +
/*
 * process single safe-link: replay the pending operation @link recorded for
 * the object whose stat-data key is @sdkey, then remove the safe-link.
 * Returns 0 on success or a negative error code.
 */
static int process_safelink(struct super_block *super, reiser4_safe_link_t link,
			    reiser4_key * sdkey, oid_t oid, __u64 size)
{
	struct inode *inode;
	int result;

	/*
	 * obtain object inode by reiser4_iget(), then call object plugin
	 * ->safelink() method to do actual work, then delete safe-link on
	 * success.
	 */
	inode = reiser4_iget(super, sdkey, 1);
	if (!IS_ERR(inode)) {
		file_plugin *fplug;

		fplug = inode_file_plugin(inode);
		assert("nikita-3428", fplug != NULL);
		assert("", oid == get_inode_oid(inode));
		if (fplug->safelink != NULL) {
			/* reiser4_txn_restart_current is not necessary because
			 * mounting is single-threaded. However, without it
			 * deadlock detection code will complain (see
			 * nikita-3361). */
			reiser4_txn_restart_current();
			result = fplug->safelink(inode, link, size);
		} else {
			/* no ->safelink() handler: warn, but report success
			 * so the stale safe-link still gets deleted below */
			warning("nikita-3430",
				"Cannot handle safelink for %lli",
				(unsigned long long)oid);
			reiser4_print_key("key", sdkey);
			result = 0;
		}
		if (result != 0) {
			warning("nikita-3431",
				"Error processing safelink for %lli: %i",
				(unsigned long long)oid, result);
		}
		reiser4_iget_complete(inode);
		iput(inode);
		if (result == 0) {
			/* operation replayed: grab space and remove the
			 * safe-link item itself */
			result = safe_link_grab(reiser4_get_tree(super),
						BA_CAN_COMMIT);
			if (result == 0)
				result =
				    safe_link_del(reiser4_get_tree(super), oid,
						  link);
			safe_link_release(reiser4_get_tree(super));
			/*
			 * restart transaction: if there was large number of
			 * safe-links, their processing may fail to fit into
			 * single transaction.
			 */
			if (result == 0)
				reiser4_txn_restart_current();
		}
	} else
		result = PTR_ERR(inode);
	return result;
}
  64730. +
  64731. +/*
  64732. + * iterate over all safe-links in the file-system processing them one by one.
  64733. + */
  64734. +int process_safelinks(struct super_block *super)
  64735. +{
  64736. + struct safe_link_context ctx;
  64737. + int result;
  64738. +
  64739. + if (sb_rdonly(super))
  64740. + /* do nothing on the read-only file system */
  64741. + return 0;
  64742. + safe_link_iter_begin(&get_super_private(super)->tree, &ctx);
  64743. + result = 0;
  64744. + do {
  64745. + result = safe_link_iter_next(&ctx);
  64746. + if (safe_link_iter_finished(&ctx) || result == -ENOENT) {
  64747. + result = 0;
  64748. + break;
  64749. + }
  64750. + if (result == 0)
  64751. + result = process_safelink(super, ctx.link,
  64752. + &ctx.sdkey, ctx.oid,
  64753. + ctx.size);
  64754. + } while (result == 0);
  64755. + safe_link_iter_end(&ctx);
  64756. + return result;
  64757. +}
  64758. +
  64759. +/* Make Linus happy.
  64760. + Local variables:
  64761. + c-indentation-style: "K&R"
  64762. + mode-name: "LC"
  64763. + c-basic-offset: 8
  64764. + tab-width: 8
  64765. + fill-column: 120
  64766. + scroll-step: 1
  64767. + End:
  64768. +*/
  64769. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/safe_link.h linux-5.16.14/fs/reiser4/safe_link.h
  64770. --- linux-5.16.14.orig/fs/reiser4/safe_link.h 1970-01-01 01:00:00.000000000 +0100
  64771. +++ linux-5.16.14/fs/reiser4/safe_link.h 2022-03-12 13:26:19.686892811 +0100
  64772. @@ -0,0 +1,29 @@
  64773. +/* Copyright 2003 by Hans Reiser, licensing governed by
  64774. + * reiser4/README */
  64775. +
  64776. +/* Safe-links. See safe_link.c for details. */
  64777. +
  64778. +#if !defined(__FS_SAFE_LINK_H__)
  64779. +#define __FS_SAFE_LINK_H__
  64780. +
  64781. +#include "tree.h"
  64782. +
/* grab space for safe-link operations (presumably a block reservation --
 * note the reiser4_ba_flags_t argument; see caller process_safelink()) */
int safe_link_grab(reiser4_tree * tree, reiser4_ba_flags_t flags);
/* release whatever safe_link_grab() acquired */
void safe_link_release(reiser4_tree * tree);
/* record a pending operation @link for @inode as a safe-link item */
int safe_link_add(struct inode *inode, reiser4_safe_link_t link);
/* remove the safe-link for operation @link on object @oid */
int safe_link_del(reiser4_tree *, oid_t oid, reiser4_safe_link_t link);

/* replay all recorded safe-links at mount time; see safe_link.c */
int process_safelinks(struct super_block *super);
  64789. +
  64790. +/* __FS_SAFE_LINK_H__ */
  64791. +#endif
  64792. +
  64793. +/* Make Linus happy.
  64794. + Local variables:
  64795. + c-indentation-style: "K&R"
  64796. + mode-name: "LC"
  64797. + c-basic-offset: 8
  64798. + tab-width: 8
  64799. + fill-column: 120
  64800. + End:
  64801. +*/
  64802. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/seal.c linux-5.16.14/fs/reiser4/seal.c
  64803. --- linux-5.16.14.orig/fs/reiser4/seal.c 1970-01-01 01:00:00.000000000 +0100
  64804. +++ linux-5.16.14/fs/reiser4/seal.c 2022-03-12 13:26:19.687892813 +0100
  64805. @@ -0,0 +1,219 @@
  64806. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  64807. +/* Seals implementation. */
  64808. +/* Seals are "weak" tree pointers. They are analogous to tree coords in
  64809. + allowing to bypass tree traversal. But normal usage of coords implies that
  64810. + node pointed to by coord is locked, whereas seals don't keep a lock (or
+ even a reference) to znode. Instead, each znode contains a version number,
  64812. + increased on each znode modification. This version number is copied into a
  64813. + seal when seal is created. Later, one can "validate" seal by calling
  64814. + reiser4_seal_validate(). If znode is in cache and its version number is
  64815. + still the same, seal is "pristine" and coord associated with it can be
  64816. + re-used immediately.
  64817. +
  64818. + If, on the other hand, znode is out of cache, or it is obviously different
  64819. + one from the znode seal was initially attached to (for example, it is on
  64820. + the different level, or is being removed from the tree), seal is
  64821. + irreparably invalid ("burned") and tree traversal has to be repeated.
  64822. +
  64823. + Otherwise, there is some hope, that while znode was modified (and seal was
  64824. + "broken" as a result), key attached to the seal is still in the node. This
  64825. + is checked by first comparing this key with delimiting keys of node and, if
  64826. + key is ok, doing intra-node lookup.
  64827. +
  64828. + Znode version is maintained in the following way:
  64829. +
  64830. + there is reiser4_tree.znode_epoch counter. Whenever new znode is created,
  64831. + znode_epoch is incremented and its new value is stored in ->version field
  64832. + of new znode. Whenever znode is dirtied (which means it was probably
  64833. + modified), znode_epoch is also incremented and its new value is stored in
  64834. + znode->version. This is done so, because just incrementing znode->version
  64835. + on each update is not enough: it may so happen, that znode get deleted, new
  64836. + znode is allocated for the same disk block and gets the same version
  64837. + counter, tricking seal code into false positive.
  64838. +*/
  64839. +
  64840. +#include "forward.h"
  64841. +#include "debug.h"
  64842. +#include "key.h"
  64843. +#include "coord.h"
  64844. +#include "seal.h"
  64845. +#include "plugin/item/item.h"
  64846. +#include "plugin/node/node.h"
  64847. +#include "jnode.h"
  64848. +#include "znode.h"
  64849. +#include "super.h"
  64850. +
  64851. +static znode *seal_node(const seal_t *seal);
  64852. +static int seal_matches(const seal_t *seal, znode * node);
  64853. +
/* initialise seal. This can be called several times on the same seal. @coord
   and @key can be NULL.

   The seal records the block number of the znode @coord points to and the
   znode's current version counter; reiser4_seal_validate() later compares
   the recorded version against the znode's live one. */
void reiser4_seal_init(seal_t *seal /* seal to initialise */ ,
		       const coord_t *coord /* coord @seal will be
					     * attached to */ ,
		       const reiser4_key * key UNUSED_ARG /* key @seal will be
							   * attached to */ )
{
	assert("nikita-1886", seal != NULL);
	memset(seal, 0, sizeof *seal);
	if (coord != NULL) {
		znode *node;

		node = coord->node;
		assert("nikita-1987", node != NULL);
		/* read ->version and block number atomically under the
		 * znode spin-lock */
		spin_lock_znode(node);
		seal->version = node->version;
		assert("nikita-1988", seal->version != 0);
		seal->block = *znode_get_block(node);
#if REISER4_DEBUG
		/* remember coord and key for cross-checking in
		 * reiser4_seal_validate() */
		seal->coord1 = *coord;
		if (key != NULL)
			seal->key = *key;
#endif
		spin_unlock_znode(node);
	}
}
  64881. +
  64882. +/* finish with seal */
  64883. +void reiser4_seal_done(seal_t *seal/* seal to clear */)
  64884. +{
  64885. + assert("nikita-1887", seal != NULL);
  64886. + seal->version = 0;
  64887. +}
  64888. +
  64889. +/* true if seal was initialised */
  64890. +int reiser4_seal_is_set(const seal_t *seal/* seal to query */)
  64891. +{
  64892. + assert("nikita-1890", seal != NULL);
  64893. + return seal->version != 0;
  64894. +}
  64895. +
  64896. +#if REISER4_DEBUG
/* helper function for reiser4_seal_validate(). It checks that item at @coord
 * has expected key. This is to detect cases where node was modified but wasn't
 * marked dirty. Returns non-zero when the coord is consistent with @k__ (or
 * when no meaningful comparison is possible). Debug-only. */
static inline int check_seal_match(const coord_t *coord /* coord to check */ ,
				   const reiser4_key *k__/* expected key */)
{
	reiser4_key ukey;

	/* FIXME-VS: we only can compare keys for items whose units
	   represent exactly one key */
	if (coord->between != AT_UNIT)
		return 1;
	if (!coord_is_existing_unit(coord))
		return 0;
	/* extent units are skipped: cannot compare a single key */
	if (item_is_extent(coord))
		return 1;
	if (item_is_ctail(coord))
		/* NOTE(review): for ctail items only key ordering is
		 * checked, presumably because a unit spans a key range --
		 * confirm against the ctail item plugin */
		return keyge(k__, unit_key_by_coord(coord, &ukey));
	return keyeq(k__, unit_key_by_coord(coord, &ukey));
}
  64917. +#endif
  64918. +
  64919. +/* this is used by reiser4_seal_validate. It accepts return value of
  64920. + * longterm_lock_znode and returns 1 if it can be interpreted as seal
  64921. + * validation failure. For instance, when longterm_lock_znode returns -EINVAL,
+ * reiser4_seal_validate returns -E_REPEAT and the caller will retry the tree search.
  64923. + * We cannot do this in longterm_lock_znode(), because sometimes we want to
  64924. + * distinguish between -EINVAL and -E_REPEAT. */
  64925. +static int should_repeat(int return_code)
  64926. +{
  64927. + return return_code == -EINVAL;
  64928. +}
  64929. +
  64930. +/* (re-)validate seal.
  64931. +
  64932. + Checks whether seal is pristine, and try to revalidate it if possible.
  64933. +
  64934. + If seal was burned, or broken irreparably, return -E_REPEAT.
  64935. +
  64936. + NOTE-NIKITA currently reiser4_seal_validate() returns -E_REPEAT if key we are
  64937. + looking for is in range of keys covered by the sealed node, but item wasn't
  64938. + found by node ->lookup() method. Alternative is to return -ENOENT in this
  64939. + case, but this would complicate callers logic.
  64940. +
  64941. +*/
/*
 * (Re-)validate @seal: look the sealed znode up by block number, long-term
 * lock it, and accept the cached @coord only when the seal and znode
 * versions still coincide. Returns 0 with @lh held on success, -E_REPEAT
 * when the caller must redo the tree traversal, or another negative error.
 */
int reiser4_seal_validate(seal_t *seal /* seal to validate */,
			  coord_t *coord /* coord to validate against */,
			  const reiser4_key * key /* key to validate against */,
			  lock_handle * lh /* resulting lock handle */,
			  znode_lock_mode mode /* lock node */,
			  znode_lock_request request/* locking priority */)
{
	znode *node;
	int result;

	assert("nikita-1889", seal != NULL);
	assert("nikita-1881", reiser4_seal_is_set(seal));
	assert("nikita-1882", key != NULL);
	assert("nikita-1883", coord != NULL);
	assert("nikita-1884", lh != NULL);
	assert("nikita-1885", keyeq(&seal->key, key));
	assert("nikita-1989", coords_equal(&seal->coord1, coord));

	/* obtain znode by block number */
	node = seal_node(seal);
	if (!node)
		/* znode wasn't in cache */
		return RETERR(-E_REPEAT);
	/* znode was in cache, lock it */
	result = longterm_lock_znode(lh, node, mode, request);
	/* NOTE(review): the zlook() reference is dropped here while @node is
	 * still dereferenced below; presumably the long-term lock (when
	 * acquired) keeps the znode alive -- confirm */
	zput(node);
	if (result == 0) {
		if (seal_matches(seal, node)) {
			/* if seal version and znode version
			   coincide */
			ON_DEBUG(coord_update_v(coord));
			assert("nikita-1990",
			       node == seal->coord1.node);
			assert("nikita-1898",
			       WITH_DATA_RET(coord->node, 1,
					     check_seal_match(coord,
							      key)));
		} else
			result = RETERR(-E_REPEAT);
	}
	if (result != 0) {
		/* -EINVAL from locking means the node is dying: translate
		 * into -E_REPEAT so the caller redoes the traversal */
		if (should_repeat(result))
			result = RETERR(-E_REPEAT);
		/* unlock node on failure */
		done_lh(lh);
	}
	return result;
}
  64990. +
  64991. +/* helpers functions */
  64992. +
  64993. +/* obtain reference to znode seal points to, if in cache */
  64994. +static znode *seal_node(const seal_t *seal/* seal to query */)
  64995. +{
  64996. + assert("nikita-1891", seal != NULL);
  64997. + return zlook(current_tree, &seal->block);
  64998. +}
  64999. +
  65000. +/* true if @seal version and @node version coincide */
  65001. +static int seal_matches(const seal_t *seal /* seal to check */ ,
  65002. + znode * node/* node to check */)
  65003. +{
  65004. + int result;
  65005. +
  65006. + assert("nikita-1991", seal != NULL);
  65007. + assert("nikita-1993", node != NULL);
  65008. +
  65009. + spin_lock_znode(node);
  65010. + result = (seal->version == node->version);
  65011. + spin_unlock_znode(node);
  65012. + return result;
  65013. +}
  65014. +
  65015. +/* Make Linus happy.
  65016. + Local variables:
  65017. + c-indentation-style: "K&R"
  65018. + mode-name: "LC"
  65019. + c-basic-offset: 8
  65020. + tab-width: 8
  65021. + fill-column: 120
  65022. + scroll-step: 1
  65023. + End:
  65024. +*/
  65025. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/seal.h linux-5.16.14/fs/reiser4/seal.h
  65026. --- linux-5.16.14.orig/fs/reiser4/seal.h 1970-01-01 01:00:00.000000000 +0100
  65027. +++ linux-5.16.14/fs/reiser4/seal.h 2022-03-12 13:26:19.687892813 +0100
  65028. @@ -0,0 +1,49 @@
  65029. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  65030. +
  65031. +/* Declaration of seals: "weak" tree pointers. See seal.c for comments. */
  65032. +
  65033. +#ifndef __SEAL_H__
  65034. +#define __SEAL_H__
  65035. +
  65036. +#include "forward.h"
  65037. +#include "debug.h"
  65038. +#include "dformat.h"
  65039. +#include "key.h"
  65040. +#include "coord.h"
  65041. +
  65042. +/* for __u?? types */
  65043. +/*#include <linux/types.h>*/
  65044. +
/* seal. See comment at the top of seal.c */
typedef struct seal_s {
	/* version of znode, recorded at the time of seal creation;
	 * 0 means the seal is not set (see reiser4_seal_is_set()) */
	__u64 version;
	/* block number of znode attached to this seal */
	reiser4_block_nr block;
#if REISER4_DEBUG
	/* coord this seal is attached to. For debugging. */
	coord_t coord1;
	/* key this seal is attached to. For debugging. */
	reiser4_key key;
#endif
} seal_t;
  65058. +
  65059. +extern void reiser4_seal_init(seal_t *, const coord_t *, const reiser4_key *);
  65060. +extern void reiser4_seal_done(seal_t *);
  65061. +extern int reiser4_seal_is_set(const seal_t *);
  65062. +extern int reiser4_seal_validate(seal_t *, coord_t *,
  65063. + const reiser4_key *, lock_handle * ,
  65064. + znode_lock_mode mode, znode_lock_request request);
  65065. +
  65066. +/* __SEAL_H__ */
  65067. +#endif
  65068. +
  65069. +/* Make Linus happy.
  65070. + Local variables:
  65071. + c-indentation-style: "K&R"
  65072. + mode-name: "LC"
  65073. + c-basic-offset: 8
  65074. + tab-width: 8
  65075. + fill-column: 120
  65076. + End:
  65077. +*/
  65078. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/search.c linux-5.16.14/fs/reiser4/search.c
  65079. --- linux-5.16.14.orig/fs/reiser4/search.c 1970-01-01 01:00:00.000000000 +0100
  65080. +++ linux-5.16.14/fs/reiser4/search.c 2022-03-12 13:26:19.687892813 +0100
  65081. @@ -0,0 +1,1611 @@
  65082. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  65083. + * reiser4/README */
  65084. +
  65085. +#include "forward.h"
  65086. +#include "debug.h"
  65087. +#include "dformat.h"
  65088. +#include "key.h"
  65089. +#include "coord.h"
  65090. +#include "seal.h"
  65091. +#include "plugin/item/item.h"
  65092. +#include "plugin/node/node.h"
  65093. +#include "plugin/plugin.h"
  65094. +#include "jnode.h"
  65095. +#include "znode.h"
  65096. +#include "block_alloc.h"
  65097. +#include "tree_walk.h"
  65098. +#include "tree.h"
  65099. +#include "reiser4.h"
  65100. +#include "super.h"
  65101. +#include "inode.h"
  65102. +
  65103. +#include <linux/slab.h>
  65104. +
  65105. +static const char *bias_name(lookup_bias bias);
  65106. +
  65107. +/* tree searching algorithm, intranode searching algorithms are in
  65108. + plugin/node/ */
  65109. +
  65110. +/* tree lookup cache
  65111. + *
  65112. + * The coord by key cache consists of small list of recently accessed nodes
  65113. + * maintained according to the LRU discipline. Before doing real top-to-down
  65114. + * tree traversal this cache is scanned for nodes that can contain key
  65115. + * requested.
  65116. + *
  65117. + * The efficiency of coord cache depends heavily on locality of reference for
  65118. + * tree accesses. Our user level simulations show reasonably good hit ratios
  65119. + * for coord cache under most loads so far.
  65120. + */
  65121. +
  65122. +/* Initialise coord cache slot */
  65123. +static void cbk_cache_init_slot(cbk_cache_slot *slot)
  65124. +{
  65125. + assert("nikita-345", slot != NULL);
  65126. +
  65127. + INIT_LIST_HEAD(&slot->lru);
  65128. + slot->node = NULL;
  65129. +}
  65130. +
  65131. +/* Initialize coord cache */
  65132. +int cbk_cache_init(cbk_cache * cache/* cache to init */)
  65133. +{
  65134. + int i;
  65135. +
  65136. + assert("nikita-346", cache != NULL);
  65137. +
  65138. + cache->slot =
  65139. + kmalloc(sizeof(cbk_cache_slot) * cache->nr_slots,
  65140. + reiser4_ctx_gfp_mask_get());
  65141. + if (cache->slot == NULL)
  65142. + return RETERR(-ENOMEM);
  65143. +
  65144. + INIT_LIST_HEAD(&cache->lru);
  65145. + for (i = 0; i < cache->nr_slots; ++i) {
  65146. + cbk_cache_init_slot(cache->slot + i);
  65147. + list_add_tail(&((cache->slot + i)->lru), &cache->lru);
  65148. + }
  65149. + rwlock_init(&cache->guard);
  65150. + return 0;
  65151. +}
  65152. +
  65153. +/* free cbk cache data */
  65154. +void cbk_cache_done(cbk_cache * cache/* cache to release */)
  65155. +{
  65156. + assert("nikita-2493", cache != NULL);
  65157. + if (cache->slot != NULL) {
  65158. + kfree(cache->slot);
  65159. + cache->slot = NULL;
  65160. + }
  65161. +}
  65162. +
/* macro to iterate over all cbk cache slots: walks @slot along the cache's
 * LRU list (most recently used first) until the list head is reached */
#define for_all_slots(cache, slot)					  \
	for ((slot) = list_entry((cache)->lru.next, cbk_cache_slot, lru); \
	     &(cache)->lru != &(slot)->lru;				  \
	     (slot) = list_entry(slot->lru.next, cbk_cache_slot, lru))
  65168. +
  65169. +#if REISER4_DEBUG
/* this function assures that [cbk-cache-invariant] invariant holds:
 * on the LRU list all used slots precede all unused ones, and no znode
 * is cached in more than one slot. Returns 1 when the invariant holds.
 * Debug-only (compiled under REISER4_DEBUG). */
static int cbk_cache_invariant(const cbk_cache * cache)
{
	cbk_cache_slot *slot;
	int result;
	int unused;

	if (cache->nr_slots == 0)
		return 1;

	assert("nikita-2469", cache != NULL);
	unused = 0;
	result = 1;
	/* cast away const to take the guard lock for reading */
	read_lock(&((cbk_cache *)cache)->guard);
	for_all_slots(cache, slot) {
		/* in LRU first go all `used' slots followed by `unused' */
		if (unused && (slot->node != NULL))
			result = 0;
		if (slot->node == NULL)
			unused = 1;
		else {
			cbk_cache_slot *scan;

			/* all cached nodes are different */
			scan = slot;
			while (result) {
				scan = list_entry(scan->lru.next,
						  cbk_cache_slot, lru);
				/* reached list head: no duplicate found */
				if (&cache->lru == &scan->lru)
					break;
				if (slot->node == scan->node)
					result = 0;
			}
		}
		if (!result)
			break;
	}
	read_unlock(&((cbk_cache *)cache)->guard);
	return result;
}
  65210. +
  65211. +#endif
  65212. +
  65213. +/* Remove references, if any, to @node from coord cache */
  65214. +void cbk_cache_invalidate(const znode * node /* node to remove from cache */ ,
  65215. + reiser4_tree * tree/* tree to remove node from */)
  65216. +{
  65217. + cbk_cache_slot *slot;
  65218. + cbk_cache *cache;
  65219. + int i;
  65220. +
  65221. + assert("nikita-350", node != NULL);
  65222. + assert("nikita-1479", LOCK_CNT_GTZ(rw_locked_tree));
  65223. +
  65224. + cache = &tree->cbk_cache;
  65225. + assert("nikita-2470", cbk_cache_invariant(cache));
  65226. +
  65227. + write_lock(&(cache->guard));
  65228. + for (i = 0, slot = cache->slot; i < cache->nr_slots; ++i, ++slot) {
  65229. + if (slot->node == node) {
  65230. + list_move_tail(&slot->lru, &cache->lru);
  65231. + slot->node = NULL;
  65232. + break;
  65233. + }
  65234. + }
  65235. + write_unlock(&(cache->guard));
  65236. + assert("nikita-2471", cbk_cache_invariant(cache));
  65237. +}
  65238. +
/* add to the cbk-cache in the "tree" information about "node". This
   can actually be update of existing slot in a cache.

   The slot holding @node (or, if @node is not yet cached, the least
   recently used slot) is moved to the head of the LRU list under the
   cache write-lock. */
static void cbk_cache_add(const znode * node/* node to add to the cache */)
{
	cbk_cache *cache;

	cbk_cache_slot *slot;
	int i;

	assert("nikita-352", node != NULL);

	cache = &znode_get_tree(node)->cbk_cache;
	assert("nikita-2472", cbk_cache_invariant(cache));

	/* the cache may be configured with no slots at all */
	if (cache->nr_slots == 0)
		return;

	write_lock(&(cache->guard));
	/* find slot to update/add */
	for (i = 0, slot = cache->slot; i < cache->nr_slots; ++i, ++slot) {
		/* oops, this node is already in a cache */
		if (slot->node == node)
			break;
	}
	/* if all slots are used, reuse least recently used one */
	if (i == cache->nr_slots) {
		slot = list_entry(cache->lru.prev, cbk_cache_slot, lru);
		/* cast away const: slots store non-const znode pointers */
		slot->node = (znode *) node;
	}
	/* the touched slot becomes most recently used */
	list_move(&slot->lru, &cache->lru);
	write_unlock(&(cache->guard));
	assert("nikita-2473", cbk_cache_invariant(cache));
}
  65272. +
  65273. +static int setup_delimiting_keys(cbk_handle * h);
  65274. +static lookup_result coord_by_handle(cbk_handle * handle);
  65275. +static lookup_result traverse_tree(cbk_handle * h);
  65276. +static int cbk_cache_search(cbk_handle * h);
  65277. +
  65278. +static level_lookup_result cbk_level_lookup(cbk_handle * h);
  65279. +static level_lookup_result cbk_node_lookup(cbk_handle * h);
  65280. +
  65281. +/* helper functions */
  65282. +
  65283. +static void update_stale_dk(reiser4_tree * tree, znode * node);
  65284. +
  65285. +/* release parent node during traversal */
  65286. +static void put_parent(cbk_handle * h);
  65287. +/* check consistency of fields */
  65288. +static int sanity_check(cbk_handle * h);
  65289. +/* release resources in handle */
  65290. +static void hput(cbk_handle * h);
  65291. +
  65292. +static level_lookup_result search_to_left(cbk_handle * h);
  65293. +
  65294. +/* pack numerous (numberous I should say) arguments of coord_by_key() into
  65295. + * cbk_handle */
  65296. +static cbk_handle *cbk_pack(cbk_handle * handle,
  65297. + reiser4_tree * tree,
  65298. + const reiser4_key * key,
  65299. + coord_t *coord,
  65300. + lock_handle * active_lh,
  65301. + lock_handle * parent_lh,
  65302. + znode_lock_mode lock_mode,
  65303. + lookup_bias bias,
  65304. + tree_level lock_level,
  65305. + tree_level stop_level,
  65306. + __u32 flags, ra_info_t *info)
  65307. +{
  65308. + memset(handle, 0, sizeof *handle);
  65309. +
  65310. + handle->tree = tree;
  65311. + handle->key = key;
  65312. + handle->lock_mode = lock_mode;
  65313. + handle->bias = bias;
  65314. + handle->lock_level = lock_level;
  65315. + handle->stop_level = stop_level;
  65316. + handle->coord = coord;
  65317. + /* set flags. See comment in tree.h:cbk_flags */
  65318. + handle->flags = flags | CBK_TRUST_DK | CBK_USE_CRABLOCK;
  65319. +
  65320. + handle->active_lh = active_lh;
  65321. + handle->parent_lh = parent_lh;
  65322. + handle->ra_info = info;
  65323. + return handle;
  65324. +}
  65325. +
  65326. +/* main tree lookup procedure
  65327. +
  65328. + Check coord cache. If key we are looking for is not found there, call cbk()
  65329. + to do real tree traversal.
  65330. +
  65331. + As we have extents on the twig level, @lock_level and @stop_level can
  65332. + be different from LEAF_LEVEL and each other.
  65333. +
  65334. + Thread cannot keep any reiser4 locks (tree, znode, dk spin-locks, or znode
  65335. + long term locks) while calling this.
  65336. +*/
  65337. +lookup_result coord_by_key(reiser4_tree * tree /* tree to perform search
  65338. + * in. Usually this tree is
  65339. + * part of file-system
  65340. + * super-block */ ,
  65341. + const reiser4_key * key /* key to look for */ ,
  65342. + coord_t *coord /* where to store found
  65343. + * position in a tree. Fields
  65344. + * in "coord" are only valid if
  65345. + * coord_by_key() returned
  65346. + * "CBK_COORD_FOUND" */ ,
  65347. + lock_handle * lh, /* resulting lock handle */
  65348. + znode_lock_mode lock_mode /* type of lookup we
  65349. + * want on node. Pass
  65350. + * ZNODE_READ_LOCK here
  65351. + * if you only want to
  65352. + * read item found and
  65353. + * ZNODE_WRITE_LOCK if
  65354. + * you want to modify
  65355. + * it */ ,
  65356. + lookup_bias bias /* what to return if coord
  65357. + * with exactly the @key is
  65358. + * not in the tree */ ,
  65359. + tree_level lock_level/* tree level where to start
  65360. + * taking @lock type of
  65361. + * locks */ ,
  65362. + tree_level stop_level/* tree level to stop. Pass
  65363. + * LEAF_LEVEL or TWIG_LEVEL
  65364. + * here Item being looked
  65365. + * for has to be between
  65366. + * @lock_level and
  65367. + * @stop_level, inclusive */ ,
  65368. + __u32 flags /* search flags */ ,
  65369. + ra_info_t *
  65370. + info
  65371. + /* information about desired tree traversal
  65372. + * readahead */
  65373. + )
  65374. +{
  65375. + cbk_handle handle;
  65376. + lock_handle parent_lh;
  65377. + lookup_result result;
  65378. +
  65379. + init_lh(lh);
  65380. + init_lh(&parent_lh);
  65381. +
  65382. + assert("nikita-3023", reiser4_schedulable());
  65383. +
  65384. + assert("nikita-353", tree != NULL);
  65385. + assert("nikita-354", key != NULL);
  65386. + assert("nikita-355", coord != NULL);
  65387. + assert("nikita-356", (bias == FIND_EXACT)
  65388. + || (bias == FIND_MAX_NOT_MORE_THAN));
  65389. + assert("nikita-357", stop_level >= LEAF_LEVEL);
  65390. + /* no locks can be held during tree traversal */
  65391. + assert("nikita-2104", lock_stack_isclean(get_current_lock_stack()));
  65392. +
  65393. + cbk_pack(&handle,
  65394. + tree,
  65395. + key,
  65396. + coord,
  65397. + lh,
  65398. + &parent_lh,
  65399. + lock_mode, bias, lock_level, stop_level, flags, info);
  65400. +
  65401. + result = coord_by_handle(&handle);
  65402. + assert("nikita-3247",
  65403. + ergo(!IS_CBKERR(result), coord->node == lh->node));
  65404. + return result;
  65405. +}
  65406. +
/* like coord_by_key(), but starts traversal from vroot of @object rather than
 * from tree root.
 *
 * @object may be NULL, in which case the current tree is searched.
 * All other parameters have the same meaning as in coord_by_key(); the
 * caller must hold no reiser4 locks.
 */
lookup_result reiser4_object_lookup(struct inode *object,
				    const reiser4_key * key,
				    coord_t *coord,
				    lock_handle * lh,
				    znode_lock_mode lock_mode,
				    lookup_bias bias,
				    tree_level lock_level,
				    tree_level stop_level, __u32 flags,
				    ra_info_t *info)
{
	cbk_handle handle;
	lock_handle parent_lh;
	lookup_result result;

	init_lh(lh);
	init_lh(&parent_lh);

	assert("nikita-3023", reiser4_schedulable());

	assert("nikita-354", key != NULL);
	assert("nikita-355", coord != NULL);
	assert("nikita-356", (bias == FIND_EXACT)
	       || (bias == FIND_MAX_NOT_MORE_THAN));
	assert("nikita-357", stop_level >= LEAF_LEVEL);
	/* no locks can be held during tree search by key */
	assert("nikita-2104", lock_stack_isclean(get_current_lock_stack()));

	cbk_pack(&handle,
		 object != NULL ? reiser4_tree_by_inode(object) : current_tree,
		 key,
		 coord,
		 lh,
		 &parent_lh,
		 lock_mode, bias, lock_level, stop_level, flags, info);
	/* record the object so the traversal can use its vroot */
	handle.object = object;

	result = coord_by_handle(&handle);
	assert("nikita-3247",
	       ergo(!IS_CBKERR(result), coord->node == lh->node));
	return result;
}
  65450. +
  65451. +/* lookup by cbk_handle. Common part of coord_by_key() and
  65452. + reiser4_object_lookup(). */
  65453. +static lookup_result coord_by_handle(cbk_handle * handle)
  65454. +{
  65455. + /*
  65456. + * first check cbk_cache (which is look-aside cache for our tree) and
  65457. + * of this fails, start traversal.
  65458. + */
  65459. + /* first check whether "key" is in cache of recent lookups. */
  65460. + if (cbk_cache_search(handle) == 0)
  65461. + return handle->result;
  65462. + else
  65463. + return traverse_tree(handle);
  65464. +}
  65465. +
  65466. +/* Execute actor for each item (or unit, depending on @through_units_p),
  65467. + starting from @coord, right-ward, until either:
  65468. +
  65469. + - end of the tree is reached
  65470. + - unformatted node is met
  65471. + - error occurred
  65472. + - @actor returns 0 or less
  65473. +
  65474. + Error code, or last actor return value is returned.
  65475. +
+ This is used by plugin/dir/hashed_dir.c:reiser4_find_entry() to move through
  65477. + sequence of entries with identical keys and alikes.
  65478. +*/
int reiser4_iterate_tree(reiser4_tree * tree /* tree to scan */ ,
			 coord_t *coord /* coord to start from */ ,
			 lock_handle * lh /* lock handle to start with and to
					   * update along the way */ ,
			 tree_iterate_actor_t actor /* function to call on each
						     * item/unit */ ,
			 void *arg /* argument to pass to @actor */ ,
			 znode_lock_mode mode /* lock mode on scanned nodes */ ,
			 int through_units_p /* call @actor on each item or on
					      * each unit */ )
{
	int result;

	assert("nikita-1143", tree != NULL);
	assert("nikita-1145", coord != NULL);
	assert("nikita-1146", lh != NULL);
	assert("nikita-1147", actor != NULL);

	/* pin node data for the starting coord */
	result = zload(coord->node);
	coord_clear_iplug(coord);
	if (result != 0)
		return result;
	if (!coord_is_existing_unit(coord)) {
		zrelse(coord->node);
		return -ENOENT;
	}
	/* keep going while the actor returns a positive value */
	while ((result = actor(tree, coord, lh, arg)) > 0) {
		/* move further */
		if ((through_units_p && coord_next_unit(coord)) ||
		    (!through_units_p && coord_next_item(coord))) {
			/* end of node reached: hop right, skipping empty
			 * nodes */
			do {
				lock_handle couple;

				/* move to the next node */
				init_lh(&couple);
				result =
				    reiser4_get_right_neighbor(&couple,
							       coord->node,
							       (int)mode,
							       GN_CAN_USE_UPPER_LEVELS);
				zrelse(coord->node);
				if (result == 0) {

					result = zload(couple.node);
					if (result != 0) {
						done_lh(&couple);
						return result;
					}

					/* transfer the long-term lock from
					 * the old node to the new one */
					coord_init_first_unit(coord,
							      couple.node);
					done_lh(lh);
					move_lh(lh, &couple);
				} else
					/* NOTE(review): @lh is still held
					 * here; presumably the caller cleans
					 * it up on error -- confirm */
					return result;
			} while (node_is_empty(coord->node));
		}

		assert("nikita-1149", coord_is_existing_unit(coord));
	}
	zrelse(coord->node);
	return result;
}
  65542. +
  65543. +/* return locked uber znode for @tree */
  65544. +int get_uber_znode(reiser4_tree * tree, znode_lock_mode mode,
  65545. + znode_lock_request pri, lock_handle * lh)
  65546. +{
  65547. + int result;
  65548. +
  65549. + result = longterm_lock_znode(lh, tree->uber, mode, pri);
  65550. + return result;
  65551. +}
  65552. +
  65553. +/* true if @key is strictly within @node
  65554. +
  65555. + we are looking for a possibly non-unique key and the item is at the edge of
  65556. + @node. May be it is in the neighbor.
  65557. +*/
  65558. +static int znode_contains_key_strict(znode * node /* node to check key
  65559. + * against */ ,
  65560. + const reiser4_key *
  65561. + key /* key to check */ ,
  65562. + int isunique)
  65563. +{
  65564. + int answer;
  65565. +
  65566. + assert("nikita-1760", node != NULL);
  65567. + assert("nikita-1722", key != NULL);
  65568. +
  65569. + if (keyge(key, &node->rd_key))
  65570. + return 0;
  65571. +
  65572. + answer = keycmp(&node->ld_key, key);
  65573. +
  65574. + if (isunique)
  65575. + return answer != GREATER_THAN;
  65576. + else
  65577. + return answer == LESS_THAN;
  65578. +}
  65579. +
  65580. +/*
  65581. + * Virtual Root (vroot) code.
  65582. + *
  65583. + * For given file system object (e.g., regular file or directory) let's
  65584. + * define its "virtual root" as lowest in the tree (that is, furthest
  65585. + * from the tree root) node such that all body items of said object are
  65586. + * located in a tree rooted at this node.
  65587. + *
  65588. + * Once vroot of object is found all tree lookups for items within body of
  65589. + * this object ("object lookups") can be started from its vroot rather
  65590. + * than from real root. This has following advantages:
  65591. + *
  65592. + * 1. amount of nodes traversed during lookup (and, hence, amount of
  65593. + * key comparisons made) decreases, and
  65594. + *
  65595. + * 2. contention on tree root is decreased. This latter was actually
  65596. + * motivating reason behind vroot, because spin lock of root node,
  65597. + * which is taken when acquiring long-term lock on root node is the
  65598. + * hottest lock in the reiser4.
  65599. + *
  65600. + * How to find vroot.
  65601. + *
  65602. + * When vroot of object F is not yet determined, all object lookups start
  65603. + * from the root of the tree. At each tree level during traversal we have
  65604. + * a node N such that a key we are looking for (which is the key inside
  65605. + * object's body) is located within N. In function handle_vroot() called
  65606. + * from cbk_level_lookup() we check whether N is possible vroot for
  65607. + * F. Check is trivial---if neither leftmost nor rightmost item of N
  65608. + * belongs to F (and we already have helpful ->owns_item() method of
  65609. + * object plugin for this), then N is possible vroot of F. This, of
  65610. + * course, relies on the assumption that each object occupies contiguous
  65611. + * range of keys in the tree.
  65612. + *
  65613. + * Thus, traversing tree downward and checking each node as we go, we can
  65614. + * find lowest such node, which, by definition, is vroot.
  65615. + *
  65616. + * How to track vroot.
  65617. + *
  65618. + * Nohow. If actual vroot changes, next object lookup will just restart
  65619. + * from the actual tree root, refreshing object's vroot along the way.
  65620. + *
  65621. + */
  65622. +
  65623. +/*
  65624. + * Check whether @node is possible vroot of @object.
  65625. + */
  65626. +static void handle_vroot(struct inode *object, znode * node)
  65627. +{
  65628. + file_plugin *fplug;
  65629. + coord_t coord;
  65630. +
  65631. + fplug = inode_file_plugin(object);
  65632. + assert("nikita-3353", fplug != NULL);
  65633. + assert("nikita-3354", fplug->owns_item != NULL);
  65634. +
  65635. + if (unlikely(node_is_empty(node)))
  65636. + return;
  65637. +
  65638. + coord_init_first_unit(&coord, node);
  65639. + /*
  65640. + * if leftmost item of @node belongs to @object, we cannot be sure
  65641. + * that @node is vroot of @object, because, some items of @object are
  65642. + * probably in the sub-tree rooted at the left neighbor of @node.
  65643. + */
  65644. + if (fplug->owns_item(object, &coord))
  65645. + return;
  65646. + coord_init_last_unit(&coord, node);
  65647. + /* mutatis mutandis for the rightmost item */
  65648. + if (fplug->owns_item(object, &coord))
  65649. + return;
  65650. + /* otherwise, @node is possible vroot of @object */
  65651. + inode_set_vroot(object, node);
  65652. +}
  65653. +
  65654. +/*
  65655. + * helper function used by traverse tree to start tree traversal not from the
  65656. + * tree root, but from @h->object's vroot, if possible.
  65657. + */
  65658. +static int prepare_object_lookup(cbk_handle * h)
  65659. +{
  65660. + znode *vroot;
  65661. + int result;
  65662. +
  65663. + vroot = inode_get_vroot(h->object);
  65664. + if (vroot == NULL) {
  65665. + /*
  65666. + * object doesn't have known vroot, start from real tree root.
  65667. + */
  65668. + return LOOKUP_CONT;
  65669. + }
  65670. +
  65671. + h->level = znode_get_level(vroot);
  65672. + /* take a long-term lock on vroot */
  65673. + h->result = longterm_lock_znode(h->active_lh, vroot,
  65674. + cbk_lock_mode(h->level, h),
  65675. + ZNODE_LOCK_LOPRI);
  65676. + result = LOOKUP_REST;
  65677. + if (h->result == 0) {
  65678. + int isunique;
  65679. + int inside;
  65680. +
  65681. + isunique = h->flags & CBK_UNIQUE;
  65682. + /* check that key is inside vroot */
  65683. + read_lock_dk(h->tree);
  65684. + inside = (znode_contains_key_strict(vroot, h->key, isunique) &&
  65685. + !ZF_ISSET(vroot, JNODE_HEARD_BANSHEE));
  65686. + read_unlock_dk(h->tree);
  65687. + if (inside) {
  65688. + h->result = zload(vroot);
  65689. + if (h->result == 0) {
  65690. + /* search for key in vroot. */
  65691. + result = cbk_node_lookup(h);
  65692. + zrelse(vroot); /*h->active_lh->node); */
  65693. + if (h->active_lh->node != vroot) {
  65694. + result = LOOKUP_REST;
  65695. + } else if (result == LOOKUP_CONT) {
  65696. + move_lh(h->parent_lh, h->active_lh);
  65697. + h->flags &= ~CBK_DKSET;
  65698. + }
  65699. + }
  65700. + }
  65701. + }
  65702. +
  65703. + zput(vroot);
  65704. +
  65705. + if (IS_CBKERR(h->result) || result == LOOKUP_REST)
  65706. + hput(h);
  65707. + return result;
  65708. +}
  65709. +
  65710. +/* main function that handles common parts of tree traversal: starting
  65711. + (fake znode handling), restarts, error handling, completion */
  65712. +static lookup_result traverse_tree(cbk_handle * h/* search handle */)
  65713. +{
  65714. + int done;
  65715. + int iterations;
  65716. + int vroot_used;
  65717. +
  65718. + assert("nikita-365", h != NULL);
  65719. + assert("nikita-366", h->tree != NULL);
  65720. + assert("nikita-367", h->key != NULL);
  65721. + assert("nikita-368", h->coord != NULL);
  65722. + assert("nikita-369", (h->bias == FIND_EXACT)
  65723. + || (h->bias == FIND_MAX_NOT_MORE_THAN));
  65724. + assert("nikita-370", h->stop_level >= LEAF_LEVEL);
  65725. + assert("nikita-2949", !(h->flags & CBK_DKSET));
  65726. + assert("zam-355", lock_stack_isclean(get_current_lock_stack()));
  65727. +
  65728. + done = 0;
  65729. + iterations = 0;
  65730. + vroot_used = 0;
  65731. +
  65732. + /* loop for restarts */
  65733. +restart:
  65734. +
  65735. + assert("nikita-3024", reiser4_schedulable());
  65736. +
  65737. + h->result = CBK_COORD_FOUND;
  65738. + /* connect_znode() needs it */
  65739. + h->ld_key = *reiser4_min_key();
  65740. + h->rd_key = *reiser4_max_key();
  65741. + h->flags |= CBK_DKSET;
  65742. + h->error = NULL;
  65743. +
  65744. + if (!vroot_used && h->object != NULL) {
  65745. + vroot_used = 1;
  65746. + done = prepare_object_lookup(h);
  65747. + if (done == LOOKUP_REST)
  65748. + goto restart;
  65749. + else if (done == LOOKUP_DONE)
  65750. + return h->result;
  65751. + }
  65752. + if (h->parent_lh->node == NULL) {
  65753. + done =
  65754. + get_uber_znode(h->tree, ZNODE_READ_LOCK, ZNODE_LOCK_LOPRI,
  65755. + h->parent_lh);
  65756. +
  65757. + assert("nikita-1637", done != -E_DEADLOCK);
  65758. +
  65759. + h->block = h->tree->root_block;
  65760. + h->level = h->tree->height;
  65761. + h->coord->node = h->parent_lh->node;
  65762. +
  65763. + if (done != 0)
  65764. + return done;
  65765. + }
  65766. +
  65767. + /* loop descending a tree */
  65768. + while (!done) {
  65769. +
  65770. + if (unlikely((iterations > REISER4_CBK_ITERATIONS_LIMIT) &&
  65771. + IS_POW(iterations))) {
  65772. + warning("nikita-1481", "Too many iterations: %i",
  65773. + iterations);
  65774. + reiser4_print_key("key", h->key);
  65775. + ++iterations;
  65776. + } else if (unlikely(iterations > REISER4_MAX_CBK_ITERATIONS)) {
  65777. + h->error =
  65778. + "reiser-2018: Too many iterations. Tree corrupted, or (less likely) starvation occurring.";
  65779. + h->result = RETERR(-EIO);
  65780. + break;
  65781. + }
  65782. + switch (cbk_level_lookup(h)) {
  65783. + case LOOKUP_CONT:
  65784. + move_lh(h->parent_lh, h->active_lh);
  65785. + continue;
  65786. + default:
  65787. + wrong_return_value("nikita-372", "cbk_level");
  65788. + case LOOKUP_DONE:
  65789. + done = 1;
  65790. + break;
  65791. + case LOOKUP_REST:
  65792. + hput(h);
  65793. + /* deadlock avoidance is normal case. */
  65794. + if (h->result != -E_DEADLOCK)
  65795. + ++iterations;
  65796. + reiser4_preempt_point();
  65797. + goto restart;
  65798. + }
  65799. + }
  65800. + /* that's all. The rest is error handling */
  65801. + if (unlikely(h->error != NULL)) {
  65802. + warning("nikita-373", "%s: level: %i, "
  65803. + "lock_level: %i, stop_level: %i "
  65804. + "lock_mode: %s, bias: %s",
  65805. + h->error, h->level, h->lock_level, h->stop_level,
  65806. + lock_mode_name(h->lock_mode), bias_name(h->bias));
  65807. + reiser4_print_address("block", &h->block);
  65808. + reiser4_print_key("key", h->key);
  65809. + print_coord_content("coord", h->coord);
  65810. + }
  65811. + /* `unlikely' error case */
  65812. + if (unlikely(IS_CBKERR(h->result))) {
  65813. + /* failure. do cleanup */
  65814. + hput(h);
  65815. + } else {
  65816. + assert("nikita-1605", WITH_DATA_RET
  65817. + (h->coord->node, 1,
  65818. + ergo((h->result == CBK_COORD_FOUND) &&
  65819. + (h->bias == FIND_EXACT) &&
  65820. + (!node_is_empty(h->coord->node)),
  65821. + coord_is_existing_item(h->coord))));
  65822. + }
  65823. + return h->result;
  65824. +}
  65825. +
  65826. +/* find delimiting keys of child
  65827. +
  65828. + Determine left and right delimiting keys for child pointed to by
  65829. + @parent_coord.
  65830. +
  65831. +*/
  65832. +static void find_child_delimiting_keys(znode * parent /* parent znode, passed
  65833. + * locked */ ,
  65834. + const coord_t *parent_coord
  65835. + /* coord where pointer
  65836. + * to child is stored
  65837. + */ ,
  65838. + reiser4_key * ld /* where to store left
  65839. + * delimiting key */ ,
  65840. + reiser4_key * rd /* where to store right
  65841. + * delimiting key */ )
  65842. +{
  65843. + coord_t neighbor;
  65844. +
  65845. + assert("nikita-1484", parent != NULL);
  65846. + assert_rw_locked(&(znode_get_tree(parent)->dk_lock));
  65847. +
  65848. + coord_dup(&neighbor, parent_coord);
  65849. +
  65850. + if (neighbor.between == AT_UNIT)
  65851. + /* imitate item ->lookup() behavior. */
  65852. + neighbor.between = AFTER_UNIT;
  65853. +
  65854. + if (coord_set_to_left(&neighbor) == 0)
  65855. + unit_key_by_coord(&neighbor, ld);
  65856. + else {
  65857. + assert("nikita-14851", 0);
  65858. + *ld = *znode_get_ld_key(parent);
  65859. + }
  65860. +
  65861. + coord_dup(&neighbor, parent_coord);
  65862. + if (neighbor.between == AT_UNIT)
  65863. + neighbor.between = AFTER_UNIT;
  65864. + if (coord_set_to_right(&neighbor) == 0)
  65865. + unit_key_by_coord(&neighbor, rd);
  65866. + else
  65867. + *rd = *znode_get_rd_key(parent);
  65868. +}
  65869. +
  65870. +/*
  65871. + * setup delimiting keys for a child
  65872. + *
  65873. + * @parent parent node
  65874. + *
  65875. + * @coord location in @parent where pointer to @child is
  65876. + *
  65877. + * @child child node
  65878. + */
  65879. +int
  65880. +set_child_delimiting_keys(znode * parent, const coord_t *coord, znode * child)
  65881. +{
  65882. + reiser4_tree *tree;
  65883. +
  65884. + assert("nikita-2952",
  65885. + znode_get_level(parent) == znode_get_level(coord->node));
  65886. +
  65887. + /* fast check without taking dk lock. This is safe, because
  65888. + * JNODE_DKSET is never cleared once set. */
  65889. + if (!ZF_ISSET(child, JNODE_DKSET)) {
  65890. + tree = znode_get_tree(parent);
  65891. + write_lock_dk(tree);
  65892. + if (likely(!ZF_ISSET(child, JNODE_DKSET))) {
  65893. + find_child_delimiting_keys(parent, coord,
  65894. + &child->ld_key,
  65895. + &child->rd_key);
  65896. + ON_DEBUG(child->ld_key_version =
  65897. + atomic_inc_return(&delim_key_version);
  65898. + child->rd_key_version =
  65899. + atomic_inc_return(&delim_key_version););
  65900. + ZF_SET(child, JNODE_DKSET);
  65901. + }
  65902. + write_unlock_dk(tree);
  65903. + return 1;
  65904. + }
  65905. + return 0;
  65906. +}
  65907. +
  65908. +/* Perform tree lookup at one level. This is called from cbk_traverse()
  65909. + function that drives lookup through tree and calls cbk_node_lookup() to
  65910. + perform lookup within one node.
  65911. +
  65912. + See comments in a code.
  65913. +*/
  65914. +static level_lookup_result cbk_level_lookup(cbk_handle * h/* search handle */)
  65915. +{
  65916. + int ret;
  65917. + int setdk;
  65918. + int ldkeyset = 0;
  65919. + reiser4_key ldkey;
  65920. + reiser4_key key;
  65921. + znode *active;
  65922. +
  65923. + assert("nikita-3025", reiser4_schedulable());
  65924. +
  65925. + /* acquire reference to @active node */
  65926. + active = zget(h->tree, &h->block, h->parent_lh->node, h->level,
  65927. + reiser4_ctx_gfp_mask_get());
  65928. +
  65929. + if (IS_ERR(active)) {
  65930. + h->result = PTR_ERR(active);
  65931. + return LOOKUP_DONE;
  65932. + }
  65933. +
  65934. + /* lock @active */
  65935. + h->result = longterm_lock_znode(h->active_lh,
  65936. + active,
  65937. + cbk_lock_mode(h->level, h),
  65938. + ZNODE_LOCK_LOPRI);
  65939. + /* longterm_lock_znode() acquires additional reference to znode (which
  65940. + will be later released by longterm_unlock_znode()). Release
  65941. + reference acquired by zget().
  65942. + */
  65943. + zput(active);
  65944. + if (unlikely(h->result != 0))
  65945. + goto fail_or_restart;
  65946. +
  65947. + setdk = 0;
  65948. + /* if @active is accessed for the first time, setup delimiting keys on
  65949. + it. Delimiting keys are taken from the parent node. See
  65950. + setup_delimiting_keys() for details.
  65951. + */
  65952. + if (h->flags & CBK_DKSET) {
  65953. + setdk = setup_delimiting_keys(h);
  65954. + h->flags &= ~CBK_DKSET;
  65955. + } else {
  65956. + znode *parent;
  65957. +
  65958. + parent = h->parent_lh->node;
  65959. + h->result = zload(parent);
  65960. + if (unlikely(h->result != 0))
  65961. + goto fail_or_restart;
  65962. +
  65963. + if (!ZF_ISSET(active, JNODE_DKSET))
  65964. + setdk = set_child_delimiting_keys(parent,
  65965. + h->coord, active);
  65966. + else {
  65967. + read_lock_dk(h->tree);
  65968. + find_child_delimiting_keys(parent, h->coord, &ldkey,
  65969. + &key);
  65970. + read_unlock_dk(h->tree);
  65971. + ldkeyset = 1;
  65972. + }
  65973. + zrelse(parent);
  65974. + }
  65975. +
  65976. + /* this is ugly kludge. Reminder: this is necessary, because
  65977. + ->lookup() method returns coord with ->between field probably set
  65978. + to something different from AT_UNIT.
  65979. + */
  65980. + h->coord->between = AT_UNIT;
  65981. +
  65982. + if (znode_just_created(active) && (h->coord->node != NULL)) {
  65983. + write_lock_tree(h->tree);
  65984. + /* if we are going to load znode right now, setup
  65985. + ->in_parent: coord where pointer to this node is stored in
  65986. + parent.
  65987. + */
  65988. + coord_to_parent_coord(h->coord, &active->in_parent);
  65989. + write_unlock_tree(h->tree);
  65990. + }
  65991. +
  65992. + /* check connectedness without holding tree lock---false negatives
  65993. + * will be re-checked by connect_znode(), and false positives are
  65994. + * impossible---@active cannot suddenly turn into unconnected
  65995. + * state. */
  65996. + if (!znode_is_connected(active)) {
  65997. + h->result = connect_znode(h->coord, active);
  65998. + if (unlikely(h->result != 0)) {
  65999. + put_parent(h);
  66000. + goto fail_or_restart;
  66001. + }
  66002. + }
  66003. +
  66004. + jload_prefetch(ZJNODE(active));
  66005. +
  66006. + if (setdk)
  66007. + update_stale_dk(h->tree, active);
  66008. +
  66009. + /* put_parent() cannot be called earlier, because connect_znode()
  66010. + assumes parent node is referenced; */
  66011. + put_parent(h);
  66012. +
  66013. + if ((!znode_contains_key_lock(active, h->key) &&
  66014. + (h->flags & CBK_TRUST_DK))
  66015. + || ZF_ISSET(active, JNODE_HEARD_BANSHEE)) {
  66016. + /* 1. key was moved out of this node while this thread was
  66017. + waiting for the lock. Restart. More elaborate solution is
  66018. + to determine where key moved (to the left, or to the right)
  66019. + and try to follow it through sibling pointers.
  66020. +
  66021. + 2. or, node itself is going to be removed from the
  66022. + tree. Release lock and restart.
  66023. + */
  66024. + h->result = -E_REPEAT;
  66025. + }
  66026. + if (h->result == -E_REPEAT)
  66027. + return LOOKUP_REST;
  66028. +
  66029. + h->result = zload_ra(active, h->ra_info);
  66030. + if (h->result)
  66031. + return LOOKUP_DONE;
  66032. +
  66033. + /* sanity checks */
  66034. + if (sanity_check(h)) {
  66035. + zrelse(active);
  66036. + return LOOKUP_DONE;
  66037. + }
  66038. +
  66039. + /* check that key of leftmost item in the @active is the same as in
  66040. + * its parent */
  66041. + if (ldkeyset && !node_is_empty(active) &&
  66042. + !keyeq(leftmost_key_in_node(active, &key), &ldkey)) {
  66043. + warning("vs-3533", "Keys are inconsistent. Fsck?");
  66044. + reiser4_print_key("inparent", &ldkey);
  66045. + reiser4_print_key("inchild", &key);
  66046. + h->result = RETERR(-EIO);
  66047. + zrelse(active);
  66048. + return LOOKUP_DONE;
  66049. + }
  66050. +
  66051. + if (h->object != NULL)
  66052. + handle_vroot(h->object, active);
  66053. +
  66054. + ret = cbk_node_lookup(h);
  66055. +
  66056. + /* h->active_lh->node might change, but active is yet to be zrelsed */
  66057. + zrelse(active);
  66058. +
  66059. + return ret;
  66060. +
  66061. +fail_or_restart:
  66062. + if (h->result == -E_DEADLOCK)
  66063. + return LOOKUP_REST;
  66064. + return LOOKUP_DONE;
  66065. +}
  66066. +
  66067. +#if REISER4_DEBUG
  66068. +/* check left and right delimiting keys of a znode */
  66069. +void check_dkeys(znode * node)
  66070. +{
  66071. + znode *left;
  66072. + znode *right;
  66073. +
  66074. + read_lock_tree(current_tree);
  66075. + read_lock_dk(current_tree);
  66076. +
  66077. + assert("vs-1710", znode_is_any_locked(node));
  66078. + assert("vs-1197",
  66079. + !keygt(znode_get_ld_key(node), znode_get_rd_key(node)));
  66080. +
  66081. + left = node->left;
  66082. + right = node->right;
  66083. +
  66084. + if (ZF_ISSET(node, JNODE_LEFT_CONNECTED) && ZF_ISSET(node, JNODE_DKSET)
  66085. + && left != NULL && ZF_ISSET(left, JNODE_DKSET))
  66086. + /* check left neighbor. Note that left neighbor is not locked,
  66087. + so it might get wrong delimiting keys therefore */
  66088. + assert("vs-1198",
  66089. + (keyeq(znode_get_rd_key(left), znode_get_ld_key(node))
  66090. + || ZF_ISSET(left, JNODE_HEARD_BANSHEE)));
  66091. +
  66092. + if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && ZF_ISSET(node, JNODE_DKSET)
  66093. + && right != NULL && ZF_ISSET(right, JNODE_DKSET))
  66094. + /* check right neighbor. Note that right neighbor is not
  66095. + locked, so it might get wrong delimiting keys therefore */
  66096. + assert("vs-1199",
  66097. + (keyeq(znode_get_rd_key(node), znode_get_ld_key(right))
  66098. + || ZF_ISSET(right, JNODE_HEARD_BANSHEE)));
  66099. +
  66100. + read_unlock_dk(current_tree);
  66101. + read_unlock_tree(current_tree);
  66102. +}
  66103. +#endif
  66104. +
  66105. +/* true if @key is left delimiting key of @node */
  66106. +static int key_is_ld(znode * node, const reiser4_key * key)
  66107. +{
  66108. + int ld;
  66109. +
  66110. + assert("nikita-1716", node != NULL);
  66111. + assert("nikita-1758", key != NULL);
  66112. +
  66113. + read_lock_dk(znode_get_tree(node));
  66114. + assert("nikita-1759", znode_contains_key(node, key));
  66115. + ld = keyeq(znode_get_ld_key(node), key);
  66116. + read_unlock_dk(znode_get_tree(node));
  66117. + return ld;
  66118. +}
  66119. +
  66120. +/* Process one node during tree traversal.
  66121. +
  66122. + This is called by cbk_level_lookup(). */
  66123. +static level_lookup_result cbk_node_lookup(cbk_handle * h/* search handle */)
  66124. +{
  66125. + /* node plugin of @active */
  66126. + node_plugin *nplug;
  66127. + /* item plugin of item that was found */
  66128. + item_plugin *iplug;
  66129. + /* search bias */
  66130. + lookup_bias node_bias;
  66131. + /* node we are operating upon */
  66132. + znode *active;
  66133. + /* tree we are searching in */
  66134. + reiser4_tree *tree;
  66135. + /* result */
  66136. + int result;
  66137. +
  66138. + assert("nikita-379", h != NULL);
  66139. +
  66140. + active = h->active_lh->node;
  66141. + tree = h->tree;
  66142. +
  66143. + nplug = active->nplug;
  66144. + assert("nikita-380", nplug != NULL);
  66145. +
  66146. + ON_DEBUG(check_dkeys(active));
  66147. +
  66148. + /* return item from "active" node with maximal key not greater than
  66149. + "key" */
  66150. + node_bias = h->bias;
  66151. + result = nplug->lookup(active, h->key, node_bias, h->coord);
  66152. + if (unlikely(result != NS_FOUND && result != NS_NOT_FOUND)) {
  66153. + /* error occurred */
  66154. + h->result = result;
  66155. + return LOOKUP_DONE;
  66156. + }
  66157. + if (h->level == h->stop_level) {
  66158. + /* welcome to the stop level */
  66159. + assert("nikita-381", h->coord->node == active);
  66160. + if (result == NS_FOUND) {
  66161. + /* success of tree lookup */
  66162. + if (!(h->flags & CBK_UNIQUE)
  66163. + && key_is_ld(active, h->key))
  66164. + return search_to_left(h);
  66165. + else
  66166. + h->result = CBK_COORD_FOUND;
  66167. + } else {
  66168. + h->result = CBK_COORD_NOTFOUND;
  66169. + }
  66170. + if (!(h->flags & CBK_IN_CACHE))
  66171. + cbk_cache_add(active);
  66172. + return LOOKUP_DONE;
  66173. + }
  66174. +
  66175. + if (h->level > TWIG_LEVEL && result == NS_NOT_FOUND) {
  66176. + h->error = "not found on internal node";
  66177. + h->result = result;
  66178. + return LOOKUP_DONE;
  66179. + }
  66180. +
  66181. + assert("vs-361", h->level > h->stop_level);
  66182. +
  66183. + if (handle_eottl(h, &result)) {
  66184. + assert("vs-1674", (result == LOOKUP_DONE ||
  66185. + result == LOOKUP_REST));
  66186. + return result;
  66187. + }
  66188. +
  66189. + /* go down to next level */
  66190. + check_me("vs-12", zload(h->coord->node) == 0);
  66191. + assert("nikita-2116", item_is_internal(h->coord));
  66192. + iplug = item_plugin_by_coord(h->coord);
  66193. + iplug->s.internal.down_link(h->coord, h->key, &h->block);
  66194. + zrelse(h->coord->node);
  66195. + --h->level;
  66196. + return LOOKUP_CONT; /* continue */
  66197. +}
  66198. +
  66199. +/* scan cbk_cache slots looking for a match for @h */
  66200. +static int cbk_cache_scan_slots(cbk_handle * h/* cbk handle */)
  66201. +{
  66202. + level_lookup_result llr;
  66203. + znode *node;
  66204. + reiser4_tree *tree;
  66205. + cbk_cache_slot *slot;
  66206. + cbk_cache *cache;
  66207. + tree_level level;
  66208. + int isunique;
  66209. + const reiser4_key *key;
  66210. + int result;
  66211. +
  66212. + assert("nikita-1317", h != NULL);
  66213. + assert("nikita-1315", h->tree != NULL);
  66214. + assert("nikita-1316", h->key != NULL);
  66215. +
  66216. + tree = h->tree;
  66217. + cache = &tree->cbk_cache;
  66218. + if (cache->nr_slots == 0)
  66219. + /* size of cbk cache was set to 0 by mount time option. */
  66220. + return RETERR(-ENOENT);
  66221. +
  66222. + assert("nikita-2474", cbk_cache_invariant(cache));
  66223. + node = NULL; /* to keep gcc happy */
  66224. + level = h->level;
  66225. + key = h->key;
  66226. + isunique = h->flags & CBK_UNIQUE;
  66227. + result = RETERR(-ENOENT);
  66228. +
  66229. + /*
  66230. + * this is time-critical function and dragons had, hence, been settled
  66231. + * here.
  66232. + *
  66233. + * Loop below scans cbk cache slots trying to find matching node with
  66234. + * suitable range of delimiting keys and located at the h->level.
  66235. + *
  66236. + * Scan is done under cbk cache spin lock that protects slot->node
  66237. + * pointers. If suitable node is found we want to pin it in
  66238. + * memory. But slot->node can point to the node with x_count 0
  66239. + * (unreferenced). Such node can be recycled at any moment, or can
  66240. + * already be in the process of being recycled (within jput()).
  66241. + *
  66242. + * As we found node in the cbk cache, it means that jput() hasn't yet
  66243. + * called cbk_cache_invalidate().
  66244. + *
  66245. + * We acquire reference to the node without holding tree lock, and
  66246. + * later, check node's RIP bit. This avoids races with jput().
  66247. + */
  66248. +
  66249. + rcu_read_lock();
  66250. + read_lock(&((cbk_cache *)cache)->guard);
  66251. +
  66252. + slot = list_entry(cache->lru.next, cbk_cache_slot, lru);
  66253. + slot = list_entry(slot->lru.prev, cbk_cache_slot, lru);
  66254. + BUG_ON(&slot->lru != &cache->lru);/*????*/
  66255. + while (1) {
  66256. +
  66257. + slot = list_entry(slot->lru.next, cbk_cache_slot, lru);
  66258. +
  66259. + if (&cache->lru != &slot->lru)
  66260. + node = slot->node;
  66261. + else
  66262. + node = NULL;
  66263. +
  66264. + if (unlikely(node == NULL))
  66265. + break;
  66266. +
  66267. + /*
  66268. + * this is (hopefully) the only place in the code where we are
  66269. + * working with delimiting keys without holding dk lock. This
  66270. + * is fine here, because this is only "guess" anyway---keys
  66271. + * are rechecked under dk lock below.
  66272. + */
  66273. + if (znode_get_level(node) == level &&
  66274. + /* reiser4_min_key < key < reiser4_max_key */
  66275. + znode_contains_key_strict(node, key, isunique)) {
  66276. + zref(node);
  66277. + result = 0;
  66278. + spin_lock_prefetch(&tree->tree_lock);
  66279. + break;
  66280. + }
  66281. + }
  66282. + read_unlock(&((cbk_cache *)cache)->guard);
  66283. +
  66284. + assert("nikita-2475", cbk_cache_invariant(cache));
  66285. +
  66286. + if (unlikely(result == 0 && ZF_ISSET(node, JNODE_RIP)))
  66287. + result = -ENOENT;
  66288. +
  66289. + rcu_read_unlock();
  66290. +
  66291. + if (result != 0) {
  66292. + h->result = CBK_COORD_NOTFOUND;
  66293. + return RETERR(-ENOENT);
  66294. + }
  66295. +
  66296. + result =
  66297. + longterm_lock_znode(h->active_lh, node, cbk_lock_mode(level, h),
  66298. + ZNODE_LOCK_LOPRI);
  66299. + zput(node);
  66300. + if (result != 0)
  66301. + return result;
  66302. + result = zload(node);
  66303. + if (result != 0)
  66304. + return result;
  66305. +
  66306. + /* recheck keys */
  66307. + read_lock_dk(tree);
  66308. + result = (znode_contains_key_strict(node, key, isunique) &&
  66309. + !ZF_ISSET(node, JNODE_HEARD_BANSHEE));
  66310. + read_unlock_dk(tree);
  66311. + if (result) {
  66312. + /* do lookup inside node */
  66313. + llr = cbk_node_lookup(h);
  66314. + /* if cbk_node_lookup() wandered to another node (due to eottl
  66315. + or non-unique keys), adjust @node */
  66316. + /*node = h->active_lh->node; */
  66317. +
  66318. + if (llr != LOOKUP_DONE) {
  66319. + /* restart or continue on the next level */
  66320. + result = RETERR(-ENOENT);
  66321. + } else if (IS_CBKERR(h->result))
  66322. + /* io or oom */
  66323. + result = RETERR(-ENOENT);
  66324. + else {
  66325. + /* good. Either item found or definitely not found. */
  66326. + result = 0;
  66327. +
  66328. + write_lock(&(cache->guard));
  66329. + if (slot->node == h->active_lh->node) {
  66330. + /* if this node is still in cbk cache---move
  66331. + its slot to the head of the LRU list. */
  66332. + list_move(&slot->lru, &cache->lru);
  66333. + }
  66334. + write_unlock(&(cache->guard));
  66335. + }
  66336. + } else {
  66337. + /* race. While this thread was waiting for the lock, node was
  66338. + rebalanced and item we are looking for, shifted out of it
  66339. + (if it ever was here).
  66340. +
  66341. + Continuing scanning is almost hopeless: node key range was
  66342. + moved to, is almost certainly at the beginning of the LRU
  66343. + list at this time, because it's hot, but restarting
  66344. + scanning from the very beginning is complex. Just return,
  66345. + so that cbk() will be performed. This is not that
  66346. + important, because such races should be rare. Are they?
  66347. + */
  66348. + result = RETERR(-ENOENT); /* -ERAUGHT */
  66349. + }
  66350. + zrelse(node);
  66351. + assert("nikita-2476", cbk_cache_invariant(cache));
  66352. + return result;
  66353. +}
  66354. +
  66355. +/* look for item with given key in the coord cache
  66356. +
  66357. + This function, called by coord_by_key(), scans "coord cache" (&cbk_cache)
  66358. + which is a small LRU list of znodes accessed lately. For each znode
  66359. + in this list, it checks whether the key we are looking for fits into key
  66360. + range covered by this node. If so, and in addition, node lies at allowed
  66361. + level (this is to handle extents on a twig level), node is locked, and
  66362. + lookup inside it is performed.
  66363. +
  66364. + we need a measurement of the cost of this cache search compared to the cost
  66365. + of coord_by_key.
  66366. +
  66367. +*/
  66368. +static int cbk_cache_search(cbk_handle * h/* cbk handle */)
  66369. +{
  66370. + int result = 0;
  66371. + tree_level level;
  66372. +
  66373. + /* add CBK_IN_CACHE to the handle flags. This means that
  66374. + * cbk_node_lookup() assumes that cbk_cache is scanned and would add
  66375. + * found node to the cache. */
  66376. + h->flags |= CBK_IN_CACHE;
  66377. + for (level = h->stop_level; level <= h->lock_level; ++level) {
  66378. + h->level = level;
  66379. + result = cbk_cache_scan_slots(h);
  66380. + if (result != 0) {
  66381. + done_lh(h->active_lh);
  66382. + done_lh(h->parent_lh);
  66383. + } else {
  66384. + assert("nikita-1319", !IS_CBKERR(h->result));
  66385. + break;
  66386. + }
  66387. + }
  66388. + h->flags &= ~CBK_IN_CACHE;
  66389. + return result;
  66390. +}
  66391. +
  66392. +/* type of lock we want to obtain during tree traversal. On stop level
  66393. + we want type of lock user asked for, on upper levels: read lock. */
  66394. +znode_lock_mode cbk_lock_mode(tree_level level, cbk_handle * h)
  66395. +{
  66396. + assert("nikita-382", h != NULL);
  66397. +
  66398. + return (level <= h->lock_level) ? h->lock_mode : ZNODE_READ_LOCK;
  66399. +}
  66400. +
  66401. +/* update outdated delimiting keys */
  66402. +static void stale_dk(reiser4_tree * tree, znode * node)
  66403. +{
  66404. + znode *right;
  66405. +
  66406. + read_lock_tree(tree);
  66407. + write_lock_dk(tree);
  66408. + right = node->right;
  66409. +
  66410. + if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
  66411. + right && ZF_ISSET(right, JNODE_DKSET) &&
  66412. + !keyeq(znode_get_rd_key(node), znode_get_ld_key(right)))
  66413. + znode_set_rd_key(node, znode_get_ld_key(right));
  66414. +
  66415. + write_unlock_dk(tree);
  66416. + read_unlock_tree(tree);
  66417. +}
  66418. +
  66419. +/* check for possibly outdated delimiting keys, and update them if
  66420. + * necessary. */
  66421. +static void update_stale_dk(reiser4_tree * tree, znode * node)
  66422. +{
  66423. + znode *right;
  66424. + reiser4_key rd;
  66425. +
  66426. + read_lock_tree(tree);
  66427. + read_lock_dk(tree);
  66428. + rd = *znode_get_rd_key(node);
  66429. + right = node->right;
  66430. + if (unlikely(ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
  66431. + right && ZF_ISSET(right, JNODE_DKSET) &&
  66432. + !keyeq(&rd, znode_get_ld_key(right)))) {
  66433. + assert("nikita-38211", ZF_ISSET(node, JNODE_DKSET));
  66434. + read_unlock_dk(tree);
  66435. + read_unlock_tree(tree);
  66436. + stale_dk(tree, node);
  66437. + return;
  66438. + }
  66439. + read_unlock_dk(tree);
  66440. + read_unlock_tree(tree);
  66441. +}
  66442. +
  66443. +/**
  66444. + * Search for non-unique key.
  66445. + *
  66446. + * Suppose that we are looking for an item with possibly non-unique key 100.
  66447. + *
  66448. + * Root node contains two pointers: one to a node with left delimiting key 0,
  66449. + * and another to a node with left delimiting key 100. Item we interested in
  66450. + * may well happen in the sub-tree rooted at the first pointer.
  66451. + *
  66452. + * To handle this search_to_left() is called when search reaches stop
  66453. + * level. This function checks it is _possible_ that item we are looking for
  66454. + * is in the left neighbor (this can be done by comparing delimiting keys) and
  66455. + * if so, tries to lock left neighbor (this is low priority lock, so it can
  66456. + * deadlock, tree traversal is just restarted if it did) and then checks
  66457. + * whether left neighbor actually contains items with our key.
  66458. + *
  66459. + * Note that this is done on the stop level only. It is possible to try such
  66460. + * left-check on each level, but as duplicate keys are supposed to be rare
  66461. + * (very unlikely that more than one node is completely filled with items with
  66462. + duplicate keys), it is cheaper to scan to the left on the stop level once.
  66463. + *
  66464. + */
  66465. +static level_lookup_result search_to_left(cbk_handle * h/* search handle */)
  66466. +{
  66467. + level_lookup_result result = LOOKUP_INVAL;
  66468. + coord_t *coord;
  66469. + znode *node;
  66470. + znode *neighbor;
  66471. +
  66472. + lock_handle lh;
  66473. +
  66474. + assert("nikita-1761", h != NULL);
  66475. + assert("nikita-1762", h->level == h->stop_level);
  66476. +
  66477. + init_lh(&lh);
  66478. + coord = h->coord;
  66479. + node = h->active_lh->node;
  66480. + assert("nikita-1763", coord_is_leftmost_unit(coord));
  66481. +
  66482. + h->result =
  66483. + reiser4_get_left_neighbor(&lh, node, (int)h->lock_mode,
  66484. + GN_CAN_USE_UPPER_LEVELS);
  66485. + neighbor = NULL;
  66486. + switch (h->result) {
  66487. + case -E_DEADLOCK:
  66488. + result = LOOKUP_REST;
  66489. + break;
  66490. + case 0:{
  66491. + node_plugin *nplug;
  66492. + coord_t crd;
  66493. + lookup_bias bias;
  66494. +
  66495. + neighbor = lh.node;
  66496. + h->result = zload(neighbor);
  66497. + if (h->result != 0) {
  66498. + result = LOOKUP_DONE;
  66499. + break;
  66500. + }
  66501. +
  66502. + nplug = neighbor->nplug;
  66503. +
  66504. + coord_init_zero(&crd);
  66505. + bias = h->bias;
  66506. + h->bias = FIND_EXACT;
  66507. + h->result =
  66508. + nplug->lookup(neighbor, h->key, h->bias, &crd);
  66509. + h->bias = bias;
  66510. +
  66511. + if (h->result == NS_NOT_FOUND) {
  66512. + case -E_NO_NEIGHBOR:
  66513. + h->result = CBK_COORD_FOUND;
  66514. + if (!(h->flags & CBK_IN_CACHE))
  66515. + cbk_cache_add(node);
  66516. + default: /* some other error */
  66517. + result = LOOKUP_DONE;
  66518. + } else if (h->result == NS_FOUND) {
  66519. + read_lock_dk(znode_get_tree(neighbor));
  66520. + h->rd_key = *znode_get_ld_key(node);
  66521. + leftmost_key_in_node(neighbor, &h->ld_key);
  66522. + read_unlock_dk(znode_get_tree(neighbor));
  66523. + h->flags |= CBK_DKSET;
  66524. +
  66525. + h->block = *znode_get_block(neighbor);
  66526. + /* clear coord->node so that cbk_level_lookup()
  66527. + wouldn't overwrite parent hint in neighbor.
  66528. +
  66529. + Parent hint was set up by
  66530. + reiser4_get_left_neighbor()
  66531. + */
  66532. + /* FIXME: why do we have to spinlock here? */
  66533. + write_lock_tree(znode_get_tree(neighbor));
  66534. + h->coord->node = NULL;
  66535. + write_unlock_tree(znode_get_tree(neighbor));
  66536. + result = LOOKUP_CONT;
  66537. + } else {
  66538. + result = LOOKUP_DONE;
  66539. + }
  66540. + if (neighbor != NULL)
  66541. + zrelse(neighbor);
  66542. + }
  66543. + }
  66544. + done_lh(&lh);
  66545. + return result;
  66546. +}
  66547. +
  66548. +/* debugging aid: return symbolic name of search bias */
  66549. +static const char *bias_name(lookup_bias bias/* bias to get name of */)
  66550. +{
  66551. + if (bias == FIND_EXACT)
  66552. + return "exact";
  66553. + else if (bias == FIND_MAX_NOT_MORE_THAN)
  66554. + return "left-slant";
  66555. +/* else if( bias == RIGHT_SLANT_BIAS ) */
  66556. +/* return "right-bias"; */
  66557. + else {
  66558. + static char buf[30];
  66559. +
  66560. + sprintf(buf, "unknown: %i", bias);
  66561. + return buf;
  66562. + }
  66563. +}
  66564. +
  66565. +#if REISER4_DEBUG
  66566. +/* debugging aid: print human readable information about @p */
  66567. +void print_coord_content(const char *prefix /* prefix to print */ ,
  66568. + coord_t *p/* coord to print */)
  66569. +{
  66570. + reiser4_key key;
  66571. +
  66572. + if (p == NULL) {
  66573. + printk("%s: null\n", prefix);
  66574. + return;
  66575. + }
  66576. + if ((p->node != NULL) && znode_is_loaded(p->node)
  66577. + && coord_is_existing_item(p))
  66578. + printk("%s: data: %p, length: %i\n", prefix,
  66579. + item_body_by_coord(p), item_length_by_coord(p));
  66580. + if (znode_is_loaded(p->node)) {
  66581. + item_key_by_coord(p, &key);
  66582. + reiser4_print_key(prefix, &key);
  66583. + }
  66584. +}
  66585. +
  66586. +/* debugging aid: print human readable information about @block */
  66587. +void reiser4_print_address(const char *prefix /* prefix to print */ ,
  66588. + const reiser4_block_nr * block/* block number to print */)
  66589. +{
  66590. + printk("%s: %s\n", prefix, sprint_address(block));
  66591. +}
  66592. +#endif
  66593. +
  66594. +/* return string containing human readable representation of @block */
  66595. +char *sprint_address(const reiser4_block_nr *
  66596. + block/* block number to print */)
  66597. +{
  66598. + static char address[30];
  66599. +
  66600. + if (block == NULL)
  66601. + sprintf(address, "null");
  66602. + else if (reiser4_blocknr_is_fake(block))
  66603. + sprintf(address, "%llx", (unsigned long long)(*block));
  66604. + else
  66605. + sprintf(address, "%llu", (unsigned long long)(*block));
  66606. + return address;
  66607. +}
  66608. +
  66609. +/* release parent node during traversal */
  66610. +static void put_parent(cbk_handle * h/* search handle */)
  66611. +{
  66612. + assert("nikita-383", h != NULL);
  66613. + if (h->parent_lh->node != NULL)
  66614. + longterm_unlock_znode(h->parent_lh);
  66615. +}
  66616. +
  66617. +/* helper function used by coord_by_key(): release reference to parent znode
  66618. + stored in handle before processing its child. */
  66619. +static void hput(cbk_handle * h/* search handle */)
  66620. +{
  66621. + assert("nikita-385", h != NULL);
  66622. + done_lh(h->parent_lh);
  66623. + done_lh(h->active_lh);
  66624. +}
  66625. +
  66626. +/* Helper function used by cbk(): update delimiting keys of child node (stored
  66627. + in h->active_lh->node) using key taken from parent on the parent level. */
  66628. +static int setup_delimiting_keys(cbk_handle * h/* search handle */)
  66629. +{
  66630. + znode *active;
  66631. + reiser4_tree *tree;
  66632. +
  66633. + assert("nikita-1088", h != NULL);
  66634. +
  66635. + active = h->active_lh->node;
  66636. +
  66637. + /* fast check without taking dk lock. This is safe, because
  66638. + * JNODE_DKSET is never cleared once set. */
  66639. + if (!ZF_ISSET(active, JNODE_DKSET)) {
  66640. + tree = znode_get_tree(active);
  66641. + write_lock_dk(tree);
  66642. + if (!ZF_ISSET(active, JNODE_DKSET)) {
  66643. + znode_set_ld_key(active, &h->ld_key);
  66644. + znode_set_rd_key(active, &h->rd_key);
  66645. + ZF_SET(active, JNODE_DKSET);
  66646. + }
  66647. + write_unlock_dk(tree);
  66648. + return 1;
  66649. + }
  66650. + return 0;
  66651. +}
  66652. +
  66653. +/* true if @block makes sense for the @tree. Used to detect corrupted node
  66654. + * pointers */
  66655. +static int
  66656. +block_nr_is_correct(reiser4_block_nr * block /* block number to check */ ,
  66657. + reiser4_tree * tree/* tree to check against */)
  66658. +{
  66659. + assert("nikita-757", block != NULL);
  66660. + assert("nikita-758", tree != NULL);
  66661. +
  66662. + /* check to see if it exceeds the size of the device. */
  66663. + return reiser4_blocknr_is_sane_for(tree->super, block);
  66664. +}
  66665. +
  66666. +/* check consistency of fields */
  66667. +static int sanity_check(cbk_handle * h/* search handle */)
  66668. +{
  66669. + assert("nikita-384", h != NULL);
  66670. +
  66671. + if (h->level < h->stop_level) {
  66672. + h->error = "Buried under leaves";
  66673. + h->result = RETERR(-EIO);
  66674. + return LOOKUP_DONE;
  66675. + } else if (!block_nr_is_correct(&h->block, h->tree)) {
  66676. + h->error = "bad block number";
  66677. + h->result = RETERR(-EIO);
  66678. + return LOOKUP_DONE;
  66679. + } else
  66680. + return 0;
  66681. +}
  66682. +
  66683. +/* Make Linus happy.
  66684. + Local variables:
  66685. + c-indentation-style: "K&R"
  66686. + mode-name: "LC"
  66687. + c-basic-offset: 8
  66688. + tab-width: 8
  66689. + fill-column: 120
  66690. + scroll-step: 1
  66691. + End:
  66692. +*/
  66693. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/status_flags.c linux-5.16.14/fs/reiser4/status_flags.c
  66694. --- linux-5.16.14.orig/fs/reiser4/status_flags.c 1970-01-01 01:00:00.000000000 +0100
  66695. +++ linux-5.16.14/fs/reiser4/status_flags.c 2022-03-12 13:26:19.687892813 +0100
  66696. @@ -0,0 +1,180 @@
  66697. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  66698. + * reiser4/README */
  66699. +
  66700. +/* Functions that deal with reiser4 status block, query status and update it,
  66701. + * if needed */
  66702. +
  66703. +#include <linux/bio.h>
  66704. +#include <linux/highmem.h>
  66705. +#include <linux/fs.h>
  66706. +#include <linux/blkdev.h>
  66707. +#include "debug.h"
  66708. +#include "dformat.h"
  66709. +#include "status_flags.h"
  66710. +#include "super.h"
  66711. +
  66712. +/* This is our end I/O handler that marks page uptodate if IO was successful.
  66713. + It also unconditionally unlocks the page, so we can see that io was done.
  66714. + We do not free the bio, because we hope to reuse it. */
  66715. +static void reiser4_status_endio(struct bio *bio)
  66716. +{
  66717. + if (!bio->bi_status)
  66718. + SetPageUptodate(bio->bi_io_vec->bv_page);
  66719. + else {
  66720. + ClearPageUptodate(bio->bi_io_vec->bv_page);
  66721. + SetPageError(bio->bi_io_vec->bv_page);
  66722. + }
  66723. + unlock_page(bio->bi_io_vec->bv_page);
  66724. +}
  66725. +
  66726. +/* Initialise status code. This is expected to be called from the disk format
  66727. + code. The block parameter is where the status block lives. */
  66728. +int reiser4_status_init(reiser4_block_nr block)
  66729. +{
  66730. + struct super_block *sb = reiser4_get_current_sb();
  66731. + struct reiser4_status *statuspage;
  66732. + struct bio *bio;
  66733. + struct page *page;
  66734. +
  66735. + get_super_private(sb)->status_page = NULL;
  66736. + get_super_private(sb)->status_bio = NULL;
  66737. +
  66738. + page = alloc_pages(reiser4_ctx_gfp_mask_get(), 0);
  66739. + if (!page)
  66740. + return -ENOMEM;
  66741. +
  66742. + bio = bio_alloc(reiser4_ctx_gfp_mask_get(), 1);
  66743. + if (bio != NULL) {
  66744. + bio->bi_iter.bi_sector = block * (sb->s_blocksize >> 9);
  66745. + bio_set_dev(bio, sb->s_bdev);
  66746. + bio->bi_io_vec[0].bv_page = page;
  66747. + bio->bi_io_vec[0].bv_len = sb->s_blocksize;
  66748. + bio->bi_io_vec[0].bv_offset = 0;
  66749. + bio->bi_vcnt = 1;
  66750. + bio->bi_iter.bi_size = sb->s_blocksize;
  66751. + bio->bi_end_io = reiser4_status_endio;
  66752. + } else {
  66753. + __free_pages(page, 0);
  66754. + return -ENOMEM;
  66755. + }
  66756. + lock_page(page);
  66757. + bio_set_op_attrs(bio, READ, 0);
  66758. + submit_bio(bio);
  66759. + wait_on_page_locked(page);
  66760. + if (!PageUptodate(page)) {
  66761. + warning("green-2007",
  66762. + "I/O error while tried to read status page\n");
  66763. + return -EIO;
  66764. + }
  66765. +
  66766. + statuspage = (struct reiser4_status *)kmap_atomic(page);
  66767. + if (memcmp
  66768. + (statuspage->magic, REISER4_STATUS_MAGIC,
  66769. + sizeof(REISER4_STATUS_MAGIC))) {
  66770. + /* Magic does not match. */
  66771. + kunmap_atomic((char *)statuspage);
  66772. + warning("green-2008", "Wrong magic in status block\n");
  66773. + __free_pages(page, 0);
  66774. + bio_put(bio);
  66775. + return -EINVAL;
  66776. + }
  66777. + kunmap_atomic((char *)statuspage);
  66778. +
  66779. + get_super_private(sb)->status_page = page;
  66780. + get_super_private(sb)->status_bio = bio;
  66781. + return 0;
  66782. +}
  66783. +
  66784. +/* Query the status of fs. Returns whether the FS can be safely mounted.
  66785. + Also if "status" and "extended" parameters are given, it will fill
  66786. + actual parts of status from disk there. */
  66787. +int reiser4_status_query(u64 *status, u64 *extended)
  66788. +{
  66789. + struct super_block *sb = reiser4_get_current_sb();
  66790. + struct reiser4_status *statuspage;
  66791. + int retval;
  66792. +
  66793. + if (!get_super_private(sb)->status_page)
  66794. + /* No status page? */
  66795. + return REISER4_STATUS_MOUNT_UNKNOWN;
  66796. + statuspage = (struct reiser4_status *)
  66797. + kmap_atomic(get_super_private(sb)->status_page);
  66798. + switch ((long)le64_to_cpu(get_unaligned(&statuspage->status))) {
  66799. + /* FIXME: this cast is a hack for 32 bit arches to work. */
  66800. + case REISER4_STATUS_OK:
  66801. + retval = REISER4_STATUS_MOUNT_OK;
  66802. + break;
  66803. + case REISER4_STATUS_CORRUPTED:
  66804. + retval = REISER4_STATUS_MOUNT_WARN;
  66805. + break;
  66806. + case REISER4_STATUS_DAMAGED:
  66807. + case REISER4_STATUS_DESTROYED:
  66808. + case REISER4_STATUS_IOERROR:
  66809. + retval = REISER4_STATUS_MOUNT_RO;
  66810. + break;
  66811. + default:
  66812. + retval = REISER4_STATUS_MOUNT_UNKNOWN;
  66813. + break;
  66814. + }
  66815. +
  66816. + if (status)
  66817. + *status = le64_to_cpu(get_unaligned(&statuspage->status));
  66818. + if (extended)
  66819. + *extended = le64_to_cpu(get_unaligned(&statuspage->extended_status));
  66820. +
  66821. + kunmap_atomic((char *)statuspage);
  66822. + return retval;
  66823. +}
  66824. +
  66825. +/* This function should be called when something bad happens (e.g. from
  66826. + reiser4_panic). It fills the status structure and tries to push it to disk.*/
  66827. +int reiser4_status_write(__u64 status, __u64 extended_status, char *message)
  66828. +{
  66829. + struct super_block *sb = reiser4_get_current_sb();
  66830. + struct reiser4_status *statuspage;
  66831. + struct bio *bio = get_super_private(sb)->status_bio;
  66832. +
  66833. + if (!get_super_private(sb)->status_page)
  66834. + /* No status page? */
  66835. + return -1;
  66836. + statuspage = (struct reiser4_status *)
  66837. + kmap_atomic(get_super_private(sb)->status_page);
  66838. +
  66839. + put_unaligned(cpu_to_le64(status), &statuspage->status);
  66840. + put_unaligned(cpu_to_le64(extended_status), &statuspage->extended_status);
  66841. + strncpy(statuspage->texterror, message, REISER4_TEXTERROR_LEN);
  66842. +
  66843. + kunmap_atomic((char *)statuspage);
  66844. + bio_reset(bio);
  66845. + bio_set_dev(bio, sb->s_bdev);
  66846. + bio->bi_io_vec[0].bv_page = get_super_private(sb)->status_page;
  66847. + bio->bi_io_vec[0].bv_len = sb->s_blocksize;
  66848. + bio->bi_io_vec[0].bv_offset = 0;
  66849. + bio->bi_vcnt = 1;
  66850. + bio->bi_iter.bi_size = sb->s_blocksize;
  66851. + bio->bi_end_io = reiser4_status_endio;
  66852. + lock_page(get_super_private(sb)->status_page); /* Safe as nobody should
  66853. + * touch our page. */
  66854. + /*
  66855. + * We can block now, but we have no other choice anyway
  66856. + */
  66857. + bio_set_op_attrs(bio, WRITE, 0);
  66858. + submit_bio(bio);
  66859. + /*
  66860. + * We do not wait for IO completion
  66861. + */
  66862. + return 0;
  66863. +}
  66864. +
  66865. +/* Frees the page with status and bio structure. Should be called by disk format
  66866. + * at umount time */
  66867. +int reiser4_status_finish(void)
  66868. +{
  66869. + struct super_block *sb = reiser4_get_current_sb();
  66870. +
  66871. + __free_pages(get_super_private(sb)->status_page, 0);
  66872. + get_super_private(sb)->status_page = NULL;
  66873. + bio_put(get_super_private(sb)->status_bio);
  66874. + get_super_private(sb)->status_bio = NULL;
  66875. + return 0;
  66876. +}
  66877. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/status_flags.h linux-5.16.14/fs/reiser4/status_flags.h
  66878. --- linux-5.16.14.orig/fs/reiser4/status_flags.h 1970-01-01 01:00:00.000000000 +0100
  66879. +++ linux-5.16.14/fs/reiser4/status_flags.h 2022-03-12 13:26:19.687892813 +0100
  66880. @@ -0,0 +1,47 @@
  66881. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  66882. + * reiser4/README */
  66883. +
  66884. +/* Here we declare structures and flags that store reiser4 status on disk.
  66885. + The status that helps us to find out if the filesystem is valid or if it
  66886. + contains some critical, or not so critical errors */
  66887. +
  66888. +#if !defined(__REISER4_STATUS_FLAGS_H__)
  66889. +#define __REISER4_STATUS_FLAGS_H__
  66890. +
  66891. +#include "dformat.h"
  66892. +/* These are major status flags */
  66893. +#define REISER4_STATUS_OK 0
  66894. +#define REISER4_STATUS_CORRUPTED 0x1
  66895. +#define REISER4_STATUS_DAMAGED 0x2
  66896. +#define REISER4_STATUS_DESTROYED 0x4
  66897. +#define REISER4_STATUS_IOERROR 0x8
  66898. +
  66899. +/* Return values for reiser4_status_query() */
  66900. +#define REISER4_STATUS_MOUNT_OK 0
  66901. +#define REISER4_STATUS_MOUNT_WARN 1
  66902. +#define REISER4_STATUS_MOUNT_RO 2
  66903. +#define REISER4_STATUS_MOUNT_UNKNOWN -1
  66904. +
  66905. +#define REISER4_TEXTERROR_LEN 256
  66906. +
  66907. +#define REISER4_STATUS_MAGIC "ReiSeR4StATusBl"
  66908. +/* We probably need to keep its size under sector size which is 512 bytes */
  66909. +struct reiser4_status {
  66910. + char magic[16];
  66911. + d64 status; /* Current FS state */
  66912. + d64 extended_status; /* Any additional info that might have sense in
  66913. + * addition to "status". E.g. last sector where
  66914. + * io error happened if status is
  66915. + * "io error encountered" */
  66916. + d64 stacktrace[10]; /* Last ten functional calls made (addresses) */
  66917. + char texterror[REISER4_TEXTERROR_LEN]; /* Any error message if
  66918. + * appropriate, otherwise filled
  66919. + * with zeroes */
  66920. +};
  66921. +
  66922. +int reiser4_status_init(reiser4_block_nr block);
  66923. +int reiser4_status_query(u64 *status, u64 *extended);
  66924. +int reiser4_status_write(u64 status, u64 extended_status, char *message);
  66925. +int reiser4_status_finish(void);
  66926. +
  66927. +#endif
  66928. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/super.c linux-5.16.14/fs/reiser4/super.c
  66929. --- linux-5.16.14.orig/fs/reiser4/super.c 1970-01-01 01:00:00.000000000 +0100
  66930. +++ linux-5.16.14/fs/reiser4/super.c 2022-03-12 13:26:19.688892816 +0100
  66931. @@ -0,0 +1,306 @@
  66932. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  66933. + * reiser4/README */
  66934. +
  66935. +/* Super-block manipulations. */
  66936. +
  66937. +#include "debug.h"
  66938. +#include "dformat.h"
  66939. +#include "key.h"
  66940. +#include "plugin/security/perm.h"
  66941. +#include "plugin/space/space_allocator.h"
  66942. +#include "plugin/plugin.h"
  66943. +#include "tree.h"
  66944. +#include "vfs_ops.h"
  66945. +#include "super.h"
  66946. +#include "reiser4.h"
  66947. +
  66948. +#include <linux/types.h> /* for __u?? */
  66949. +#include <linux/fs.h> /* for struct super_block */
  66950. +
  66951. +static __u64 reserved_for_gid(const struct super_block *super, gid_t gid);
  66952. +static __u64 reserved_for_uid(const struct super_block *super, uid_t uid);
  66953. +static __u64 reserved_for_root(const struct super_block *super);
  66954. +
  66955. +/* Return reiser4-specific part of super block */
  66956. +reiser4_super_info_data *get_super_private_nocheck(const struct super_block *super)
  66957. +{
  66958. + return (reiser4_super_info_data *) super->s_fs_info;
  66959. +}
  66960. +
  66961. +/* Return reiser4 fstype: value that is returned in ->f_type field by statfs()
  66962. + */
  66963. +long reiser4_statfs_type(const struct super_block *super UNUSED_ARG)
  66964. +{
  66965. + assert("nikita-448", super != NULL);
  66966. + assert("nikita-449", is_reiser4_super(super));
  66967. + return (long)REISER4_SUPER_MAGIC;
  66968. +}
  66969. +
  66970. +/* functions to read/modify fields of reiser4_super_info_data */
  66971. +
  66972. +/* get number of blocks in file system */
  66973. +__u64 reiser4_block_count(const struct super_block *super /* super block
  66974. + queried */ )
  66975. +{
  66976. + assert("vs-494", super != NULL);
  66977. + assert("vs-495", is_reiser4_super(super));
  66978. + return get_super_private(super)->block_count;
  66979. +}
  66980. +
  66981. +#if REISER4_DEBUG
  66982. +/*
  66983. + * number of blocks in the current file system
  66984. + */
  66985. +__u64 reiser4_current_block_count(void)
  66986. +{
  66987. + return get_current_super_private()->block_count;
  66988. +}
  66989. +#endif /* REISER4_DEBUG */
  66990. +
  66991. +/* set number of block in filesystem */
  66992. +void reiser4_set_block_count(const struct super_block *super, __u64 nr)
  66993. +{
  66994. + assert("vs-501", super != NULL);
  66995. + assert("vs-502", is_reiser4_super(super));
  66996. + get_super_private(super)->block_count = nr;
  66997. + /*
  66998. + * For the proper calculation of the reserved space counter (5% of the
  66999. + * device block count) we need a 64 bit division which is missing in Linux
  67000. + * on i386 platform. Because we do not need a precise calculation here
  67001. + * we can replace a div64 operation by this combination of
  67002. + * multiplication and shift: 51. / (2^10) == .0498 .
  67003. + * FIXME: this is a bug. It comes up only for very small filesystems
  67004. + * which probably are never used. Nevertheless, it is a bug. Number of
  67005. + * reserved blocks must be not less than maximal number of blocks which
  67006. + * get grabbed with BA_RESERVED.
  67007. + */
  67008. + get_super_private(super)->blocks_reserved = ((nr * 51) >> 10);
  67009. +}
  67010. +
  67011. +/* amount of blocks used (allocated for data) in file system */
  67012. +__u64 reiser4_data_blocks(const struct super_block *super /* super block
  67013. + queried */ )
  67014. +{
  67015. + assert("nikita-452", super != NULL);
  67016. + assert("nikita-453", is_reiser4_super(super));
  67017. + return get_super_private(super)->blocks_used;
  67018. +}
  67019. +
  67020. +/* set number of block used in filesystem */
  67021. +void reiser4_set_data_blocks(const struct super_block *super, __u64 nr)
  67022. +{
  67023. + assert("vs-503", super != NULL);
  67024. + assert("vs-504", is_reiser4_super(super));
  67025. + get_super_private(super)->blocks_used = nr;
  67026. +}
  67027. +
  67028. +/* amount of free blocks in file system */
  67029. +__u64 reiser4_free_blocks(const struct super_block *super /* super block
  67030. + queried */ )
  67031. +{
  67032. + assert("nikita-454", super != NULL);
  67033. + assert("nikita-455", is_reiser4_super(super));
  67034. + return get_super_private(super)->blocks_free;
  67035. +}
  67036. +
  67037. +/* set number of blocks free in filesystem */
  67038. +void reiser4_set_free_blocks(const struct super_block *super, __u64 nr)
  67039. +{
  67040. + assert("vs-505", super != NULL);
  67041. + assert("vs-506", is_reiser4_super(super));
  67042. + get_super_private(super)->blocks_free = nr;
  67043. +}
  67044. +
  67045. +/* get mkfs unique identifier */
  67046. +__u32 reiser4_mkfs_id(const struct super_block *super /* super block
  67047. + queried */ )
  67048. +{
  67049. + assert("vpf-221", super != NULL);
  67050. + assert("vpf-222", is_reiser4_super(super));
  67051. + return get_super_private(super)->mkfs_id;
  67052. +}
  67053. +
  67054. +/* amount of free blocks in file system */
  67055. +__u64 reiser4_free_committed_blocks(const struct super_block *super)
  67056. +{
  67057. + assert("vs-497", super != NULL);
  67058. + assert("vs-498", is_reiser4_super(super));
  67059. + return get_super_private(super)->blocks_free_committed;
  67060. +}
  67061. +
  67062. +/* amount of blocks in the file system reserved for @uid and @gid */
  67063. +long reiser4_reserved_blocks(const struct super_block *super /* super block
  67064. + queried */ ,
  67065. + uid_t uid /* user id */ ,
  67066. + gid_t gid/* group id */)
  67067. +{
  67068. + long reserved;
  67069. +
  67070. + assert("nikita-456", super != NULL);
  67071. + assert("nikita-457", is_reiser4_super(super));
  67072. +
  67073. + reserved = 0;
  67074. + if (REISER4_SUPPORT_GID_SPACE_RESERVATION)
  67075. + reserved += reserved_for_gid(super, gid);
  67076. + if (REISER4_SUPPORT_UID_SPACE_RESERVATION)
  67077. + reserved += reserved_for_uid(super, uid);
  67078. + if (REISER4_SUPPORT_ROOT_SPACE_RESERVATION && (uid == 0))
  67079. + reserved += reserved_for_root(super);
  67080. + return reserved;
  67081. +}
  67082. +
  67083. +/* get/set value of/to grabbed blocks counter */
  67084. +__u64 reiser4_grabbed_blocks(const struct super_block * super)
  67085. +{
  67086. + assert("zam-512", super != NULL);
  67087. + assert("zam-513", is_reiser4_super(super));
  67088. +
  67089. + return get_super_private(super)->blocks_grabbed;
  67090. +}
  67091. +
  67092. +__u64 reiser4_flush_reserved(const struct super_block *super)
  67093. +{
  67094. + assert("vpf-285", super != NULL);
  67095. + assert("vpf-286", is_reiser4_super(super));
  67096. +
  67097. + return get_super_private(super)->blocks_flush_reserved;
  67098. +}
  67099. +
  67100. +/* get/set value of/to counter of fake allocated formatted blocks */
  67101. +__u64 reiser4_fake_allocated(const struct super_block *super)
  67102. +{
  67103. + assert("zam-516", super != NULL);
  67104. + assert("zam-517", is_reiser4_super(super));
  67105. +
  67106. + return get_super_private(super)->blocks_fake_allocated;
  67107. +}
  67108. +
  67109. +/* get/set value of/to counter of fake allocated unformatted blocks */
  67110. +__u64 reiser4_fake_allocated_unformatted(const struct super_block *super)
  67111. +{
  67112. + assert("zam-516", super != NULL);
  67113. + assert("zam-517", is_reiser4_super(super));
  67114. +
  67115. + return get_super_private(super)->blocks_fake_allocated_unformatted;
  67116. +}
  67117. +
  67118. +/* get/set value of/to counter of clustered blocks */
  67119. +__u64 reiser4_clustered_blocks(const struct super_block *super)
  67120. +{
  67121. + assert("edward-601", super != NULL);
  67122. + assert("edward-602", is_reiser4_super(super));
  67123. +
  67124. + return get_super_private(super)->blocks_clustered;
  67125. +}
  67126. +
  67127. +/* space allocator used by this file system */
  67128. +reiser4_space_allocator * reiser4_get_space_allocator(const struct super_block
  67129. + *super)
  67130. +{
  67131. + assert("nikita-1965", super != NULL);
  67132. + assert("nikita-1966", is_reiser4_super(super));
  67133. + return &get_super_private(super)->space_allocator;
  67134. +}
  67135. +
  67136. +/* return fake inode used to bind formatted nodes in the page cache */
  67137. +struct inode *reiser4_get_super_fake(const struct super_block *super)
  67138. +{
  67139. + assert("nikita-1757", super != NULL);
  67140. + return get_super_private(super)->fake;
  67141. +}
  67142. +
  67143. +/* return fake inode used to bind copied on capture nodes in the page cache */
  67144. +struct inode *reiser4_get_cc_fake(const struct super_block *super)
  67145. +{
  67146. + assert("nikita-1757", super != NULL);
  67147. + return get_super_private(super)->cc;
  67148. +}
  67149. +
  67150. +/* return fake inode used to bind bitmaps and journal heads */
  67151. +struct inode *reiser4_get_bitmap_fake(const struct super_block *super)
  67152. +{
  67153. + assert("nikita-17571", super != NULL);
  67154. + return get_super_private(super)->bitmap;
  67155. +}
  67156. +
  67157. +/* tree used by this file system */
  67158. +reiser4_tree *reiser4_get_tree(const struct super_block *super)
  67159. +{
  67160. + assert("nikita-460", super != NULL);
  67161. + assert("nikita-461", is_reiser4_super(super));
  67162. + return &get_super_private(super)->tree;
  67163. +}
  67164. +
  67165. +/* Check that @super is (looks like) reiser4 super block. This is mainly for
  67166. + use in assertions. */
  67167. +int is_reiser4_super(const struct super_block *super)
  67168. +{
  67169. + return
  67170. + super != NULL &&
  67171. + get_super_private(super) != NULL &&
  67172. + super->s_op == &(get_super_private(super)->ops.super);
  67173. +}
  67174. +
  67175. +int reiser4_is_set(const struct super_block *super, reiser4_fs_flag f)
  67176. +{
  67177. + return test_bit((int)f, &get_super_private(super)->fs_flags);
  67178. +}
  67179. +
  67180. +/* amount of blocks reserved for given group in file system */
  67181. +static __u64 reserved_for_gid(const struct super_block *super UNUSED_ARG,
  67182. + gid_t gid UNUSED_ARG/* group id */)
  67183. +{
  67184. + return 0;
  67185. +}
  67186. +
  67187. +/* amount of blocks reserved for given user in file system */
  67188. +static __u64 reserved_for_uid(const struct super_block *super UNUSED_ARG,
  67189. + uid_t uid UNUSED_ARG/* user id */)
  67190. +{
  67191. + return 0;
  67192. +}
  67193. +
  67194. +/* amount of blocks reserved for super user in file system */
  67195. +static __u64 reserved_for_root(const struct super_block *super UNUSED_ARG)
  67196. +{
  67197. + return 0;
  67198. +}
  67199. +
  67200. +/*
  67201. + * true if block number @blk makes sense for the file system at @super.
  67202. + */
  67203. +int
  67204. +reiser4_blocknr_is_sane_for(const struct super_block *super,
  67205. + const reiser4_block_nr * blk)
  67206. +{
  67207. + reiser4_super_info_data *sbinfo;
  67208. +
  67209. + assert("nikita-2957", super != NULL);
  67210. + assert("nikita-2958", blk != NULL);
  67211. +
  67212. + if (reiser4_blocknr_is_fake(blk))
  67213. + return 1;
  67214. +
  67215. + sbinfo = get_super_private(super);
  67216. + return *blk < sbinfo->block_count;
  67217. +}
  67218. +
  67219. +#if REISER4_DEBUG
  67220. +/*
  67221. + * true, if block number @blk makes sense for the current file system
  67222. + */
  67223. +int reiser4_blocknr_is_sane(const reiser4_block_nr * blk)
  67224. +{
  67225. + return reiser4_blocknr_is_sane_for(reiser4_get_current_sb(), blk);
  67226. +}
  67227. +#endif /* REISER4_DEBUG */
  67228. +
  67229. +/* Make Linus happy.
  67230. + Local variables:
  67231. + c-indentation-style: "K&R"
  67232. + mode-name: "LC"
  67233. + c-basic-offset: 8
  67234. + tab-width: 8
  67235. + fill-column: 120
  67236. + End:
  67237. +*/
  67238. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/super.h linux-5.16.14/fs/reiser4/super.h
  67239. --- linux-5.16.14.orig/fs/reiser4/super.h 1970-01-01 01:00:00.000000000 +0100
  67240. +++ linux-5.16.14/fs/reiser4/super.h 2022-03-12 13:26:19.688892816 +0100
  67241. @@ -0,0 +1,456 @@
  67242. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  67243. + * reiser4/README */
  67244. +
  67245. +/* Super-block functions. See super.c for details. */
  67246. +
  67247. +#if !defined(__REISER4_SUPER_H__)
  67248. +#define __REISER4_SUPER_H__
  67249. +
  67250. +#include <linux/exportfs.h>
  67251. +
  67252. +#include "tree.h"
  67253. +#include "entd.h"
  67254. +#include "wander.h"
  67255. +#include "fsdata.h"
  67256. +#include "plugin/object.h"
  67257. +#include "plugin/space/space_allocator.h"
  67258. +
  67259. +/*
  67260. + * Flush algorithms parameters.
  67261. + */
  67262. +struct flush_params {
  67263. + unsigned relocate_threshold;
  67264. + unsigned relocate_distance;
  67265. + unsigned written_threshold;
  67266. + unsigned scan_maxnodes;
  67267. +};
  67268. +
  67269. +typedef enum {
  67270. + /*
  67271. + * True if this file system doesn't support hard-links (multiple names)
  67272. + * for directories: this is default UNIX behavior.
  67273. + *
  67274. + * If hard-links on directories are not allowed, file system is Acyclic
  67275. + * Directed Graph (modulo dot, and dotdot, of course).
  67276. + *
  67277. + * This is used by reiser4_link().
  67278. + */
  67279. + REISER4_ADG = 0,
  67280. + /*
  67281. + * set if all nodes in internal tree have the same node layout plugin.
  67282. + * If so, znode_guess_plugin() will return tree->node_plugin instead
  67283. + * of guessing plugin by plugin id stored in the node.
  67284. + */
  67285. + REISER4_ONE_NODE_PLUGIN = 1,
  67286. + /* if set, bsd gid assignment is supported. */
  67287. + REISER4_BSD_GID = 2,
  67288. + /* [mac]_time are 32 bit in inode */
  67289. + REISER4_32_BIT_TIMES = 3,
  67290. + /* load all bitmap blocks at mount time */
  67291. + REISER4_DONT_LOAD_BITMAP = 5,
  67292. + /* enforce atomicity during write(2) */
  67293. + REISER4_ATOMIC_WRITE = 6,
  67294. + /* enable issuing of discard requests */
  67295. + REISER4_DISCARD = 8,
  67296. + /* disable hole punching at flush time */
  67297. + REISER4_DONT_PUNCH_HOLES = 9
  67298. +} reiser4_fs_flag;
  67299. +
  67300. +/*
  67301. + * VFS related operation vectors.
  67302. + */
  67303. +struct object_ops {
  67304. + struct super_operations super;
  67305. + struct dentry_operations dentry;
  67306. + struct export_operations export;
  67307. +};
  67308. +
  67309. +/* reiser4-specific part of super block
  67310. +
  67311. + Locking
  67312. +
  67313. + Fields immutable after mount:
  67314. +
  67315. + ->oid*
  67316. + ->space*
  67317. + ->default_[ug]id
  67318. + ->mkfs_id
  67319. + ->trace_flags
  67320. + ->debug_flags
  67321. + ->fs_flags
  67322. + ->df_plug
  67323. + ->optimal_io_size
  67324. + ->plug
  67325. + ->flush
  67326. + ->u (bad name)
  67327. + ->txnmgr
  67328. + ->ra_params
  67329. + ->fsuid
  67330. + ->journal_header
  67331. + ->journal_footer
  67332. +
  67333. + Fields protected by ->lnode_guard
  67334. +
  67335. + ->lnode_htable
  67336. +
  67337. + Fields protected by per-super block spin lock
  67338. +
  67339. + ->block_count
  67340. + ->blocks_used
  67341. + ->blocks_free
  67342. + ->blocks_free_committed
  67343. + ->blocks_grabbed
  67344. + ->blocks_fake_allocated_unformatted
  67345. + ->blocks_fake_allocated
  67346. + ->blocks_flush_reserved
  67347. + ->eflushed
  67348. + ->blocknr_hint_default
  67349. +
  67350. + After journal replaying during mount,
  67351. +
  67352. + ->last_committed_tx
  67353. +
  67354. + is protected by ->tmgr.commit_mutex
  67355. +
  67356. + Invariants involving this data-type:
  67357. +
  67358. + [sb-block-counts]
  67359. + [sb-grabbed]
  67360. + [sb-fake-allocated]
  67361. +*/
  67362. +struct reiser4_super_info_data {
  67363. + /*
  67364. + * guard spinlock which protects reiser4 super block fields (currently
  67365. + * blocks_free, blocks_free_committed)
  67366. + */
  67367. + spinlock_t guard;
  67368. +
  67369. + /* next oid that will be returned by oid_allocate() */
  67370. + oid_t next_to_use;
  67371. + /* total number of used oids */
  67372. + oid_t oids_in_use;
  67373. +
  67374. + /* space manager plugin */
  67375. + reiser4_space_allocator space_allocator;
  67376. +
  67377. + /* transaction model */
  67378. + reiser4_txmod_id txmod;
  67379. +
  67380. + /* reiser4 internal tree */
  67381. + reiser4_tree tree;
  67382. +
  67383. + /*
  67384. + * default user id used for light-weight files without their own
  67385. + * stat-data.
  67386. + */
  67387. + __u32 default_uid;
  67388. +
  67389. + /*
  67390. + * default group id used for light-weight files without their own
  67391. + * stat-data.
  67392. + */
  67393. + __u32 default_gid;
  67394. +
  67395. + /* mkfs identifier generated at mkfs time. */
  67396. + __u32 mkfs_id;
  67397. + /* amount of blocks in a file system */
  67398. + __u64 block_count;
  67399. +
  67400. + /* inviolable reserve */
  67401. + __u64 blocks_reserved;
  67402. +
  67403. + /* amount of blocks used by file system data and meta-data. */
  67404. + __u64 blocks_used;
  67405. +
  67406. + /*
  67407. + * amount of free blocks. This is "working" free blocks counter. It is
  67408. + * like "working" bitmap, please see block_alloc.c for description.
  67409. + */
  67410. + __u64 blocks_free;
  67411. +
  67412. + /*
  67413. + * free block count for fs committed state. This is "commit" version of
  67414. + * free block counter.
  67415. + */
  67416. + __u64 blocks_free_committed;
  67417. +
  67418. + /*
  67419. + * number of blocks reserved for further allocation, for all
  67420. + * threads.
  67421. + */
  67422. + __u64 blocks_grabbed;
  67423. +
  67424. + /* number of fake allocated unformatted blocks in tree. */
  67425. + __u64 blocks_fake_allocated_unformatted;
  67426. +
  67427. + /* number of fake allocated formatted blocks in tree. */
  67428. + __u64 blocks_fake_allocated;
  67429. +
  67430. + /* number of blocks reserved for flush operations. */
  67431. + __u64 blocks_flush_reserved;
  67432. +
  67433. + /* number of blocks reserved for cluster operations. */
  67434. + __u64 blocks_clustered;
  67435. +
  67436. + /* unique file-system identifier */
  67437. + __u32 fsuid;
  67438. +
  67439. + /* On-disk format version. If does not equal to the disk_format
  67440. + plugin version, some format updates (e.g. enlarging plugin
  67441. + set, etc) may have place on mount. */
  67442. + int version;
  67443. +
  67444. + /* file-system wide flags. See reiser4_fs_flag enum */
  67445. + unsigned long fs_flags;
  67446. +
  67447. + /* transaction manager */
  67448. + txn_mgr tmgr;
  67449. +
  67450. + /* ent thread */
  67451. + entd_context entd;
  67452. +
  67453. + /* fake inode used to bind formatted nodes */
  67454. + struct inode *fake;
  67455. + /* inode used to bind bitmaps (and journal heads) */
  67456. + struct inode *bitmap;
  67457. + /* inode used to bind copied on capture nodes */
  67458. + struct inode *cc;
  67459. +
  67460. + /* disk layout plugin */
  67461. + disk_format_plugin *df_plug;
  67462. +
  67463. + /* disk layout specific part of reiser4 super info data */
  67464. + union {
  67465. + format40_super_info format40;
  67466. + } u;
  67467. +
  67468. + /* value we return in st_blksize on stat(2) */
  67469. + unsigned long optimal_io_size;
  67470. +
  67471. + /* parameters for the flush algorithm */
  67472. + struct flush_params flush;
  67473. +
  67474. + /* pointers to jnodes for journal header and footer */
  67475. + jnode *journal_header;
  67476. + jnode *journal_footer;
  67477. +
  67478. + journal_location jloc;
  67479. +
  67480. + /* head block number of last committed transaction */
  67481. + __u64 last_committed_tx;
  67482. +
  67483. + /*
  67484. + * we remember last written location for using as a hint for new block
  67485. + * allocation
  67486. + */
  67487. + __u64 blocknr_hint_default;
  67488. +
  67489. + /* committed number of files (oid allocator state variable) */
  67490. + __u64 nr_files_committed;
  67491. +
  67492. + struct formatted_ra_params ra_params;
  67493. +
  67494. + /*
  67495. + * A mutex for serializing cut tree operation if out-of-free-space:
  67496. + * the only one cut_tree thread is allowed to grab space from reserved
  67497. + * area (it is 5% of disk space)
  67498. + */
  67499. + struct mutex delete_mutex;
  67500. + /* task owning ->delete_mutex */
  67501. + struct task_struct *delete_mutex_owner;
  67502. +
  67503. + /* Diskmap's blocknumber */
  67504. + __u64 diskmap_block;
  67505. +
  67506. + /* What to do in case of error */
  67507. + int onerror;
  67508. +
  67509. + /* operations for objects on this file system */
  67510. + struct object_ops ops;
  67511. +
  67512. + /*
  67513. + * structure to maintain d_cursors. See plugin/file_ops_readdir.c for
  67514. + * more details
  67515. + */
  67516. + struct d_cursor_info d_info;
  67517. + struct crypto_shash *csum_tfm;
  67518. +
  67519. +#ifdef CONFIG_REISER4_BADBLOCKS
  67520. + /* Alternative master superblock offset (in bytes) */
  67521. + unsigned long altsuper;
  67522. +#endif
  67523. + struct repacker *repacker;
  67524. + struct page *status_page;
  67525. + struct bio *status_bio;
  67526. +
  67527. +#if REISER4_DEBUG
  67528. + /*
  67529. + * minimum used blocks value (includes super blocks, bitmap blocks and
  67530. + * other fs reserved areas), depends on fs format and fs size.
  67531. + */
  67532. + __u64 min_blocks_used;
  67533. +
  67534. + /*
  67535. + * when debugging is on, all jnodes (including znodes, bitmaps, etc.)
  67536. + * are kept on a list anchored at sbinfo->all_jnodes. This list is
  67537. + * protected by sbinfo->all_guard spin lock. This lock should be taken
  67538. + * with _irq modifier, because it is also modified from interrupt
  67539. + * contexts (by RCU).
  67540. + */
  67541. + spinlock_t all_guard;
  67542. + /* list of all jnodes */
  67543. + struct list_head all_jnodes;
  67544. +#endif
  67545. + struct dentry *debugfs_root;
  67546. +};
  67547. +
  67548. +extern reiser4_super_info_data *get_super_private_nocheck(const struct
  67549. + super_block * super);
  67550. +
  67551. +/* Return reiser4-specific part of super block */
  67552. +static inline reiser4_super_info_data *get_super_private(const struct
  67553. + super_block * super)
  67554. +{
  67555. + assert("nikita-447", super != NULL);
  67556. +
  67557. + return (reiser4_super_info_data *) super->s_fs_info;
  67558. +}
  67559. +
  67560. +/* get ent context for the @super */
  67561. +static inline entd_context *get_entd_context(struct super_block *super)
  67562. +{
  67563. + return &get_super_private(super)->entd;
  67564. +}
  67565. +
  67566. +/* "Current" super-block: main super block used during current system
  67567. + call. Reference to this super block is stored in reiser4_context. */
  67568. +static inline struct super_block *reiser4_get_current_sb(void)
  67569. +{
  67570. + return get_current_context()->super;
  67571. +}
  67572. +
  67573. +/* Reiser4-specific part of "current" super-block: main super block used
  67574. + during current system call. Reference to this super block is stored in
  67575. + reiser4_context. */
  67576. +static inline reiser4_super_info_data *get_current_super_private(void)
  67577. +{
  67578. + return get_super_private(reiser4_get_current_sb());
  67579. +}
  67580. +
  67581. +static inline struct formatted_ra_params *get_current_super_ra_params(void)
  67582. +{
  67583. + return &(get_current_super_private()->ra_params);
  67584. +}
  67585. +
  67586. +/*
  67587. + * true, if @tree represents read-only file system
  67588. + */
  67589. +static inline int rofs_tree(reiser4_tree * tree)
  67590. +{
  67591. + return sb_rdonly(tree->super);
  67592. +}
  67593. +
  67594. +/*
  67595. + * true, if file system where @node lives on, is read-only
  67596. + */
  67597. +static inline int rofs_jnode(jnode * node)
  67598. +{
  67599. + return rofs_tree(jnode_get_tree(node));
  67600. +}
  67601. +
  67602. +extern __u64 reiser4_current_block_count(void);
  67603. +
  67604. +extern void build_object_ops(struct super_block *super, struct object_ops *ops);
  67605. +
  67606. +#define REISER4_SUPER_MAGIC 0x52345362 /* (*(__u32 *)"R4Sb"); */
  67607. +
  67608. +static inline void spin_lock_reiser4_super(reiser4_super_info_data *sbinfo)
  67609. +{
  67610. + spin_lock(&(sbinfo->guard));
  67611. +}
  67612. +
  67613. +static inline void spin_unlock_reiser4_super(reiser4_super_info_data *sbinfo)
  67614. +{
  67615. + assert_spin_locked(&(sbinfo->guard));
  67616. + spin_unlock(&(sbinfo->guard));
  67617. +}
  67618. +
  67619. +extern __u64 reiser4_flush_reserved(const struct super_block *);
  67620. +extern int reiser4_is_set(const struct super_block *super, reiser4_fs_flag f);
  67621. +extern long reiser4_statfs_type(const struct super_block *super);
  67622. +extern __u64 reiser4_block_count(const struct super_block *super);
  67623. +extern void reiser4_set_block_count(const struct super_block *super, __u64 nr);
  67624. +extern __u64 reiser4_data_blocks(const struct super_block *super);
  67625. +extern void reiser4_set_data_blocks(const struct super_block *super, __u64 nr);
  67626. +extern __u64 reiser4_free_blocks(const struct super_block *super);
  67627. +extern void reiser4_set_free_blocks(const struct super_block *super, __u64 nr);
  67628. +extern __u32 reiser4_mkfs_id(const struct super_block *super);
  67629. +
  67630. +extern __u64 reiser4_free_committed_blocks(const struct super_block *super);
  67631. +
  67632. +extern __u64 reiser4_grabbed_blocks(const struct super_block *);
  67633. +extern __u64 reiser4_fake_allocated(const struct super_block *);
  67634. +extern __u64 reiser4_fake_allocated_unformatted(const struct super_block *);
  67635. +extern __u64 reiser4_clustered_blocks(const struct super_block *);
  67636. +
  67637. +extern long reiser4_reserved_blocks(const struct super_block *super, uid_t uid,
  67638. + gid_t gid);
  67639. +
  67640. +extern reiser4_space_allocator *
  67641. +reiser4_get_space_allocator(const struct super_block *super);
  67642. +extern reiser4_oid_allocator *
  67643. +reiser4_get_oid_allocator(const struct super_block *super);
  67644. +extern struct inode *reiser4_get_super_fake(const struct super_block *super);
  67645. +extern struct inode *reiser4_get_cc_fake(const struct super_block *super);
  67646. +extern struct inode *reiser4_get_bitmap_fake(const struct super_block *super);
  67647. +extern reiser4_tree *reiser4_get_tree(const struct super_block *super);
  67648. +extern int is_reiser4_super(const struct super_block *super);
  67649. +
  67650. +extern int reiser4_blocknr_is_sane(const reiser4_block_nr * blk);
  67651. +extern int reiser4_blocknr_is_sane_for(const struct super_block *super,
  67652. + const reiser4_block_nr * blk);
  67653. +extern int reiser4_fill_super(struct super_block *s, void *data, int silent);
  67654. +extern int reiser4_done_super(struct super_block *s);
  67655. +
  67656. +/* step of fill super */
  67657. +extern int reiser4_init_fs_info(struct super_block *);
  67658. +extern void reiser4_done_fs_info(struct super_block *);
  67659. +extern int reiser4_init_super_data(struct super_block *, char *opt_string);
  67660. +extern int reiser4_init_read_super(struct super_block *, int silent);
  67661. +extern int reiser4_init_root_inode(struct super_block *);
  67662. +extern reiser4_plugin *get_default_plugin(pset_member memb);
  67663. +
  67664. +/* Maximal possible object id. */
  67665. +#define ABSOLUTE_MAX_OID ((oid_t)~0)
  67666. +
  67667. +#define OIDS_RESERVED (1 << 16)
  67668. +int oid_init_allocator(struct super_block *, oid_t nr_files, oid_t next);
  67669. +oid_t oid_allocate(struct super_block *);
  67670. +int oid_release(struct super_block *, oid_t);
  67671. +oid_t oid_next(const struct super_block *);
  67672. +void oid_count_allocated(void);
  67673. +void oid_count_released(void);
  67674. +long oids_used(const struct super_block *);
  67675. +
  67676. +#if REISER4_DEBUG
  67677. +void print_fs_info(const char *prefix, const struct super_block *);
  67678. +#endif
  67679. +
  67680. +extern void destroy_reiser4_cache(struct kmem_cache **);
  67681. +
  67682. +extern struct super_operations reiser4_super_operations;
  67683. +extern struct export_operations reiser4_export_operations;
  67684. +extern struct dentry_operations reiser4_dentry_operations;
  67685. +
  67686. +/* __REISER4_SUPER_H__ */
  67687. +#endif
  67688. +
  67689. +/*
  67690. + * Local variables:
  67691. + * c-indentation-style: "K&R"
  67692. + * mode-name: "LC"
  67693. + * c-basic-offset: 8
  67694. + * tab-width: 8
  67695. + * fill-column: 120
  67696. + * End:
  67697. + */
  67698. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/super_ops.c linux-5.16.14/fs/reiser4/super_ops.c
  67699. --- linux-5.16.14.orig/fs/reiser4/super_ops.c 1970-01-01 01:00:00.000000000 +0100
  67700. +++ linux-5.16.14/fs/reiser4/super_ops.c 2022-03-12 13:26:19.688892816 +0100
  67701. @@ -0,0 +1,789 @@
  67702. +/* Copyright 2005 by Hans Reiser, licensing governed by
  67703. + * reiser4/README */
  67704. +
  67705. +#include "inode.h"
  67706. +#include "page_cache.h"
  67707. +#include "ktxnmgrd.h"
  67708. +#include "flush.h"
  67709. +#include "safe_link.h"
  67710. +#include "checksum.h"
  67711. +
  67712. +#include <linux/vfs.h>
  67713. +#include <linux/writeback.h>
  67714. +#include <linux/mount.h>
  67715. +#include <linux/seq_file.h>
  67716. +#include <linux/debugfs.h>
  67717. +#include <linux/backing-dev.h>
  67718. +#include <linux/module.h>
  67719. +
  67720. +/* slab cache for inodes */
  67721. +static struct kmem_cache *inode_cache;
  67722. +
  67723. +static struct dentry *reiser4_debugfs_root = NULL;
  67724. +
  67725. +/**
  67726. + * init_once - constructor for reiser4 inodes
  67727. + * @cache: cache @obj belongs to
  67728. + * @obj: inode to be initialized
  67729. + *
  67730. + * Initialization function to be called when new page is allocated by reiser4
  67731. + * inode cache. It is set on inode cache creation.
  67732. + */
  67733. +static void init_once(void *obj)
  67734. +{
  67735. + struct reiser4_inode_object *info;
  67736. +
  67737. + info = obj;
  67738. +
  67739. + /* initialize vfs inode */
  67740. + inode_init_once(&info->vfs_inode);
  67741. +
  67742. + /*
  67743. + * initialize reiser4 specific part of inode.
  67744. + * NOTE-NIKITA add here initializations for locks, list heads,
  67745. + * etc. that will be added to our private inode part.
  67746. + */
  67747. + INIT_LIST_HEAD(get_readdir_list(&info->vfs_inode));
  67748. + init_rwsem(&info->p.conv_sem);
  67749. + /* init semaphore which is used during inode loading */
  67750. + loading_init_once(&info->p);
  67751. + INIT_RADIX_TREE(jnode_tree_by_reiser4_inode(&info->p),
  67752. + GFP_ATOMIC);
  67753. +#if REISER4_DEBUG
  67754. + info->p.nr_jnodes = 0;
  67755. +#endif
  67756. +}
  67757. +
  67758. +/**
  67759. + * init_inodes - create znode cache
  67760. + *
  67761. + * Initializes slab cache of inodes. It is part of reiser4 module initialization
  67762. + */
  67763. +static int init_inodes(void)
  67764. +{
  67765. + inode_cache = kmem_cache_create("reiser4_inode",
  67766. + sizeof(struct reiser4_inode_object),
  67767. + 0,
  67768. + SLAB_HWCACHE_ALIGN |
  67769. + SLAB_RECLAIM_ACCOUNT, init_once);
  67770. + if (inode_cache == NULL)
  67771. + return RETERR(-ENOMEM);
  67772. + return 0;
  67773. +}
  67774. +
  67775. +/**
  67776. + * done_inodes - delete inode cache
  67777. + *
  67778. + * This is called on reiser4 module unloading or system shutdown.
  67779. + */
  67780. +static void done_inodes(void)
  67781. +{
  67782. + destroy_reiser4_cache(&inode_cache);
  67783. +}
  67784. +
  67785. +/**
  67786. + * reiser4_alloc_inode - alloc_inode of super operations
  67787. + * @super: super block new inode is allocated for
  67788. + *
  67789. + * Allocates new inode, initializes reiser4 specific part of it.
  67790. + */
  67791. +static struct inode *reiser4_alloc_inode(struct super_block *super)
  67792. +{
  67793. + struct reiser4_inode_object *obj;
  67794. +
  67795. + assert("nikita-1696", super != NULL);
  67796. + obj = kmem_cache_alloc(inode_cache, reiser4_ctx_gfp_mask_get());
  67797. + if (obj != NULL) {
  67798. + reiser4_inode *info;
  67799. +
  67800. + info = &obj->p;
  67801. +
  67802. + info->pset = plugin_set_get_empty();
  67803. + info->hset = plugin_set_get_empty();
  67804. + info->extmask = 0;
  67805. + info->locality_id = 0ull;
  67806. + info->plugin_mask = 0;
  67807. + info->heir_mask = 0;
  67808. +#if !REISER4_INO_IS_OID
  67809. + info->oid_hi = 0;
  67810. +#endif
  67811. + reiser4_seal_init(&info->sd_seal, NULL, NULL);
  67812. + coord_init_invalid(&info->sd_coord, NULL);
  67813. + info->flags = 0;
  67814. + spin_lock_init(&info->guard);
  67815. + /* this deals with info's loading semaphore */
  67816. + loading_alloc(info);
  67817. + info->vroot = UBER_TREE_ADDR;
  67818. + return &obj->vfs_inode;
  67819. + } else
  67820. + return NULL;
  67821. +}
  67822. +
  67823. +/**
  67824. + * reiser4_destroy_inode - destroy_inode of super operations
  67825. + * @inode: inode being destroyed
  67826. + *
  67827. + * Puts reiser4 specific portion of inode, frees memory occupied by inode.
  67828. + */
  67829. +static void reiser4_destroy_inode(struct inode *inode)
  67830. +{
  67831. + reiser4_inode *info;
  67832. +
  67833. + info = reiser4_inode_data(inode);
  67834. +
  67835. + assert("vs-1220", inode_has_no_jnodes(info));
  67836. +
  67837. + if (!is_bad_inode(inode) && is_inode_loaded(inode)) {
  67838. + file_plugin *fplug = inode_file_plugin(inode);
  67839. + if (fplug->destroy_inode != NULL)
  67840. + fplug->destroy_inode(inode);
  67841. + }
  67842. + reiser4_dispose_cursors(inode);
  67843. + if (info->pset)
  67844. + plugin_set_put(info->pset);
  67845. + if (info->hset)
  67846. + plugin_set_put(info->hset);
  67847. +
  67848. + /*
  67849. + * cannot add similar assertion about ->i_list as prune_icache return
  67850. + * inode into slab with dangling ->list.{next,prev}. This is safe,
  67851. + * because they are re-initialized in the new_inode().
  67852. + */
  67853. + assert("nikita-2895", hlist_empty(&inode->i_dentry));
  67854. + assert("nikita-2896", hlist_unhashed(&inode->i_hash));
  67855. + assert("nikita-2898", list_empty_careful(get_readdir_list(inode)));
  67856. +
  67857. + /* this deals with info's loading semaphore */
  67858. + loading_destroy(info);
  67859. +
  67860. + kmem_cache_free(inode_cache,
  67861. + container_of(info, struct reiser4_inode_object, p));
  67862. +}
  67863. +
  67864. +/**
  67865. + * reiser4_dirty_inode - dirty_inode of super operations
  67866. + * @inode: inode being dirtied
  67867. + *
  67868. + * Updates stat data.
  67869. + */
  67870. +static void reiser4_dirty_inode(struct inode *inode, int flags)
  67871. +{
  67872. + int result;
  67873. + reiser4_context *ctx;
  67874. +
  67875. + if (!is_in_reiser4_context())
  67876. + return;
  67877. + ctx = get_current_context();
  67878. +
  67879. + if (ctx->ro) {
  67880. + warning("edward-2200",
  67881. + "failed to make inode %llu dirty (read-only FS)",
  67882. + (unsigned long long)get_inode_oid(inode));
  67883. + return;
  67884. + }
  67885. + assert("edward-1606", !IS_RDONLY(inode));
  67886. + assert("edward-1607",
  67887. + (inode_file_plugin(inode)->estimate.update(inode) <=
  67888. + ctx->grabbed_blocks));
  67889. +
  67890. + if (ctx->locked_page)
  67891. + unlock_page(ctx->locked_page);
  67892. +
  67893. + result = reiser4_update_sd(inode);
  67894. +
  67895. + if (ctx->locked_page)
  67896. + lock_page(ctx->locked_page);
  67897. + if (result)
  67898. + warning("edward-1605", "failed to dirty inode for %llu: %d",
  67899. + get_inode_oid(inode), result);
  67900. +}
  67901. +
  67902. +/**
  67903. + * ->evict_inode() of super operations
  67904. + * @inode: inode to delete
  67905. + *
  67906. + * Calls file plugin's delete_object method to delete object items from
  67907. + * filesystem tree and calls clear_inode().
  67908. + */
  67909. +static void reiser4_evict_inode(struct inode *inode)
  67910. +{
  67911. + reiser4_context *ctx;
  67912. + file_plugin *fplug;
  67913. +
  67914. + ctx = reiser4_init_context(inode->i_sb);
  67915. + if (IS_ERR(ctx)) {
  67916. + warning("vs-15", "failed to init context");
  67917. + return;
  67918. + }
  67919. +
  67920. + if (inode->i_nlink == 0 && is_inode_loaded(inode)) {
  67921. + fplug = inode_file_plugin(inode);
  67922. + if (fplug != NULL && fplug->delete_object != NULL)
  67923. + fplug->delete_object(inode);
  67924. + }
  67925. +
  67926. + truncate_inode_pages_final(&inode->i_data);
  67927. + inode->i_blocks = 0;
  67928. + clear_inode(inode);
  67929. + reiser4_exit_context(ctx);
  67930. +}
  67931. +
  67932. +/**
  67933. + * reiser4_put_super - put_super of super operations
  67934. + * @super: super block to free
  67935. + *
  67936. + * Stops daemons, release resources, umounts in short.
  67937. + */
  67938. +static void reiser4_put_super(struct super_block *super)
  67939. +{
  67940. + reiser4_super_info_data *sbinfo;
  67941. + reiser4_context *ctx;
  67942. +
  67943. + sbinfo = get_super_private(super);
  67944. + assert("vs-1699", sbinfo);
  67945. +
  67946. + debugfs_remove(sbinfo->tmgr.debugfs_atom_count);
  67947. + debugfs_remove(sbinfo->tmgr.debugfs_id_count);
  67948. + debugfs_remove(sbinfo->debugfs_root);
  67949. +
  67950. + ctx = reiser4_init_context(super);
  67951. + if (IS_ERR(ctx)) {
  67952. + warning("vs-17", "failed to init context");
  67953. + return;
  67954. + }
  67955. +
  67956. + /* have disk format plugin to free its resources */
  67957. + if (get_super_private(super)->df_plug->release)
  67958. + get_super_private(super)->df_plug->release(super);
  67959. +
  67960. + reiser4_done_formatted_fake(super);
  67961. + reiser4_done_csum_tfm(sbinfo->csum_tfm);
  67962. +
  67963. + /* stop daemons: ktxnmgr and entd */
  67964. + reiser4_done_entd(super);
  67965. + reiser4_done_ktxnmgrd(super);
  67966. + reiser4_done_txnmgr(&sbinfo->tmgr);
  67967. +
  67968. + assert("edward-1890", list_empty(&get_super_private(super)->all_jnodes));
  67969. + assert("edward-1891", get_current_context()->trans->atom == NULL);
  67970. + reiser4_check_block_counters(super);
  67971. +
  67972. + reiser4_exit_context(ctx);
  67973. + reiser4_done_fs_info(super);
  67974. +}
  67975. +
  67976. +/**
  67977. + * reiser4_statfs - statfs of super operations
  67978. + * @super: super block of file system in queried
  67979. + * @stafs: buffer to fill with statistics
  67980. + *
  67981. + * Returns information about filesystem.
  67982. + */
  67983. +static int reiser4_statfs(struct dentry *dentry, struct kstatfs *statfs)
  67984. +{
  67985. + sector_t total;
  67986. + sector_t reserved;
  67987. + sector_t free;
  67988. + sector_t forroot;
  67989. + sector_t deleted;
  67990. + reiser4_context *ctx;
  67991. + struct super_block *super = dentry->d_sb;
  67992. +
  67993. + assert("nikita-408", super != NULL);
  67994. + assert("nikita-409", statfs != NULL);
  67995. +
  67996. + ctx = reiser4_init_context(super);
  67997. + if (IS_ERR(ctx))
  67998. + return PTR_ERR(ctx);
  67999. +
  68000. + statfs->f_type = reiser4_statfs_type(super);
  68001. + statfs->f_bsize = super->s_blocksize;
  68002. +
  68003. + /*
  68004. + * 5% of total block space is reserved. This is needed for flush and
  68005. + * for truncates (so that we are able to perform truncate/unlink even
  68006. + * on the otherwise completely full file system). If this reservation
  68007. + * is hidden from statfs(2), users will mistakenly guess that they
  68008. + * have enough free space to complete some operation, which is
  68009. + * frustrating.
  68010. + *
  68011. + * Another possible solution is to subtract ->blocks_reserved from
  68012. + * ->f_bfree, but changing available space seems less intrusive than
  68013. + * letting user to see 5% of disk space to be used directly after
  68014. + * mkfs.
  68015. + */
  68016. + total = reiser4_block_count(super);
  68017. + reserved = get_super_private(super)->blocks_reserved;
  68018. + deleted = txnmgr_count_deleted_blocks();
  68019. + free = reiser4_free_blocks(super) + deleted;
  68020. + forroot = reiser4_reserved_blocks(super, 0, 0);
  68021. +
  68022. + /*
  68023. + * These counters may be in inconsistent state because we take the
  68024. + * values without keeping any global spinlock. Here we do a sanity
  68025. + * check that free block counter does not exceed the number of all
  68026. + * blocks.
  68027. + */
  68028. + if (free > total)
  68029. + free = total;
  68030. + statfs->f_blocks = total - reserved;
  68031. + /* make sure statfs->f_bfree is never larger than statfs->f_blocks */
  68032. + if (free > reserved)
  68033. + free -= reserved;
  68034. + else
  68035. + free = 0;
  68036. + statfs->f_bfree = free;
  68037. +
  68038. + if (free > forroot)
  68039. + free -= forroot;
  68040. + else
  68041. + free = 0;
  68042. + statfs->f_bavail = free;
  68043. +
  68044. + statfs->f_files = 0;
  68045. + statfs->f_ffree = 0;
  68046. +
  68047. + /* maximal acceptable name length depends on directory plugin. */
  68048. + assert("nikita-3351", super->s_root->d_inode != NULL);
  68049. + statfs->f_namelen = reiser4_max_filename_len(super->s_root->d_inode);
  68050. + reiser4_exit_context(ctx);
  68051. + return 0;
  68052. +}
  68053. +
  68054. +/**
  68055. + * reiser4_writeback_inodes - writeback_inodes of super operations
  68056. + * @super:
  68057. + * @wb:
  68058. + * @wbc:
  68059. + *
  68060. + * This method is called by background and non-background writeback.
  68061. + * Reiser4's implementation uses generic_writeback_sb_inodes to call
  68062. + * reiser4_writepages_dispatch for each of dirty inodes.
  68063. + * reiser4_writepages_dispatch handles pages dirtied via shared
  68064. + * mapping - dirty pages get into atoms. Writeout is called to flush
  68065. + * some atoms.
  68066. + */
  68067. +static long reiser4_writeback_inodes(struct super_block *super,
  68068. + struct bdi_writeback *wb,
  68069. + struct writeback_control *wbc,
  68070. + struct wb_writeback_work *work,
  68071. + bool flush_all)
  68072. +{
  68073. + long result;
  68074. + reiser4_context *ctx;
  68075. +
  68076. + if (wbc->for_kupdate)
  68077. + /* reiser4 has its own means of periodical write-out */
  68078. + goto skip;
  68079. +
  68080. + spin_unlock(&wb->list_lock);
  68081. + ctx = reiser4_init_context(super);
  68082. + if (IS_ERR(ctx)) {
  68083. + warning("vs-13", "failed to init context");
  68084. + spin_lock(&wb->list_lock);
  68085. + goto skip;
  68086. + }
  68087. + ctx->flush_bd_task = 1;
  68088. + /*
  68089. + * call reiser4_writepages for each of dirty inodes to turn
  68090. + * dirty pages into transactions if they were not yet.
  68091. + */
  68092. + spin_lock(&wb->list_lock);
  68093. + result = generic_writeback_sb_inodes(super, wb, wbc, work, flush_all);
  68094. + spin_unlock(&wb->list_lock);
  68095. +
  68096. + if (result <= 0)
  68097. + goto exit;
  68098. + wbc->nr_to_write = result;
  68099. +
  68100. + /* flush goes here */
  68101. + reiser4_writeout(super, wbc);
  68102. + exit:
  68103. + /* avoid recursive calls to ->writeback_inodes */
  68104. + context_set_commit_async(ctx);
  68105. + reiser4_exit_context(ctx);
  68106. + spin_lock(&wb->list_lock);
  68107. +
  68108. + return result;
  68109. + skip:
  68110. + writeback_skip_sb_inodes(super, wb);
  68111. + return 0;
  68112. +}
  68113. +
  68114. +/* ->sync_fs() of super operations */
  68115. +static int reiser4_sync_fs(struct super_block *super, int wait)
  68116. +{
  68117. + reiser4_context *ctx;
  68118. + struct bdi_writeback *wb;
  68119. + struct wb_writeback_work work = {
  68120. + .sb = super,
  68121. + .sync_mode = WB_SYNC_ALL,
  68122. + .range_cyclic = 0,
  68123. + .nr_pages = LONG_MAX,
  68124. + .reason = WB_REASON_SYNC,
  68125. + .for_sync = 1,
  68126. + };
  68127. + struct writeback_control wbc = {
  68128. + .sync_mode = work.sync_mode,
  68129. + .range_cyclic = work.range_cyclic,
  68130. + .range_start = 0,
  68131. + .range_end = LLONG_MAX,
  68132. + };
  68133. + ctx = reiser4_init_context(super);
  68134. + if (IS_ERR(ctx)) {
  68135. + warning("edward-1567", "failed to init context");
  68136. + return PTR_ERR(ctx);
  68137. + }
  68138. + /*
  68139. + * We don't capture superblock here.
  68140. + * Superblock is captured only by operations, which change
  68141. + * its fields different from free_blocks, nr_files, next_oid.
  68142. + * After system crash the mentioned fields are recovered from
  68143. + * journal records, see reiser4_journal_recover_sb_data().
  68144. + * Also superblock is captured at final commit when releasing
  68145. + * disk format.
  68146. + */
  68147. + wb = &inode_to_bdi(reiser4_get_super_fake(super))->wb;
  68148. + spin_lock(&wb->list_lock);
  68149. + generic_writeback_sb_inodes(super, wb, &wbc, &work, true);
  68150. + spin_unlock(&wb->list_lock);
  68151. + wbc.nr_to_write = LONG_MAX;
  68152. + /*
  68153. + * (flush goes here)
  68154. + * commit all transactions
  68155. + */
  68156. + reiser4_writeout(super, &wbc);
  68157. +
  68158. + reiser4_exit_context(ctx);
  68159. + return 0;
  68160. +}
  68161. +
  68162. +static int reiser4_remount(struct super_block *s, int *mount_flags, char *arg)
  68163. +{
  68164. + sync_filesystem(s);
  68165. + return 0;
  68166. +}
  68167. +
  68168. +/**
  68169. + * reiser4_show_options - show_options of super operations
  68170. + * @m: file where to write information
  68171. + * @mnt: mount structure
  68172. + *
  68173. + * Makes reiser4 mount options visible in /proc/mounts.
  68174. + */
  68175. +static int reiser4_show_options(struct seq_file *m, struct dentry *dentry)
  68176. +{
  68177. + struct super_block *super;
  68178. + reiser4_super_info_data *sbinfo;
  68179. +
  68180. + super = dentry->d_sb;
  68181. + sbinfo = get_super_private(super);
  68182. +
  68183. + seq_printf(m, ",atom_max_size=0x%x", sbinfo->tmgr.atom_max_size);
  68184. + seq_printf(m, ",atom_max_age=0x%x", sbinfo->tmgr.atom_max_age);
  68185. + seq_printf(m, ",atom_min_size=0x%x", sbinfo->tmgr.atom_min_size);
  68186. + seq_printf(m, ",atom_max_flushers=0x%x",
  68187. + sbinfo->tmgr.atom_max_flushers);
  68188. + seq_printf(m, ",cbk_cache_slots=0x%x",
  68189. + sbinfo->tree.cbk_cache.nr_slots);
  68190. +
  68191. + return 0;
  68192. +}
  68193. +
  68194. +struct super_operations reiser4_super_operations = {
  68195. + .alloc_inode = reiser4_alloc_inode,
  68196. + .destroy_inode = reiser4_destroy_inode,
  68197. + .dirty_inode = reiser4_dirty_inode,
  68198. + .evict_inode = reiser4_evict_inode,
  68199. + .put_super = reiser4_put_super,
  68200. + .sync_fs = reiser4_sync_fs,
  68201. + .statfs = reiser4_statfs,
  68202. + .remount_fs = reiser4_remount,
  68203. + .writeback_inodes = reiser4_writeback_inodes,
  68204. + .show_options = reiser4_show_options
  68205. +};
  68206. +
  68207. +/**
  68208. + * fill_super - initialize super block on mount
  68209. + * @super: super block to fill
  68210. + * @data: reiser4 specific mount option
  68211. + * @silent:
  68212. + *
  68213. + * This is to be called by reiser4_get_sb. Mounts filesystem.
  68214. + */
  68215. +static int fill_super(struct super_block *super, void *data, int silent)
  68216. +{
  68217. + reiser4_context ctx;
  68218. + int result;
  68219. + reiser4_super_info_data *sbinfo;
  68220. +
  68221. + assert("zam-989", super != NULL);
  68222. +
  68223. + super->s_op = NULL;
  68224. + init_stack_context(&ctx, super);
  68225. +
  68226. + /* allocate reiser4 specific super block */
  68227. + if ((result = reiser4_init_fs_info(super)) != 0)
  68228. + goto failed_init_sinfo;
  68229. +
  68230. + sbinfo = get_super_private(super);
  68231. +
  68232. + if ((result = reiser4_init_csum_tfm(&sbinfo->csum_tfm)) != 0)
  68233. + goto failed_init_csum_tfm;
  68234. +
  68235. + /* initialize various reiser4 parameters, parse mount options */
  68236. + if ((result = reiser4_init_super_data(super, data)) != 0)
  68237. + goto failed_init_super_data;
  68238. +
  68239. + /* read reiser4 master super block, initialize disk format plugin */
  68240. + if ((result = reiser4_init_read_super(super, silent)) != 0)
  68241. + goto failed_init_read_super;
  68242. +
  68243. + /* initialize transaction manager */
  68244. + reiser4_init_txnmgr(&sbinfo->tmgr);
  68245. +
  68246. + /* initialize ktxnmgrd context and start kernel thread ktxnmrgd */
  68247. + if ((result = reiser4_init_ktxnmgrd(super)) != 0)
  68248. + goto failed_init_ktxnmgrd;
  68249. +
  68250. + /* initialize entd context and start kernel thread entd */
  68251. + if ((result = reiser4_init_entd(super)) != 0)
  68252. + goto failed_init_entd;
  68253. +
  68254. + /* initialize address spaces for formatted nodes and bitmaps */
  68255. + if ((result = reiser4_init_formatted_fake(super)) != 0)
  68256. + goto failed_init_formatted_fake;
  68257. +
  68258. + /* initialize disk format plugin */
  68259. + if ((result = get_super_private(super)->df_plug->init_format(super,
  68260. + data)) != 0)
  68261. + goto failed_init_disk_format;
  68262. +
  68263. + /*
  68264. + * There are some 'committed' versions of reiser4 super block counters,
  68265. + * which correspond to reiser4 on-disk state. These counters are
  68266. + * initialized here
  68267. + */
  68268. + sbinfo->blocks_free_committed = sbinfo->blocks_free;
  68269. + sbinfo->nr_files_committed = oids_used(super);
  68270. +
  68271. + /* get inode of root directory */
  68272. + if ((result = reiser4_init_root_inode(super)) != 0)
  68273. + goto failed_init_root_inode;
  68274. +
  68275. + if ((result = get_super_private(super)->df_plug->version_update(super)) != 0)
  68276. + goto failed_update_format_version;
  68277. +
  68278. + process_safelinks(super);
  68279. + reiser4_exit_context(&ctx);
  68280. +
  68281. + sbinfo->debugfs_root = debugfs_create_dir(super->s_id,
  68282. + reiser4_debugfs_root);
  68283. + if (sbinfo->debugfs_root) {
  68284. + debugfs_create_u32("atom_count", S_IFREG|S_IRUSR,
  68285. + sbinfo->debugfs_root,
  68286. + &sbinfo->tmgr.atom_count);
  68287. + debugfs_create_u32("id_count", S_IFREG|S_IRUSR,
  68288. + sbinfo->debugfs_root,
  68289. + &sbinfo->tmgr.id_count);
  68290. + }
  68291. + printk("reiser4: %s: using %s.\n", super->s_id,
  68292. + txmod_plugin_by_id(sbinfo->txmod)->h.desc);
  68293. + return 0;
  68294. +
  68295. + failed_update_format_version:
  68296. + failed_init_root_inode:
  68297. + if (sbinfo->df_plug->release)
  68298. + sbinfo->df_plug->release(super);
  68299. + failed_init_disk_format:
  68300. + reiser4_done_formatted_fake(super);
  68301. + failed_init_formatted_fake:
  68302. + reiser4_done_entd(super);
  68303. + failed_init_entd:
  68304. + reiser4_done_ktxnmgrd(super);
  68305. + failed_init_ktxnmgrd:
  68306. + reiser4_done_txnmgr(&sbinfo->tmgr);
  68307. + failed_init_read_super:
  68308. + failed_init_super_data:
  68309. + failed_init_csum_tfm:
  68310. + reiser4_done_fs_info(super);
  68311. + failed_init_sinfo:
  68312. + reiser4_exit_context(&ctx);
  68313. + return result;
  68314. +}
  68315. +
  68316. +/**
  68317. + * reiser4_mount - mount of file_system_type operations
  68318. + * @fs_type:
  68319. + * @flags: mount flags
  68320. + * @dev_name: block device file name
  68321. + * @data: specific mount options
  68322. + *
  68323. + * Reiser4 mount entry.
  68324. + */
  68325. +static struct dentry *reiser4_mount(struct file_system_type *fs_type, int flags,
  68326. + const char *dev_name, void *data)
  68327. +{
  68328. + return mount_bdev(fs_type, flags, dev_name, data, fill_super);
  68329. +}
  68330. +
  68331. +/* structure describing the reiser4 filesystem implementation */
  68332. +static struct file_system_type reiser4_fs_type = {
  68333. + .owner = THIS_MODULE,
  68334. + .name = "reiser4",
  68335. + .fs_flags = FS_REQUIRES_DEV,
  68336. + .mount = reiser4_mount,
  68337. + .kill_sb = kill_block_super,
  68338. + .next = NULL
  68339. +};
  68340. +
  68341. +void destroy_reiser4_cache(struct kmem_cache **cachep)
  68342. +{
  68343. + BUG_ON(*cachep == NULL);
  68344. + kmem_cache_destroy(*cachep);
  68345. + *cachep = NULL;
  68346. +}
  68347. +
  68348. +/**
  68349. + * init_reiser4 - reiser4 initialization entry point
  68350. + *
  68351. + * Initializes reiser4 slabs, registers reiser4 filesystem type. It is called
  68352. + * on kernel initialization or during reiser4 module load.
  68353. + */
  68354. +static int __init init_reiser4(void)
  68355. +{
  68356. + int result;
  68357. +
  68358. + printk(KERN_INFO
  68359. + "Loading Reiser4 (format release: 4.%d.%d) "
  68360. + "See www.namesys.com for a description of Reiser4.\n",
  68361. + get_release_number_major(),
  68362. + get_release_number_minor());
  68363. +
  68364. + /* initialize slab cache of inodes */
  68365. + if ((result = init_inodes()) != 0)
  68366. + goto failed_inode_cache;
  68367. +
  68368. + /* initialize cache of znodes */
  68369. + if ((result = init_znodes()) != 0)
  68370. + goto failed_init_znodes;
  68371. +
  68372. + /* initialize all plugins */
  68373. + if ((result = init_plugins()) != 0)
  68374. + goto failed_init_plugins;
  68375. +
  68376. + /* initialize cache of plugin_set-s and plugin_set's hash table */
  68377. + if ((result = init_plugin_set()) != 0)
  68378. + goto failed_init_plugin_set;
  68379. +
  68380. + /* initialize caches of txn_atom-s and txn_handle-s */
  68381. + if ((result = init_txnmgr_static()) != 0)
  68382. + goto failed_init_txnmgr_static;
  68383. +
  68384. + /* initialize cache of jnodes */
  68385. + if ((result = init_jnodes()) != 0)
  68386. + goto failed_init_jnodes;
  68387. +
  68388. + /* initialize cache of flush queues */
  68389. + if ((result = reiser4_init_fqs()) != 0)
  68390. + goto failed_init_fqs;
  68391. +
  68392. + /* initialize cache of structures attached to dentry->d_fsdata */
  68393. + if ((result = reiser4_init_dentry_fsdata()) != 0)
  68394. + goto failed_init_dentry_fsdata;
  68395. +
  68396. + /* initialize cache of structures attached to file->private_data */
  68397. + if ((result = reiser4_init_file_fsdata()) != 0)
  68398. + goto failed_init_file_fsdata;
  68399. +
  68400. + /*
  68401. + * initialize cache of d_cursors. See plugin/file_ops_readdir.c for
  68402. + * more details
  68403. + */
  68404. + if ((result = reiser4_init_d_cursor()) != 0)
  68405. + goto failed_init_d_cursor;
  68406. +
  68407. + /* initialize cache of blocknr set entries */
  68408. + if ((result = blocknr_set_init_static()) != 0)
  68409. + goto failed_init_blocknr_set;
  68410. +
  68411. + /* initialize cache of blocknr list entries */
  68412. + if ((result = blocknr_list_init_static()) != 0)
  68413. + goto failed_init_blocknr_list;
  68414. +
  68415. + if ((result = register_filesystem(&reiser4_fs_type)) == 0) {
  68416. + reiser4_debugfs_root = debugfs_create_dir("reiser4", NULL);
  68417. + return 0;
  68418. + }
  68419. +
  68420. + blocknr_list_done_static();
  68421. + failed_init_blocknr_list:
  68422. + blocknr_set_done_static();
  68423. + failed_init_blocknr_set:
  68424. + reiser4_done_d_cursor();
  68425. + failed_init_d_cursor:
  68426. + reiser4_done_file_fsdata();
  68427. + failed_init_file_fsdata:
  68428. + reiser4_done_dentry_fsdata();
  68429. + failed_init_dentry_fsdata:
  68430. + reiser4_done_fqs();
  68431. + failed_init_fqs:
  68432. + done_jnodes();
  68433. + failed_init_jnodes:
  68434. + done_txnmgr_static();
  68435. + failed_init_txnmgr_static:
  68436. + done_plugin_set();
  68437. + failed_init_plugin_set:
  68438. + failed_init_plugins:
  68439. + done_znodes();
  68440. + failed_init_znodes:
  68441. + done_inodes();
  68442. + failed_inode_cache:
  68443. + return result;
  68444. +}
  68445. +
  68446. +/**
  68447. + * done_reiser4 - reiser4 exit entry point
  68448. + *
  68449. + * Unregister reiser4 filesystem type, deletes caches. It is called on shutdown
  68450. + * or at module unload.
  68451. + */
  68452. +static void __exit done_reiser4(void)
  68453. +{
  68454. + int result;
  68455. +
  68456. + debugfs_remove(reiser4_debugfs_root);
  68457. + result = unregister_filesystem(&reiser4_fs_type);
  68458. + BUG_ON(result != 0);
  68459. + blocknr_list_done_static();
  68460. + blocknr_set_done_static();
  68461. + reiser4_done_d_cursor();
  68462. + reiser4_done_file_fsdata();
  68463. + reiser4_done_dentry_fsdata();
  68464. + reiser4_done_fqs();
  68465. + done_jnodes();
  68466. + done_txnmgr_static();
  68467. + done_plugin_set();
  68468. + done_znodes();
  68469. + destroy_reiser4_cache(&inode_cache);
  68470. +}
  68471. +
  68472. +module_init(init_reiser4);
  68473. +module_exit(done_reiser4);
  68474. +
  68475. +MODULE_ALIAS_FS("reiser4");
  68476. +
  68477. +MODULE_DESCRIPTION("Reiser4 filesystem");
  68478. +MODULE_AUTHOR("Hans Reiser <Reiser@Namesys.COM>");
  68479. +
  68480. +MODULE_LICENSE("GPL");
  68481. +
  68482. +/*
  68483. + * Local variables:
  68484. + * c-indentation-style: "K&R"
  68485. + * mode-name: "LC"
  68486. + * c-basic-offset: 8
  68487. + * tab-width: 8
  68488. + * fill-column: 79
  68489. + * End:
  68490. + */
  68491. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/tap.c linux-5.16.14/fs/reiser4/tap.c
  68492. --- linux-5.16.14.orig/fs/reiser4/tap.c 1970-01-01 01:00:00.000000000 +0100
  68493. +++ linux-5.16.14/fs/reiser4/tap.c 2022-03-12 13:26:19.688892816 +0100
  68494. @@ -0,0 +1,376 @@
  68495. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  68496. + * reiser4/README */
  68497. +
  68498. +/*
  68499. + Tree Access Pointer (tap).
  68500. +
  68501. + tap is data structure combining coord and lock handle (mostly). It is
  68502. + useful when one has to scan tree nodes (for example, in readdir, or flush),
  68503. + for tap functions allow one to move the tap in either direction, transparently
  68504. + crossing unit/item/node borders.
  68505. +
  68506. + Tap doesn't provide automatic synchronization of its fields as it is
  68507. + supposed to be a per-thread object.
  68508. +*/
  68509. +
  68510. +#include "forward.h"
  68511. +#include "debug.h"
  68512. +#include "coord.h"
  68513. +#include "tree.h"
  68514. +#include "context.h"
  68515. +#include "tap.h"
  68516. +#include "znode.h"
  68517. +#include "tree_walk.h"
  68518. +
  68519. +#if REISER4_DEBUG
  68520. +static int tap_invariant(const tap_t *tap);
  68521. +static void tap_check(const tap_t *tap);
  68522. +#else
  68523. +#define tap_check(tap) noop
  68524. +#endif
  68525. +
  68526. +/** load node tap is pointing to, if not loaded already */
  68527. +int reiser4_tap_load(tap_t *tap)
  68528. +{
  68529. + tap_check(tap);
  68530. + if (tap->loaded == 0) {
  68531. + int result;
  68532. +
  68533. + result = zload_ra(tap->coord->node, &tap->ra_info);
  68534. + if (result != 0)
  68535. + return result;
  68536. + coord_clear_iplug(tap->coord);
  68537. + }
  68538. + ++tap->loaded;
  68539. + tap_check(tap);
  68540. + return 0;
  68541. +}
  68542. +
  68543. +/** release node tap is pointing to. Dual to tap_load() */
  68544. +void reiser4_tap_relse(tap_t *tap)
  68545. +{
  68546. + tap_check(tap);
  68547. + if (tap->loaded > 0) {
  68548. + --tap->loaded;
  68549. + if (tap->loaded == 0)
  68550. + zrelse(tap->coord->node);
  68551. + }
  68552. + tap_check(tap);
  68553. +}
  68554. +
  68555. +/**
  68556. + * init tap to consist of @coord and @lh. Locks on nodes will be acquired with
  68557. + * @mode
  68558. + */
  68559. +void reiser4_tap_init(tap_t *tap, coord_t *coord, lock_handle * lh,
  68560. + znode_lock_mode mode)
  68561. +{
  68562. + tap->coord = coord;
  68563. + tap->lh = lh;
  68564. + tap->mode = mode;
  68565. + tap->loaded = 0;
  68566. + INIT_LIST_HEAD(&tap->linkage);
  68567. + reiser4_init_ra_info(&tap->ra_info);
  68568. +}
  68569. +
  68570. +/** add @tap to the per-thread list of all taps */
  68571. +void reiser4_tap_monitor(tap_t *tap)
  68572. +{
  68573. + assert("nikita-2623", tap != NULL);
  68574. + tap_check(tap);
  68575. + list_add(&tap->linkage, reiser4_taps_list());
  68576. + tap_check(tap);
  68577. +}
  68578. +
  68579. +/* duplicate @src into @dst. Copy lock handle. @dst is not initially
  68580. + * loaded. */
  68581. +void reiser4_tap_copy(tap_t *dst, tap_t *src)
  68582. +{
  68583. + assert("nikita-3193", src != NULL);
  68584. + assert("nikita-3194", dst != NULL);
  68585. +
  68586. + *dst->coord = *src->coord;
  68587. + if (src->lh->node)
  68588. + copy_lh(dst->lh, src->lh);
  68589. + dst->mode = src->mode;
  68590. + dst->loaded = 0;
  68591. + INIT_LIST_HEAD(&dst->linkage);
  68592. + dst->ra_info = src->ra_info;
  68593. +}
  68594. +
  68595. +/** finish with @tap */
  68596. +void reiser4_tap_done(tap_t *tap)
  68597. +{
  68598. + assert("nikita-2565", tap != NULL);
  68599. + tap_check(tap);
  68600. + if (tap->loaded > 0)
  68601. + zrelse(tap->coord->node);
  68602. + done_lh(tap->lh);
  68603. + tap->loaded = 0;
  68604. + list_del_init(&tap->linkage);
  68605. + tap->coord->node = NULL;
  68606. +}
  68607. +
  68608. +/**
  68609. + * move @tap to the new node, locked with @target. Load @target, if @tap was
  68610. + * already loaded.
  68611. + */
  68612. +int reiser4_tap_move(tap_t *tap, lock_handle * target)
  68613. +{
  68614. + int result = 0;
  68615. +
  68616. + assert("nikita-2567", tap != NULL);
  68617. + assert("nikita-2568", target != NULL);
  68618. + assert("nikita-2570", target->node != NULL);
  68619. + assert("nikita-2569", tap->coord->node == tap->lh->node);
  68620. +
  68621. + tap_check(tap);
  68622. + if (tap->loaded > 0)
  68623. + result = zload_ra(target->node, &tap->ra_info);
  68624. +
  68625. + if (result == 0) {
  68626. + if (tap->loaded > 0)
  68627. + zrelse(tap->coord->node);
  68628. + done_lh(tap->lh);
  68629. + copy_lh(tap->lh, target);
  68630. + tap->coord->node = target->node;
  68631. + coord_clear_iplug(tap->coord);
  68632. + }
  68633. + tap_check(tap);
  68634. + return result;
  68635. +}
  68636. +
  68637. +/**
  68638. + * move @tap to @target. Acquire lock on @target, if @tap was already
  68639. + * loaded.
  68640. + */
  68641. +static int tap_to(tap_t *tap, znode * target)
  68642. +{
  68643. + int result;
  68644. +
  68645. + assert("nikita-2624", tap != NULL);
  68646. + assert("nikita-2625", target != NULL);
  68647. +
  68648. + tap_check(tap);
  68649. + result = 0;
  68650. + if (tap->coord->node != target) {
  68651. + lock_handle here;
  68652. +
  68653. + init_lh(&here);
  68654. + result = longterm_lock_znode(&here, target,
  68655. + tap->mode, ZNODE_LOCK_HIPRI);
  68656. + if (result == 0) {
  68657. + result = reiser4_tap_move(tap, &here);
  68658. + done_lh(&here);
  68659. + }
  68660. + }
  68661. + tap_check(tap);
  68662. + return result;
  68663. +}
  68664. +
  68665. +/**
  68666. + * move @tap to given @target, loading and locking @target->node if
  68667. + * necessary
  68668. + */
  68669. +int tap_to_coord(tap_t *tap, coord_t *target)
  68670. +{
  68671. + int result;
  68672. +
  68673. + tap_check(tap);
  68674. + result = tap_to(tap, target->node);
  68675. + if (result == 0)
  68676. + coord_dup(tap->coord, target);
  68677. + tap_check(tap);
  68678. + return result;
  68679. +}
  68680. +
  68681. +/** return list of all taps */
  68682. +struct list_head *reiser4_taps_list(void)
  68683. +{
  68684. + return &get_current_context()->taps;
  68685. +}
  68686. +
  68687. +/** helper function for go_{next,prev}_{item,unit,node}() */
  68688. +int go_dir_el(tap_t *tap, sideof dir, int units_p)
  68689. +{
  68690. + coord_t dup;
  68691. + coord_t *coord;
  68692. + int result;
  68693. +
  68694. + int (*coord_dir) (coord_t *);
  68695. + int (*get_dir_neighbor) (lock_handle *, znode *, int, int);
  68696. + void (*coord_init) (coord_t *, const znode *);
  68697. + ON_DEBUG(int (*coord_check) (const coord_t *));
  68698. +
  68699. + assert("nikita-2556", tap != NULL);
  68700. + assert("nikita-2557", tap->coord != NULL);
  68701. + assert("nikita-2558", tap->lh != NULL);
  68702. + assert("nikita-2559", tap->coord->node != NULL);
  68703. +
  68704. + tap_check(tap);
  68705. + if (dir == LEFT_SIDE) {
  68706. + coord_dir = units_p ? coord_prev_unit : coord_prev_item;
  68707. + get_dir_neighbor = reiser4_get_left_neighbor;
  68708. + coord_init = coord_init_last_unit;
  68709. + } else {
  68710. + coord_dir = units_p ? coord_next_unit : coord_next_item;
  68711. + get_dir_neighbor = reiser4_get_right_neighbor;
  68712. + coord_init = coord_init_first_unit;
  68713. + }
  68714. + ON_DEBUG(coord_check =
  68715. + units_p ? coord_is_existing_unit : coord_is_existing_item);
  68716. + assert("nikita-2560", coord_check(tap->coord));
  68717. +
  68718. + coord = tap->coord;
  68719. + coord_dup(&dup, coord);
  68720. + if (coord_dir(&dup) != 0) {
  68721. + do {
  68722. + /* move to the left neighboring node */
  68723. + lock_handle dup;
  68724. +
  68725. + init_lh(&dup);
  68726. + result =
  68727. + get_dir_neighbor(&dup, coord->node, (int)tap->mode,
  68728. + GN_CAN_USE_UPPER_LEVELS);
  68729. + if (result == 0) {
  68730. + result = reiser4_tap_move(tap, &dup);
  68731. + if (result == 0)
  68732. + coord_init(tap->coord, dup.node);
  68733. + done_lh(&dup);
  68734. + }
  68735. + /* skip empty nodes */
  68736. + } while ((result == 0) && node_is_empty(coord->node));
  68737. + } else {
  68738. + result = 0;
  68739. + coord_dup(coord, &dup);
  68740. + }
  68741. + assert("nikita-2564", ergo(!result, coord_check(tap->coord)));
  68742. + tap_check(tap);
  68743. + return result;
  68744. +}
  68745. +
  68746. +/**
  68747. + * move @tap to the next unit, transparently crossing item and node
  68748. + * boundaries
  68749. + */
  68750. +int go_next_unit(tap_t *tap)
  68751. +{
  68752. + return go_dir_el(tap, RIGHT_SIDE, 1);
  68753. +}
  68754. +
  68755. +/**
  68756. + * move @tap to the previous unit, transparently crossing item and node
  68757. + * boundaries
  68758. + */
  68759. +int go_prev_unit(tap_t *tap)
  68760. +{
  68761. + return go_dir_el(tap, LEFT_SIDE, 1);
  68762. +}
  68763. +
  68764. +/**
  68765. + * @shift times apply @actor to the @tap. This is used to move @tap by
  68766. + * @shift units (or items, or nodes) in either direction.
  68767. + */
  68768. +static int rewind_to(tap_t *tap, go_actor_t actor, int shift)
  68769. +{
  68770. + int result;
  68771. +
  68772. + assert("nikita-2555", shift >= 0);
  68773. + assert("nikita-2562", tap->coord->node == tap->lh->node);
  68774. +
  68775. + tap_check(tap);
  68776. + result = reiser4_tap_load(tap);
  68777. + if (result != 0)
  68778. + return result;
  68779. +
  68780. + for (; shift > 0; --shift) {
  68781. + result = actor(tap);
  68782. + assert("nikita-2563", tap->coord->node == tap->lh->node);
  68783. + if (result != 0)
  68784. + break;
  68785. + }
  68786. + reiser4_tap_relse(tap);
  68787. + tap_check(tap);
  68788. + return result;
  68789. +}
  68790. +
  68791. +/** move @tap @shift units rightward */
  68792. +int rewind_right(tap_t *tap, int shift)
  68793. +{
  68794. + return rewind_to(tap, go_next_unit, shift);
  68795. +}
  68796. +
  68797. +/** move @tap @shift units leftward */
  68798. +int rewind_left(tap_t *tap, int shift)
  68799. +{
  68800. + return rewind_to(tap, go_prev_unit, shift);
  68801. +}
  68802. +
  68803. +#if REISER4_DEBUG
  68804. +/** debugging function: print @tap content in human readable form */
  68805. +static void print_tap(const char *prefix, const tap_t *tap)
  68806. +{
  68807. + if (tap == NULL) {
  68808. + printk("%s: null tap\n", prefix);
  68809. + return;
  68810. + }
  68811. + printk("%s: loaded: %i, in-list: %i, node: %p, mode: %s\n", prefix,
  68812. + tap->loaded, (&tap->linkage == tap->linkage.next &&
  68813. + &tap->linkage == tap->linkage.prev),
  68814. + tap->lh->node,
  68815. + lock_mode_name(tap->mode));
  68816. + print_coord("\tcoord", tap->coord, 0);
  68817. +}
  68818. +
  68819. +/** check [tap-sane] invariant */
  68820. +static int tap_invariant(const tap_t *tap)
  68821. +{
  68822. + /* [tap-sane] invariant */
  68823. +
  68824. + if (tap == NULL)
  68825. + return 1;
  68826. + /* tap->mode is one of
  68827. + *
  68828. + * {ZNODE_NO_LOCK, ZNODE_READ_LOCK, ZNODE_WRITE_LOCK}, and
  68829. + */
  68830. + if (tap->mode != ZNODE_NO_LOCK &&
  68831. + tap->mode != ZNODE_READ_LOCK && tap->mode != ZNODE_WRITE_LOCK)
  68832. + return 2;
  68833. + /* tap->coord != NULL, and */
  68834. + if (tap->coord == NULL)
  68835. + return 3;
  68836. + /* tap->lh != NULL, and */
  68837. + if (tap->lh == NULL)
  68838. + return 4;
  68839. + /* tap->loaded > 0 => znode_is_loaded(tap->coord->node), and */
  68840. + if (!ergo(tap->loaded, znode_is_loaded(tap->coord->node)))
  68841. + return 5;
  68842. + /* tap->coord->node == tap->lh->node if tap->lh->node is not 0 */
  68843. + if (tap->lh->node != NULL && tap->coord->node != tap->lh->node)
  68844. + return 6;
  68845. + return 0;
  68846. +}
  68847. +
  68848. +/** debugging function: check internal @tap consistency */
  68849. +static void tap_check(const tap_t *tap)
  68850. +{
  68851. + int result;
  68852. +
  68853. + result = tap_invariant(tap);
  68854. + if (result != 0) {
  68855. + print_tap("broken", tap);
  68856. + reiser4_panic("nikita-2831", "tap broken: %i\n", result);
  68857. + }
  68858. +}
  68859. +#endif
  68860. +
  68861. +/* Make Linus happy.
  68862. + Local variables:
  68863. + c-indentation-style: "K&R"
  68864. + mode-name: "LC"
  68865. + c-basic-offset: 8
  68866. + tab-width: 8
  68867. + fill-column: 120
  68868. + scroll-step: 1
  68869. + End:
  68870. +*/
  68871. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/tap.h linux-5.16.14/fs/reiser4/tap.h
  68872. --- linux-5.16.14.orig/fs/reiser4/tap.h 1970-01-01 01:00:00.000000000 +0100
  68873. +++ linux-5.16.14/fs/reiser4/tap.h 2022-03-12 13:26:19.688892816 +0100
  68874. @@ -0,0 +1,70 @@
  68875. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  68876. +
  68877. +/* Tree Access Pointers. See tap.c for more details. */
  68878. +
  68879. +#if !defined(__REISER4_TAP_H__)
  68880. +#define __REISER4_TAP_H__
  68881. +
  68882. +#include "forward.h"
  68883. +#include "readahead.h"
  68884. +
  68885. +/**
  68886. + tree_access_pointer aka tap. Data structure combining coord_t and lock
  68887. + handle.
  68888. + Invariants involving this data-type, see doc/lock-ordering for details:
  68889. +
  68890. + [tap-sane]
  68891. + */
  68892. +struct tree_access_pointer {
  68893. + /* coord tap is at */
  68894. + coord_t *coord;
  68895. + /* lock handle on ->coord->node */
  68896. + lock_handle *lh;
  68897. + /* mode of lock acquired by this tap */
  68898. + znode_lock_mode mode;
  68899. + /* incremented by reiser4_tap_load().
  68900. + Decremented by reiser4_tap_relse(). */
  68901. + int loaded;
  68902. + /* list of taps */
  68903. + struct list_head linkage;
  68904. + /* read-ahead hint */
  68905. + ra_info_t ra_info;
  68906. +};
  68907. +
  68908. +typedef int (*go_actor_t) (tap_t *tap);
  68909. +
  68910. +extern int reiser4_tap_load(tap_t *tap);
  68911. +extern void reiser4_tap_relse(tap_t *tap);
  68912. +extern void reiser4_tap_init(tap_t *tap, coord_t *coord, lock_handle * lh,
  68913. + znode_lock_mode mode);
  68914. +extern void reiser4_tap_monitor(tap_t *tap);
  68915. +extern void reiser4_tap_copy(tap_t *dst, tap_t *src);
  68916. +extern void reiser4_tap_done(tap_t *tap);
  68917. +extern int reiser4_tap_move(tap_t *tap, lock_handle * target);
  68918. +extern int tap_to_coord(tap_t *tap, coord_t *target);
  68919. +
  68920. +extern int go_dir_el(tap_t *tap, sideof dir, int units_p);
  68921. +extern int go_next_unit(tap_t *tap);
  68922. +extern int go_prev_unit(tap_t *tap);
  68923. +extern int rewind_right(tap_t *tap, int shift);
  68924. +extern int rewind_left(tap_t *tap, int shift);
  68925. +
  68926. +extern struct list_head *reiser4_taps_list(void);
  68927. +
  68928. +#define for_all_taps(tap) \
  68929. + for (tap = list_entry(reiser4_taps_list()->next, tap_t, linkage); \
  68930. + reiser4_taps_list() != &tap->linkage; \
  68931. + tap = list_entry(tap->linkage.next, tap_t, linkage))
  68932. +
  68933. +/* __REISER4_TAP_H__ */
  68934. +#endif
  68935. +/* Make Linus happy.
  68936. + Local variables:
  68937. + c-indentation-style: "K&R"
  68938. + mode-name: "LC"
  68939. + c-basic-offset: 8
  68940. + tab-width: 8
  68941. + fill-column: 120
  68942. + scroll-step: 1
  68943. + End:
  68944. +*/
  68945. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/tree.c linux-5.16.14/fs/reiser4/tree.c
  68946. --- linux-5.16.14.orig/fs/reiser4/tree.c 1970-01-01 01:00:00.000000000 +0100
  68947. +++ linux-5.16.14/fs/reiser4/tree.c 2022-03-12 13:26:19.689892818 +0100
  68948. @@ -0,0 +1,1884 @@
  68949. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  68950. + * reiser4/README */
  68951. +
  68952. +/*
  68953. + * KEYS IN A TREE.
  68954. + *
  68955. + * The tree consists of nodes located on the disk. Node in the tree is either
  68956. + * formatted or unformatted. Formatted node is one that has structure
  68957. + * understood by the tree balancing and traversal code. Formatted nodes are
  68958. + * further classified into leaf and internal nodes. Latter distinctions is
  68959. + * (almost) of only historical importance: general structure of leaves and
  68960. + * internal nodes is the same in Reiser4. Unformatted nodes contain raw data
  68961. + * that are part of bodies of ordinary files and attributes.
  68962. + *
  68963. + * Each node in the tree spans some interval in the key space. Key ranges for
  68964. + * all nodes in the tree are disjoint. Actually, this only holds in some weak
  68965. + * sense, because of the non-unique keys: intersection of key ranges for
  68966. + * different nodes is either empty, or consists of exactly one key.
  68967. + *
  68968. + * Formatted node consists of a sequence of items. Each item spans some
  68969. + * interval in key space. Key ranges for all items in a tree are disjoint,
  68970. + * modulo non-unique keys again. Items within nodes are ordered in the key
  68971. + * order of the smallest key in an item.
  68972. + *
  68973. + * Particular type of item can be further split into units. Unit is piece of
  68974. + * item that can be cut from item and moved into another item of the same
  68975. + * type. Units are used by balancing code to repack data during balancing.
  68976. + *
  68977. + * Unit can be further split into smaller entities (for example, extent unit
  68978. + * represents several pages, and it is natural for extent code to operate on
  68979. + * particular pages and even bytes within one unit), but this is of no
  68980. + * relevance to the generic balancing and lookup code.
  68981. + *
  68982. + * Although item is said to "span" range or interval of keys, it is not
  68983. + * necessary that item contains piece of data addressable by each and every
  68984. + * key in this range. For example, compound directory item, consisting of
  68985. + * units corresponding to directory entries and keyed by hashes of file names,
  68986. + * looks more as having "discrete spectrum": only some disjoint keys inside
  68987. + * range occupied by this item really address data.
  68988. + *
  68989. + * Nonetheless, each item always has a well-defined least (minimal) key, that
  68990. + * is recorded in item header, stored in the node this item is in. Also, item
  68991. + * plugin can optionally define method ->max_key_inside() returning maximal
  68992. + * key that can _possibly_ be located within this item. This method is used
  68993. + * (mainly) to determine when given piece of data should be merged into
  68994. + * existing item, instead of creating a new one. Because of this, even though
  68995. + * ->max_key_inside() can be larger than any key actually located in the item,
  68996. + * intervals
  68997. + *
  68998. + * [ reiser4_min_key( item ), ->max_key_inside( item ) ]
  68999. + *
  69000. + * are still disjoint for all items within the _same_ node.
  69001. + *
  69002. + * In memory node is represented by znode. It plays several roles:
  69003. + *
  69004. + * . something locks are taken on
  69005. + *
  69006. + * . something tracked by transaction manager (this is going to change)
  69007. + *
  69008. + * . something used to access node data
  69009. + *
  69010. + * . something used to maintain tree structure in memory: sibling and
  69011. + * parental linkage.
  69012. + *
  69013. + * . something used to organize nodes into "slums"
  69014. + *
  69015. + * More on znodes see in znode.[ch]
  69016. + *
  69017. + * DELIMITING KEYS
  69018. + *
  69019. + * To simplify balancing, allow some flexibility in locking and speed up
  69020. + * important coord cache optimization, we keep delimiting keys of nodes in
  69021. + * memory. Depending on disk format (implemented by appropriate node plugin)
  69022. + * node on disk can record both left and right delimiting key, only one of
  69023. + * them, or none. Still, our balancing and tree traversal code keep both
  69024. + * delimiting keys for a node that is in memory stored in the znode. When
  69025. + * node is first brought into memory during tree traversal, its left
  69026. + * delimiting key is taken from its parent, and its right delimiting key is
  69027. + * either next key in its parent, or is right delimiting key of parent if
  69028. + * node is the rightmost child of parent.
  69029. + *
  69030. + * Physical consistency of delimiting key is protected by special dk
  69031. + * read-write lock. That is, delimiting keys can only be inspected or
  69032. + * modified under this lock. But dk lock is only sufficient for fast
  69033. + * "pessimistic" check, because to simplify code and to decrease lock
  69034. + * contention, balancing (carry) only updates delimiting keys right before
  69035. + * unlocking all locked nodes on the given tree level. For example,
  69036. + * coord-by-key cache scans LRU list of recently accessed znodes. For each
  69037. + * node it first does fast check under dk spin lock. If key looked for is
  69038. + * not between delimiting keys for this node, next node is inspected and so
  69039. + * on. If key is inside of the key range, long term lock is taken on node
  69040. + * and key range is rechecked.
  69041. + *
  69042. + * COORDINATES
  69043. + *
  69044. + * To find something in the tree, you supply a key, and the key is resolved
  69045. + * by coord_by_key() into a coord (coordinate) that is valid as long as the
  69046. + * node the coord points to remains locked. As mentioned above trees
  69047. + * consist of nodes that consist of items that consist of units. A unit is
  69048. + * the smallest and indivisible piece of tree as far as balancing and tree
  69049. + * search are concerned. Each node, item, and unit can be addressed by
  69050. + * giving its level in the tree and the key occupied by this entity. A node
  69051. + * knows what the key ranges are of the items within it, and how to find its
  69052. + * items and invoke their item handlers, but it does not know how to access
  69053. + * individual units within its items except through the item handlers.
  69054. + * coord is a structure containing a pointer to the node, the ordinal number
  69055. + * of the item within this node (a sort of item offset), and the ordinal
  69056. + * number of the unit within this item.
  69057. + *
  69058. + * TREE LOOKUP
  69059. + *
  69060. + * There are two types of access to the tree: lookup and modification.
  69061. + *
  69062. + * Lookup is a search for the key in the tree. Search can look for either
  69063. + * exactly the key given to it, or for the largest key that is not greater
  69064. + * than the key given to it. This distinction is determined by "bias"
  69065. + * parameter of search routine (coord_by_key()). coord_by_key() either
  69066. + * returns error (key is not in the tree, or some kind of external error
  69067. + * occurred), or successfully resolves key into coord.
  69068. + *
  69069. + * This resolution is done by traversing tree top-to-bottom from root level
  69070. + * to the desired level. On levels above twig level (level one above the
  69071. + * leaf level) nodes consist exclusively of internal items. Internal item is
  69072. + * nothing more than pointer to the tree node on the child level. On twig
  69073. + * level nodes consist of internal items intermixed with extent
  69074. + * items. Internal items form normal search tree structure used by traversal
  69075. + * to descend through the tree.
  69076. + *
  69077. + * TREE LOOKUP OPTIMIZATIONS
  69078. + *
  69079. + * Tree lookup described above is expensive even if all nodes traversed are
  69080. + * already in the memory: for each node binary search within it has to be
  69081. + * performed and binary searches are CPU consuming and tend to destroy CPU
  69082. + * caches.
  69083. + *
  69084. + * Several optimizations are used to work around this:
  69085. + *
  69086. + * . cbk_cache (look-aside cache for tree traversals, see search.c for
  69087. + * details)
  69088. + *
  69089. + * . seals (see seal.[ch])
  69090. + *
  69091. + * . vroot (see search.c)
  69092. + *
  69093. + * General search-by-key is layered thusly:
  69094. + *
  69095. + * [check seal, if any] --ok--> done
  69096. + * |
  69097. + * failed
  69098. + * |
  69099. + * V
  69100. + * [vroot defined] --no--> node = tree_root
  69101. + * | |
  69102. + * yes |
  69103. + * | |
  69104. + * V |
  69105. + * node = vroot |
  69106. + * | |
  69107. + * | |
  69108. + * | |
  69109. + * V V
  69110. + * [check cbk_cache for key] --ok--> done
  69111. + * |
  69112. + * failed
  69113. + * |
  69114. + * V
  69115. + * [start tree traversal from node]
  69116. + *
  69117. + */
  69118. +
  69119. +#include "forward.h"
  69120. +#include "debug.h"
  69121. +#include "dformat.h"
  69122. +#include "key.h"
  69123. +#include "coord.h"
  69124. +#include "plugin/item/static_stat.h"
  69125. +#include "plugin/item/item.h"
  69126. +#include "plugin/node/node.h"
  69127. +#include "plugin/plugin.h"
  69128. +#include "txnmgr.h"
  69129. +#include "jnode.h"
  69130. +#include "znode.h"
  69131. +#include "block_alloc.h"
  69132. +#include "tree_walk.h"
  69133. +#include "carry.h"
  69134. +#include "carry_ops.h"
  69135. +#include "tap.h"
  69136. +#include "tree.h"
  69137. +#include "vfs_ops.h"
  69138. +#include "page_cache.h"
  69139. +#include "super.h"
  69140. +#include "reiser4.h"
  69141. +#include "inode.h"
  69142. +
  69143. +#include <linux/fs.h> /* for struct super_block */
  69144. +#include <linux/spinlock.h>
  69145. +
  69146. +/* Disk address (block number) never ever used for any real tree node. This is
  69147. + used as block number of "uber" znode.
  69148. +
  69149. + Invalid block addresses are 0 by tradition.
  69150. +
  69151. +*/
  69152. +const reiser4_block_nr UBER_TREE_ADDR = 0ull;
  69153. +
  69154. +#define CUT_TREE_MIN_ITERATIONS 64
  69155. +
  69156. +static int find_child_by_addr(znode * parent, znode * child, coord_t *result);
  69157. +
  69158. +/* return node plugin of coord->node */
  69159. +node_plugin *node_plugin_by_coord(const coord_t *coord)
  69160. +{
  69161. + assert("vs-1", coord != NULL);
  69162. + assert("vs-2", coord->node != NULL);
  69163. +
  69164. + return coord->node->nplug;
  69165. +}
  69166. +
  69167. +/* insert item into tree. Fields of @coord are updated so that they can be
  69168. + * used by consequent insert operation. */
  69169. +insert_result insert_by_key(reiser4_tree * tree /* tree to insert new item
  69170. + * into */ ,
  69171. + const reiser4_key * key /* key of new item */ ,
  69172. + reiser4_item_data * data /* parameters for item
  69173. + * creation */ ,
  69174. + coord_t *coord /* resulting insertion coord */ ,
  69175. + lock_handle * lh /* resulting lock
  69176. + * handle */ ,
  69177. + tree_level stop_level /* level where to insert */ ,
  69178. + __u32 flags/* insertion flags */)
  69179. +{
  69180. + int result;
  69181. +
  69182. + assert("nikita-358", tree != NULL);
  69183. + assert("nikita-360", coord != NULL);
  69184. +
  69185. + result = coord_by_key(tree, key, coord, lh, ZNODE_WRITE_LOCK,
  69186. + FIND_EXACT, stop_level, stop_level,
  69187. + flags | CBK_FOR_INSERT, NULL/*ra_info */);
  69188. + switch (result) {
  69189. + default:
  69190. + break;
  69191. + case CBK_COORD_FOUND:
  69192. + result = IBK_ALREADY_EXISTS;
  69193. + break;
  69194. + case CBK_COORD_NOTFOUND:
  69195. + assert("nikita-2017", coord->node != NULL);
  69196. + result = insert_by_coord(coord, data, key, lh, 0/*flags */);
  69197. + break;
  69198. + }
  69199. + return result;
  69200. +}
  69201. +
  69202. +/* insert item by calling carry. Helper function called if short-cut
  69203. + insertion failed */
  69204. +static insert_result insert_with_carry_by_coord(coord_t *coord,
  69205. + /* coord where to insert */
  69206. + lock_handle * lh,
  69207. + /* lock handle of insertion node */
  69208. + reiser4_item_data * data,
  69209. + /* parameters of new item */
  69210. + const reiser4_key * key,
  69211. + /* key of new item */
  69212. + carry_opcode cop,
  69213. + /* carry operation to perform */
  69214. + cop_insert_flag flags
  69215. + /* carry flags */ )
  69216. +{
  69217. + int result;
  69218. + carry_pool *pool;
  69219. + carry_level *lowest_level;
  69220. + carry_insert_data *cdata;
  69221. + carry_op *op;
  69222. +
  69223. + assert("umka-314", coord != NULL);
  69224. +
  69225. + /* allocate carry_pool and 3 carry_level-s */
  69226. + pool =
  69227. + init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
  69228. + sizeof(*cdata));
  69229. + if (IS_ERR(pool))
  69230. + return PTR_ERR(pool);
  69231. + lowest_level = (carry_level *) (pool + 1);
  69232. + init_carry_level(lowest_level, pool);
  69233. +
  69234. + op = reiser4_post_carry(lowest_level, cop, coord->node, 0);
  69235. + if (IS_ERR(op) || (op == NULL)) {
  69236. + done_carry_pool(pool);
  69237. + return RETERR(op ? PTR_ERR(op) : -EIO);
  69238. + }
  69239. + cdata = (carry_insert_data *) (lowest_level + 3);
  69240. + cdata->coord = coord;
  69241. + cdata->data = data;
  69242. + cdata->key = key;
  69243. + op->u.insert.d = cdata;
  69244. + if (flags == 0)
  69245. + flags = znode_get_tree(coord->node)->carry.insert_flags;
  69246. + op->u.insert.flags = flags;
  69247. + op->u.insert.type = COPT_ITEM_DATA;
  69248. + op->u.insert.child = NULL;
  69249. + if (lh != NULL) {
  69250. + assert("nikita-3245", lh->node == coord->node);
  69251. + lowest_level->track_type = CARRY_TRACK_CHANGE;
  69252. + lowest_level->tracked = lh;
  69253. + }
  69254. +
  69255. + result = reiser4_carry(lowest_level, NULL);
  69256. + done_carry_pool(pool);
  69257. +
  69258. + return result;
  69259. +}
  69260. +
  69261. +/* form carry queue to perform paste of @data with @key at @coord, and launch
  69262. + its execution by calling carry().
  69263. +
  69264. + Instruct carry to update @lh if after balancing insertion coord moves into
  69265. + different block.
  69266. +
  69267. +*/
  69268. +static int paste_with_carry(coord_t *coord, /* coord of paste */
  69269. + lock_handle * lh, /* lock handle of node
  69270. + * where item is
  69271. + * pasted */
  69272. + reiser4_item_data * data, /* parameters of new
  69273. + * item */
  69274. + const reiser4_key * key, /* key of new item */
  69275. + unsigned flags/* paste flags */)
  69276. +{
  69277. + int result;
  69278. + carry_pool *pool;
  69279. + carry_level *lowest_level;
  69280. + carry_insert_data *cdata;
  69281. + carry_op *op;
  69282. +
  69283. + assert("umka-315", coord != NULL);
  69284. + assert("umka-316", key != NULL);
  69285. +
  69286. + pool =
  69287. + init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
  69288. + sizeof(*cdata));
  69289. + if (IS_ERR(pool))
  69290. + return PTR_ERR(pool);
  69291. + lowest_level = (carry_level *) (pool + 1);
  69292. + init_carry_level(lowest_level, pool);
  69293. +
  69294. + op = reiser4_post_carry(lowest_level, COP_PASTE, coord->node, 0);
  69295. + if (IS_ERR(op) || (op == NULL)) {
  69296. + done_carry_pool(pool);
  69297. + return RETERR(op ? PTR_ERR(op) : -EIO);
  69298. + }
  69299. + cdata = (carry_insert_data *) (lowest_level + 3);
  69300. + cdata->coord = coord;
  69301. + cdata->data = data;
  69302. + cdata->key = key;
  69303. + op->u.paste.d = cdata;
  69304. + if (flags == 0)
  69305. + flags = znode_get_tree(coord->node)->carry.paste_flags;
  69306. + op->u.paste.flags = flags;
  69307. + op->u.paste.type = COPT_ITEM_DATA;
  69308. + if (lh != NULL) {
  69309. + lowest_level->track_type = CARRY_TRACK_CHANGE;
  69310. + lowest_level->tracked = lh;
  69311. + }
  69312. +
  69313. + result = reiser4_carry(lowest_level, NULL);
  69314. + done_carry_pool(pool);
  69315. +
  69316. + return result;
  69317. +}
  69318. +
  69319. +/* insert item at the given coord.
  69320. +
  69321. + First try to skip carry by directly calling ->create_item() method of node
  69322. + plugin. If this is impossible (there is not enough free space in the node,
  69323. + or leftmost item in the node is created), call insert_with_carry_by_coord()
  69324. + that will do full carry().
  69325. +
  69326. +*/
  69327. +insert_result insert_by_coord(coord_t *coord /* coord where to
  69328. + * insert. coord->node has
  69329. + * to be write locked by
  69330. + * caller */ ,
  69331. + reiser4_item_data * data /* data to be
  69332. + * inserted */ ,
  69333. + const reiser4_key * key /* key of new item */ ,
  69334. + lock_handle * lh /* lock handle of write
  69335. + * lock on node */ ,
  69336. + __u32 flags/* insertion flags */)
  69337. +{
  69338. + unsigned item_size;
  69339. + int result;
  69340. + znode *node;
  69341. +
  69342. + assert("vs-247", coord != NULL);
  69343. + assert("vs-248", data != NULL);
  69344. + assert("vs-249", data->length >= 0);
  69345. + assert("nikita-1191", znode_is_write_locked(coord->node));
  69346. +
  69347. + node = coord->node;
  69348. + coord_clear_iplug(coord);
  69349. + result = zload(node);
  69350. + if (result != 0)
  69351. + return result;
  69352. +
  69353. + item_size = space_needed(node, NULL, data, 1);
  69354. + if (item_size > znode_free_space(node) &&
  69355. + (flags & COPI_DONT_SHIFT_LEFT) && (flags & COPI_DONT_SHIFT_RIGHT)
  69356. + && (flags & COPI_DONT_ALLOCATE)) {
  69357. + /* we are forced to use free space of coord->node and new item
  69358. + does not fit into it.
  69359. +
  69360. + Currently we get here only when we allocate and copy units
  69361. + of extent item from a node to its left neighbor during
  69362. + "squalloc"-ing. If @node (this is left neighbor) does not
  69363. + have enough free space - we do not want to attempt any
  69364. + shifting and allocations because we are in squeezing and
  69365. + everything to the left of @node is tightly packed.
  69366. + */
  69367. + result = -E_NODE_FULL;
  69368. + } else if ((item_size <= znode_free_space(node)) &&
  69369. + !coord_is_before_leftmost(coord) &&
  69370. + (node_plugin_by_node(node)->fast_insert != NULL)
  69371. + && node_plugin_by_node(node)->fast_insert(coord)) {
  69372. + /* shortcut insertion without carry() overhead.
  69373. +
  69374. + Only possible if:
  69375. +
  69376. + - there is enough free space
  69377. +
  69378. + - insertion is not into the leftmost position in a node
  69379. + (otherwise it would require updating of delimiting key in a
  69380. + parent)
  69381. +
  69382. + - node plugin agrees with this
  69383. +
  69384. + */
  69385. + result =
  69386. + node_plugin_by_node(node)->create_item(coord, key, data,
  69387. + NULL);
  69388. + znode_make_dirty(node);
  69389. + } else {
  69390. + /* otherwise do full-fledged carry(). */
  69391. + result =
  69392. + insert_with_carry_by_coord(coord, lh, data, key, COP_INSERT,
  69393. + flags);
  69394. + }
  69395. + zrelse(node);
  69396. + return result;
  69397. +}
  69398. +
  69399. +/* @coord is set to leaf level and @data is to be inserted to twig level */
  69400. +insert_result
  69401. +insert_extent_by_coord(coord_t *coord, /* coord where to insert.
  69402. + * coord->node has to be write
  69403. + * locked by caller */
  69404. + reiser4_item_data *data,/* data to be inserted */
  69405. + const reiser4_key *key, /* key of new item */
  69406. + lock_handle *lh /* lock handle of write lock
  69407. + on node */)
  69408. +{
  69409. + assert("vs-405", coord != NULL);
  69410. + assert("vs-406", data != NULL);
  69411. + assert("vs-407", data->length > 0);
  69412. + assert("vs-408", znode_is_write_locked(coord->node));
  69413. + assert("vs-409", znode_get_level(coord->node) == LEAF_LEVEL);
  69414. +
  69415. + return insert_with_carry_by_coord(coord, lh, data, key, COP_EXTENT,
  69416. + 0 /*flags */ );
  69417. +}
  69418. +
  69419. +/* Insert into the item at the given coord.
  69420. +
  69421. + First try to skip carry by directly calling ->paste() method of item
  69422. + plugin. If this is impossible (there is not enough free space in the node,
  69423. + or we are pasting into leftmost position in the node), call
  69424. + paste_with_carry() that will do full carry().
  69425. +
  69426. +*/
  69427. +/* paste_into_item */
  69428. +int insert_into_item(coord_t * coord /* coord of pasting */ ,
  69429. + lock_handle * lh /* lock handle on node involved */ ,
  69430. + const reiser4_key * key /* key of unit being pasted */ ,
  69431. + reiser4_item_data * data /* parameters for new unit */ ,
  69432. + unsigned flags /* insert/paste flags */ )
  69433. +{
  69434. + int result;
  69435. + int size_change;
  69436. + node_plugin *nplug;
  69437. + item_plugin *iplug;
  69438. +
  69439. + assert("umka-317", coord != NULL);
  69440. + assert("umka-318", key != NULL);
  69441. +
  69442. + iplug = item_plugin_by_coord(coord);
  69443. + nplug = node_plugin_by_coord(coord);
  69444. +
  69445. + assert("nikita-1480", iplug == data->iplug);
  69446. +
  69447. + size_change = space_needed(coord->node, coord, data, 0);
  69448. + if (size_change > (int)znode_free_space(coord->node) &&
  69449. + (flags & COPI_DONT_SHIFT_LEFT) && (flags & COPI_DONT_SHIFT_RIGHT)
  69450. + && (flags & COPI_DONT_ALLOCATE)) {
  69451. + /* we are forced to use free space of coord->node and new data
  69452. + does not fit into it. */
  69453. + return -E_NODE_FULL;
  69454. + }
  69455. +
  69456. + /* shortcut paste without carry() overhead.
  69457. +
  69458. + Only possible if:
  69459. +
  69460. + - there is enough free space
  69461. +
  69462. + - paste is not into the leftmost unit in a node (otherwise
  69463. + it would require updating of delimiting key in a parent)
  69464. +
  69465. + - node plugin agrees with this
  69466. +
  69467. + - item plugin agrees with us
  69468. + */
  69469. + if (size_change <= (int)znode_free_space(coord->node) &&
  69470. + (coord->item_pos != 0 ||
  69471. + coord->unit_pos != 0 || coord->between == AFTER_UNIT) &&
  69472. + coord->unit_pos != 0 && nplug->fast_paste != NULL &&
  69473. + nplug->fast_paste(coord) &&
  69474. + iplug->b.fast_paste != NULL && iplug->b.fast_paste(coord)) {
  69475. + if (size_change > 0)
  69476. + nplug->change_item_size(coord, size_change);
  69477. + /* NOTE-NIKITA: huh? where @key is used? */
  69478. + result = iplug->b.paste(coord, data, NULL);
  69479. + if (size_change < 0)
  69480. + nplug->change_item_size(coord, size_change);
  69481. + znode_make_dirty(coord->node);
  69482. + } else
  69483. + /* otherwise do full-fledged carry(). */
  69484. + result = paste_with_carry(coord, lh, data, key, flags);
  69485. + return result;
  69486. +}
  69487. +
  69488. +/* this either appends or truncates item @coord */
  69489. +int reiser4_resize_item(coord_t * coord /* coord of item being resized */ ,
  69490. + reiser4_item_data * data /* parameters of resize */ ,
  69491. + reiser4_key * key /* key of new unit */ ,
  69492. + lock_handle * lh /* lock handle of node
  69493. + * being modified */ ,
  69494. + cop_insert_flag flags /* carry flags */ )
  69495. +{
  69496. + int result;
  69497. + znode *node;
  69498. +
  69499. + assert("nikita-362", coord != NULL);
  69500. + assert("nikita-363", data != NULL);
  69501. + assert("vs-245", data->length != 0);
  69502. +
  69503. + node = coord->node;
  69504. + coord_clear_iplug(coord);
  69505. + result = zload(node);
  69506. + if (result != 0)
  69507. + return result;
  69508. +
  69509. + if (data->length < 0)
  69510. + result = node_plugin_by_coord(coord)->shrink_item(coord,
  69511. + -data->length);
  69512. + else
  69513. + result = insert_into_item(coord, lh, key, data, flags);
  69514. +
  69515. + zrelse(node);
  69516. + return result;
  69517. +}
  69518. +
  69519. +/* insert flow @f */
  69520. +int reiser4_insert_flow(coord_t * coord, lock_handle * lh, flow_t * f)
  69521. +{
  69522. + int result;
  69523. + carry_pool *pool;
  69524. + carry_level *lowest_level;
  69525. + reiser4_item_data *data;
  69526. + carry_op *op;
  69527. +
  69528. + pool =
  69529. + init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
  69530. + sizeof(*data));
  69531. + if (IS_ERR(pool))
  69532. + return PTR_ERR(pool);
  69533. + lowest_level = (carry_level *) (pool + 1);
  69534. + init_carry_level(lowest_level, pool);
  69535. +
  69536. + op = reiser4_post_carry(lowest_level, COP_INSERT_FLOW, coord->node,
  69537. + 0 /* operate directly on coord -> node */ );
  69538. + if (IS_ERR(op) || (op == NULL)) {
  69539. + done_carry_pool(pool);
  69540. + return RETERR(op ? PTR_ERR(op) : -EIO);
  69541. + }
  69542. +
  69543. + /* these are permanent during insert_flow */
  69544. + data = (reiser4_item_data *) (lowest_level + 3);
  69545. + data->user = 1;
  69546. + data->iplug = item_plugin_by_id(FORMATTING_ID);
  69547. + data->arg = NULL;
  69548. + /* data.length and data.data will be set before calling paste or
  69549. + insert */
  69550. + data->length = 0;
  69551. + data->data = NULL;
  69552. +
  69553. + op->u.insert_flow.flags = 0;
  69554. + op->u.insert_flow.insert_point = coord;
  69555. + op->u.insert_flow.flow = f;
  69556. + op->u.insert_flow.data = data;
  69557. + op->u.insert_flow.new_nodes = 0;
  69558. +
  69559. + lowest_level->track_type = CARRY_TRACK_CHANGE;
  69560. + lowest_level->tracked = lh;
  69561. +
  69562. + result = reiser4_carry(lowest_level, NULL);
  69563. + done_carry_pool(pool);
  69564. +
  69565. + return result;
  69566. +}
  69567. +
  69568. +/* Given a coord in parent node, obtain a znode for the corresponding child */
  69569. +znode *child_znode(const coord_t * parent_coord /* coord of pointer to
  69570. + * child */ ,
  69571. + znode * parent /* parent of child */ ,
  69572. + int incore_p /* if !0 only return child if already in
  69573. + * memory */ ,
  69574. + int setup_dkeys_p /* if !0 update delimiting keys of
  69575. + * child */ )
  69576. +{
  69577. + znode *child;
  69578. +
  69579. + assert("nikita-1374", parent_coord != NULL);
  69580. + assert("nikita-1482", parent != NULL);
  69581. +#if REISER4_DEBUG
  69582. + if (setup_dkeys_p)
  69583. + assert_rw_not_locked(&(znode_get_tree(parent)->dk_lock));
  69584. +#endif
  69585. + assert("nikita-2947", znode_is_any_locked(parent));
  69586. +
  69587. + if (znode_get_level(parent) <= LEAF_LEVEL) {
  69588. + /* trying to get child of leaf node */
  69589. + warning("nikita-1217", "Child of maize?");
  69590. + return ERR_PTR(RETERR(-EIO));
  69591. + }
  69592. + if (item_is_internal(parent_coord)) {
  69593. + reiser4_block_nr addr;
  69594. + item_plugin *iplug;
  69595. + reiser4_tree *tree;
  69596. +
  69597. + iplug = item_plugin_by_coord(parent_coord);
  69598. + assert("vs-512", iplug->s.internal.down_link);
  69599. + iplug->s.internal.down_link(parent_coord, NULL, &addr);
  69600. +
  69601. + tree = znode_get_tree(parent);
  69602. + if (incore_p)
  69603. + child = zlook(tree, &addr);
  69604. + else
  69605. + child =
  69606. + zget(tree, &addr, parent,
  69607. + znode_get_level(parent) - 1,
  69608. + reiser4_ctx_gfp_mask_get());
  69609. + if ((child != NULL) && !IS_ERR(child) && setup_dkeys_p)
  69610. + set_child_delimiting_keys(parent, parent_coord, child);
  69611. + } else {
  69612. + warning("nikita-1483", "Internal item expected");
  69613. + child = ERR_PTR(RETERR(-EIO));
  69614. + }
  69615. + return child;
  69616. +}
  69617. +
  69618. +/* remove znode from transaction */
  69619. +static void uncapture_znode(znode * node)
  69620. +{
  69621. + struct page *page;
  69622. +
  69623. + assert("zam-1001", ZF_ISSET(node, JNODE_HEARD_BANSHEE));
  69624. +
  69625. + if (!reiser4_blocknr_is_fake(znode_get_block(node))) {
  69626. + int ret;
  69627. +
  69628. + /* An already allocated block goes right to the atom's delete set. */
  69629. + ret =
  69630. + reiser4_dealloc_block(znode_get_block(node), 0,
  69631. + BA_DEFER | BA_FORMATTED);
  69632. + if (ret)
  69633. + warning("zam-942",
  69634. + "can\'t add a block (%llu) number to atom's delete set\n",
  69635. + (unsigned long long)(*znode_get_block(node)));
  69636. +
  69637. + spin_lock_znode(node);
  69638. + /* Here we return flush reserved block which was reserved at the
  69639. + * moment when this allocated node was marked dirty and still
  69640. + * not used by flush in node relocation procedure. */
  69641. + if (ZF_ISSET(node, JNODE_FLUSH_RESERVED)) {
  69642. + txn_atom *atom;
  69643. +
  69644. + atom = jnode_get_atom(ZJNODE(node));
  69645. + assert("zam-939", atom != NULL);
  69646. + spin_unlock_znode(node);
  69647. + flush_reserved2grabbed(atom, (__u64) 1);
  69648. + spin_unlock_atom(atom);
  69649. + } else
  69650. + spin_unlock_znode(node);
  69651. + } else {
  69652. + /* znode has assigned block which is counted as "fake
  69653. + allocated". Return it back to "free blocks". */
  69654. + fake_allocated2free((__u64) 1, BA_FORMATTED);
  69655. + }
  69656. +
  69657. + /*
  69658. + * uncapture page from transaction. There is a possibility of a race
  69659. + * with ->releasepage(): reiser4_releasepage() detaches page from this
  69660. + * jnode and we have nothing to uncapture. To avoid this, get
  69661. + * reference of node->pg under jnode spin lock. reiser4_uncapture_page()
  69662. + * will deal with released page itself.
  69663. + */
  69664. + spin_lock_znode(node);
  69665. + page = znode_page(node);
  69666. + if (likely(page != NULL)) {
  69667. + /*
  69668. + * reiser4_uncapture_page() can only be called when we are sure
  69669. + * that znode is pinned in memory, which we are, because
  69670. + * forget_znode() is only called from longterm_unlock_znode().
  69671. + */
  69672. + get_page(page);
  69673. + spin_unlock_znode(node);
  69674. + lock_page(page);
  69675. + reiser4_uncapture_page(page);
  69676. + unlock_page(page);
  69677. + put_page(page);
  69678. + } else {
  69679. + txn_atom *atom;
  69680. +
  69681. + /* handle "flush queued" znodes */
  69682. + while (1) {
  69683. + atom = jnode_get_atom(ZJNODE(node));
  69684. + assert("zam-943", atom != NULL);
  69685. +
  69686. + if (!ZF_ISSET(node, JNODE_FLUSH_QUEUED)
  69687. + || !atom->nr_running_queues)
  69688. + break;
  69689. +
  69690. + spin_unlock_znode(node);
  69691. + reiser4_atom_wait_event(atom);
  69692. + spin_lock_znode(node);
  69693. + }
  69694. +
  69695. + reiser4_uncapture_block(ZJNODE(node));
  69696. + spin_unlock_atom(atom);
  69697. + zput(node);
  69698. + }
  69699. +}
  69700. +
  69701. +/* This is called from longterm_unlock_znode() when last lock is released from
  69702. + the node that has been removed from the tree. At this point node is removed
  69703. + from sibling list and its lock is invalidated. */
  69704. +void forget_znode(lock_handle * handle)
  69705. +{
  69706. + znode *node;
  69707. + reiser4_tree *tree;
  69708. +
  69709. + assert("umka-319", handle != NULL);
  69710. +
  69711. + node = handle->node;
  69712. + tree = znode_get_tree(node);
  69713. +
  69714. + assert("vs-164", znode_is_write_locked(node));
  69715. + assert("nikita-1280", ZF_ISSET(node, JNODE_HEARD_BANSHEE));
  69716. + assert_rw_locked(&(node->lock.guard));
  69717. +
  69718. + /* We assume that this node was detached from its parent before
  69719. + * unlocking, it gives no way to reach this node from parent through a
  69720. + * down link. The node should have no children and, thereby, can't be
  69721. + * reached from them by their parent pointers. The only way to obtain a
  69722. + * reference to the node is to use sibling pointers from its left and
  69723. + * right neighbors. In the next several lines we remove the node from
  69724. + * the sibling list. */
  69725. +
  69726. + write_lock_tree(tree);
  69727. + sibling_list_remove(node);
  69728. + znode_remove(node, tree);
  69729. + write_unlock_tree(tree);
  69730. +
  69731. + /* Here we set JNODE_DYING and cancel all pending lock requests. It
  69732. + * forces all lock requestor threads to repeat iterations of getting
  69733. + * lock on a child, neighbor or parent node. But, those threads can't
  69734. + * come to this node again, because this node is no longer a child,
  69735. + * neighbor or parent of any other node. This order of znode
  69736. + * invalidation does not allow other threads to waste cpu time in a busy
  69737. + * loop, trying to lock dying object. The exception is in the flush
  69738. + * code when we take node directly from atom's capture list.*/
  69739. + reiser4_invalidate_lock(handle);
  69740. + uncapture_znode(node);
  69741. +}
  69742. +
  69743. +/* Check that internal item at @pointer really contains pointer to @child. */
  69744. +int check_tree_pointer(const coord_t * pointer /* would-be pointer to
  69745. + * @child */ ,
  69746. + const znode * child /* child znode */ )
  69747. +{
  69748. + assert("nikita-1016", pointer != NULL);
  69749. + assert("nikita-1017", child != NULL);
  69750. + assert("nikita-1018", pointer->node != NULL);
  69751. +
  69752. + assert("nikita-1325", znode_is_any_locked(pointer->node));
  69753. +
  69754. + assert("nikita-2985",
  69755. + znode_get_level(pointer->node) == znode_get_level(child) + 1);
  69756. +
  69757. + coord_clear_iplug((coord_t *) pointer);
  69758. +
  69759. + if (coord_is_existing_unit(pointer)) {
  69760. + item_plugin *iplug;
  69761. + reiser4_block_nr addr;
  69762. +
  69763. + if (item_is_internal(pointer)) {
  69764. + iplug = item_plugin_by_coord(pointer);
  69765. + assert("vs-513", iplug->s.internal.down_link);
  69766. + iplug->s.internal.down_link(pointer, NULL, &addr);
  69767. + /* check that cached value is correct */
  69768. + if (disk_addr_eq(&addr, znode_get_block(child))) {
  69769. + return NS_FOUND;
  69770. + }
  69771. + }
  69772. + }
  69773. + /* warning ("jmacd-1002", "tree pointer incorrect"); */
  69774. + return NS_NOT_FOUND;
  69775. +}
  69776. +
  69777. +/* find coord of pointer to new @child in @parent.
  69778. +
  69779. + Find the &coord_t in the @parent where pointer to a given @child will
  69780. + be in.
  69781. +
  69782. +*/
  69783. +int find_new_child_ptr(znode * parent /* parent znode, passed locked */ ,
  69784. + znode *
  69785. + child UNUSED_ARG /* child znode, passed locked */ ,
  69786. + znode * left /* left brother of new node */ ,
  69787. + coord_t * result /* where result is stored in */ )
  69788. +{
  69789. + int ret;
  69790. +
  69791. + assert("nikita-1486", parent != NULL);
  69792. + assert("nikita-1487", child != NULL);
  69793. + assert("nikita-1488", result != NULL);
  69794. +
  69795. + ret = find_child_ptr(parent, left, result);
  69796. + if (ret != NS_FOUND) {
  69797. + warning("nikita-1489", "Cannot find brother position: %i", ret);
  69798. + return RETERR(-EIO);
  69799. + } else {
  69800. + result->between = AFTER_UNIT;
  69801. + return RETERR(NS_NOT_FOUND);
  69802. + }
  69803. +}
  69804. +
  69805. +/* find coord of pointer to @child in @parent.
  69806. +
  69807. + Find the &coord_t in the @parent where pointer to a given @child is in.
  69808. +
  69809. +*/
  69810. +int find_child_ptr(znode * parent /* parent znode, passed locked */ ,
  69811. + znode * child /* child znode, passed locked */ ,
  69812. + coord_t * result /* where result is stored in */ )
  69813. +{
  69814. + int lookup_res;
  69815. + node_plugin *nplug;
  69816. + /* left delimiting key of a child */
  69817. + reiser4_key ld;
  69818. + reiser4_tree *tree;
  69819. +
  69820. + assert("nikita-934", parent != NULL);
  69821. + assert("nikita-935", child != NULL);
  69822. + assert("nikita-936", result != NULL);
  69823. + assert("zam-356", znode_is_loaded(parent));
  69824. +
  69825. + coord_init_zero(result);
  69826. + result->node = parent;
  69827. +
  69828. + nplug = parent->nplug;
  69829. + assert("nikita-939", nplug != NULL);
  69830. +
  69831. + tree = znode_get_tree(parent);
  69832. + /* NOTE-NIKITA taking read-lock on tree here assumes that @result is
  69833. + * not aliased to ->in_parent of some znode. Otherwise,
  69834. + * parent_coord_to_coord() below would modify data protected by tree
  69835. + * lock. */
  69836. + read_lock_tree(tree);
  69837. + /* fast path. Try to use cached value. Lock tree to keep
  69838. + node->pos_in_parent and pos->*_blocknr consistent. */
  69839. + if (child->in_parent.item_pos + 1 != 0) {
  69840. + parent_coord_to_coord(&child->in_parent, result);
  69841. + if (check_tree_pointer(result, child) == NS_FOUND) {
  69842. + read_unlock_tree(tree);
  69843. + return NS_FOUND;
  69844. + }
  69845. +
  69846. + child->in_parent.item_pos = (unsigned short)~0;
  69847. + }
  69848. + read_unlock_tree(tree);
  69849. +
  69850. + /* if above failed, find some key from @child. We are looking for the
  69851. + least key in a child. */
  69852. + read_lock_dk(tree);
  69853. + ld = *znode_get_ld_key(child);
  69854. + read_unlock_dk(tree);
  69855. + /*
  69856. + * now, lookup parent with key just found. Note, that left delimiting
  69857. + * key doesn't identify node uniquely, because (in extremely rare
  69858. + * case) two nodes can have equal left delimiting keys, if one of them
  69859. + * is completely filled with directory entries that all happened to be
  69860. + * hash collisions. But, we check block number in check_tree_pointer()
  69861. + * and, so, are safe.
  69862. + */
  69863. + lookup_res = nplug->lookup(parent, &ld, FIND_EXACT, result);
  69864. + /* update cached pos_in_node */
  69865. + if (lookup_res == NS_FOUND) {
  69866. + write_lock_tree(tree);
  69867. + coord_to_parent_coord(result, &child->in_parent);
  69868. + write_unlock_tree(tree);
  69869. + lookup_res = check_tree_pointer(result, child);
  69870. + }
  69871. + if (lookup_res == NS_NOT_FOUND)
  69872. + lookup_res = find_child_by_addr(parent, child, result);
  69873. + return lookup_res;
  69874. +}
  69875. +
  69876. +/* find coord of pointer to @child in @parent by scanning
  69877. +
  69878. + Find the &coord_t in the @parent where pointer to a given @child
  69879. + is in by scanning all internal items in @parent and comparing block
  69880. + numbers in them with that of @child.
  69881. +
  69882. +*/
  69883. +static int find_child_by_addr(znode * parent /* parent znode, passed locked */ ,
  69884. + znode * child /* child znode, passed locked */ ,
  69885. + coord_t * result /* where result is stored in */ )
  69886. +{
  69887. + int ret;
  69888. +
  69889. + assert("nikita-1320", parent != NULL);
  69890. + assert("nikita-1321", child != NULL);
  69891. + assert("nikita-1322", result != NULL);
  69892. +
  69893. + ret = NS_NOT_FOUND;
  69894. +
  69895. + for_all_units(result, parent) {
  69896. + if (check_tree_pointer(result, child) == NS_FOUND) {
  69897. + write_lock_tree(znode_get_tree(parent));
  69898. + coord_to_parent_coord(result, &child->in_parent);
  69899. + write_unlock_tree(znode_get_tree(parent));
  69900. + ret = NS_FOUND;
  69901. + break;
  69902. + }
  69903. + }
  69904. + return ret;
  69905. +}
  69906. +
  69907. +/* true, if @addr is "unallocated block number", which is just address, with
  69908. + highest bit set. */
  69909. +int is_disk_addr_unallocated(const reiser4_block_nr * addr /* address to
  69910. + * check */ )
  69911. +{
  69912. + assert("nikita-1766", addr != NULL);
  69913. +
  69914. + return (*addr & REISER4_BLOCKNR_STATUS_BIT_MASK) ==
  69915. + REISER4_UNALLOCATED_STATUS_VALUE;
  69916. +}
  69917. +
  69918. +/* returns true if removing bytes of given range of key [from_key, to_key]
  69919. + causes removing of whole item @from */
  69920. +static int
  69921. +item_removed_completely(coord_t * from, const reiser4_key * from_key,
  69922. + const reiser4_key * to_key)
  69923. +{
  69924. + item_plugin *iplug;
  69925. + reiser4_key key_in_item;
  69926. +
  69927. + assert("umka-325", from != NULL);
  69928. + assert("", item_is_extent(from));
  69929. +
  69930. + /* check first key just in case */
  69931. + item_key_by_coord(from, &key_in_item);
  69932. + if (keygt(from_key, &key_in_item))
  69933. + return 0;
  69934. +
  69935. + /* check last key */
  69936. + iplug = item_plugin_by_coord(from);
  69937. + assert("vs-611", iplug && iplug->s.file.append_key);
  69938. +
  69939. + iplug->s.file.append_key(from, &key_in_item);
  69940. + set_key_offset(&key_in_item, get_key_offset(&key_in_item) - 1);
  69941. +
  69942. + if (keylt(to_key, &key_in_item))
  69943. + /* last byte is not removed */
  69944. + return 0;
  69945. + return 1;
  69946. +}
  69947. +
  69948. +/* helper function for prepare_twig_kill(): @left and @right are formatted
  69949. + * neighbors of extent item being completely removed. Load and lock neighbors
  69950. + * and store lock handles into @cdata for later use by kill_hook_extent() */
  69951. +static int
  69952. +prepare_children(znode * left, znode * right, carry_kill_data * kdata)
  69953. +{
  69954. + int result;
  69955. + int left_loaded;
  69956. + int right_loaded;
  69957. +
  69958. + result = 0;
  69959. + left_loaded = right_loaded = 0;
  69960. +
  69961. + if (left != NULL) {
  69962. + result = zload(left);
  69963. + if (result == 0) {
  69964. + left_loaded = 1;
  69965. + result = longterm_lock_znode(kdata->left, left,
  69966. + ZNODE_READ_LOCK,
  69967. + ZNODE_LOCK_LOPRI);
  69968. + }
  69969. + }
  69970. + if (result == 0 && right != NULL) {
  69971. + result = zload(right);
  69972. + if (result == 0) {
  69973. + right_loaded = 1;
  69974. + result = longterm_lock_znode(kdata->right, right,
  69975. + ZNODE_READ_LOCK,
  69976. + ZNODE_LOCK_HIPRI |
  69977. + ZNODE_LOCK_NONBLOCK);
  69978. + }
  69979. + }
  69980. + if (result != 0) {
  69981. + done_lh(kdata->left);
  69982. + done_lh(kdata->right);
  69983. + if (left_loaded != 0)
  69984. + zrelse(left);
  69985. + if (right_loaded != 0)
  69986. + zrelse(right);
  69987. + }
  69988. + return result;
  69989. +}
  69990. +
  69991. +static void done_children(carry_kill_data * kdata)
  69992. +{
  69993. + if (kdata->left != NULL && kdata->left->node != NULL) {
  69994. + zrelse(kdata->left->node);
  69995. + done_lh(kdata->left);
  69996. + }
  69997. + if (kdata->right != NULL && kdata->right->node != NULL) {
  69998. + zrelse(kdata->right->node);
  69999. + done_lh(kdata->right);
  70000. + }
  70001. +}
  70002. +
  70003. +/* part of cut_node. It is called when cut_node is called to remove or cut part
  70004. + of extent item. When head of that item is removed - we have to update right
  70005. + delimiting of left neighbor of extent. When item is removed completely - we
  70006. + have to set sibling link between left and right neighbor of removed
  70007. + extent. This may return -E_DEADLOCK because of trying to get left neighbor
  70008. + locked. So, caller should repeat an attempt
  70009. +*/
  70010. +/* Audited by: umka (2002.06.16) */
  70011. +static int
  70012. +prepare_twig_kill(carry_kill_data * kdata, znode * locked_left_neighbor)
  70013. +{
  70014. + int result;
  70015. + reiser4_key key;
  70016. + lock_handle left_lh;
  70017. + lock_handle right_lh;
  70018. + coord_t left_coord;
  70019. + coord_t *from;
  70020. + znode *left_child;
  70021. + znode *right_child;
  70022. + reiser4_tree *tree;
  70023. + int left_zloaded_here, right_zloaded_here;
  70024. +
  70025. + from = kdata->params.from;
  70026. + assert("umka-326", from != NULL);
  70027. + assert("umka-327", kdata->params.to != NULL);
  70028. +
  70029. + /* for one extent item only yet */
  70030. + assert("vs-591", item_is_extent(from));
  70031. + assert("vs-592", from->item_pos == kdata->params.to->item_pos);
  70032. +
  70033. + if ((kdata->params.from_key
  70034. + && keygt(kdata->params.from_key, item_key_by_coord(from, &key)))
  70035. + || from->unit_pos != 0) {
  70036. + /* head of item @from is not removed, there is nothing to
  70037. + worry about */
  70038. + return 0;
  70039. + }
  70040. +
  70041. + result = 0;
  70042. + left_zloaded_here = 0;
  70043. + right_zloaded_here = 0;
  70044. +
  70045. + left_child = right_child = NULL;
  70046. +
  70047. + coord_dup(&left_coord, from);
  70048. + init_lh(&left_lh);
  70049. + init_lh(&right_lh);
  70050. + if (coord_prev_unit(&left_coord)) {
  70051. + /* @from is leftmost item in its node */
  70052. + if (!locked_left_neighbor) {
  70053. + result =
  70054. + reiser4_get_left_neighbor(&left_lh, from->node,
  70055. + ZNODE_READ_LOCK,
  70056. + GN_CAN_USE_UPPER_LEVELS);
  70057. + switch (result) {
  70058. + case 0:
  70059. + break;
  70060. + case -E_NO_NEIGHBOR:
  70061. + /* there is no formatted node to the left of
  70062. + from->node */
  70063. + warning("vs-605",
  70064. + "extent item has smallest key in "
  70065. + "the tree and it is about to be removed");
  70066. + return 0;
  70067. + case -E_DEADLOCK:
  70068. + /* need to restart */
  70069. + default:
  70070. + return result;
  70071. + }
  70072. +
  70073. + /* we have acquired left neighbor of from->node */
  70074. + result = zload(left_lh.node);
  70075. + if (result)
  70076. + goto done;
  70077. +
  70078. + locked_left_neighbor = left_lh.node;
  70079. + } else {
  70080. + /* squalloc_right_twig_cut should have supplied locked
  70081. + * left neighbor */
  70082. + assert("vs-834",
  70083. + znode_is_write_locked(locked_left_neighbor));
  70084. + result = zload(locked_left_neighbor);
  70085. + if (result)
  70086. + return result;
  70087. + }
  70088. +
  70089. + left_zloaded_here = 1;
  70090. + coord_init_last_unit(&left_coord, locked_left_neighbor);
  70091. + }
  70092. +
  70093. + if (!item_is_internal(&left_coord)) {
  70094. + /* what else but extent can be on twig level */
  70095. + assert("vs-606", item_is_extent(&left_coord));
  70096. +
  70097. + /* there is no left formatted child */
  70098. + if (left_zloaded_here)
  70099. + zrelse(locked_left_neighbor);
  70100. + done_lh(&left_lh);
  70101. + return 0;
  70102. + }
  70103. +
  70104. + tree = znode_get_tree(left_coord.node);
  70105. + left_child = child_znode(&left_coord, left_coord.node, 1, 0);
  70106. +
  70107. + if (IS_ERR(left_child)) {
  70108. + result = PTR_ERR(left_child);
  70109. + goto done;
  70110. + }
  70111. +
  70112. + /* left child is acquired, calculate new right delimiting key for it
  70113. + and get right child if it is necessary */
  70114. + if (item_removed_completely
  70115. + (from, kdata->params.from_key, kdata->params.to_key)) {
  70116. + /* try to get right child of removed item */
  70117. + coord_t right_coord;
  70118. +
  70119. + assert("vs-607",
  70120. + kdata->params.to->unit_pos ==
  70121. + coord_last_unit_pos(kdata->params.to));
  70122. + coord_dup(&right_coord, kdata->params.to);
  70123. + if (coord_next_unit(&right_coord)) {
  70124. + /* @to is rightmost unit in the node */
  70125. + result =
  70126. + reiser4_get_right_neighbor(&right_lh, from->node,
  70127. + ZNODE_READ_LOCK,
  70128. + GN_CAN_USE_UPPER_LEVELS);
  70129. + switch (result) {
  70130. + case 0:
  70131. + result = zload(right_lh.node);
  70132. + if (result)
  70133. + goto done;
  70134. +
  70135. + right_zloaded_here = 1;
  70136. + coord_init_first_unit(&right_coord,
  70137. + right_lh.node);
  70138. + item_key_by_coord(&right_coord, &key);
  70139. + break;
  70140. +
  70141. + case -E_NO_NEIGHBOR:
  70142. + /* there is no formatted node to the right of
  70143. + from->node */
  70144. + read_lock_dk(tree);
  70145. + key = *znode_get_rd_key(from->node);
  70146. + read_unlock_dk(tree);
  70147. + right_coord.node = NULL;
  70148. + result = 0;
  70149. + break;
  70150. + default:
  70151. + /* real error */
  70152. + goto done;
  70153. + }
  70154. + } else {
  70155. + /* there is an item to the right of @from - take its key */
  70156. + item_key_by_coord(&right_coord, &key);
  70157. + }
  70158. +
  70159. + /* try to get right child of @from */
  70160. + if (right_coord.node && /* there is right neighbor of @from */
  70161. + item_is_internal(&right_coord)) { /* it is internal item */
  70162. + right_child = child_znode(&right_coord,
  70163. + right_coord.node, 1, 0);
  70164. +
  70165. + if (IS_ERR(right_child)) {
  70166. + result = PTR_ERR(right_child);
  70167. + goto done;
  70168. + }
  70169. +
  70170. + }
  70171. + /* whole extent is removed between znodes left_child and right_child. Prepare them for linking and
  70172. + update of right delimiting key of left_child */
  70173. + result = prepare_children(left_child, right_child, kdata);
  70174. + } else {
  70175. + /* head of item @to is removed. left_child has to get right delimiting key update. Prepare it for that */
  70176. + result = prepare_children(left_child, NULL, kdata);
  70177. + }
  70178. +
  70179. + done:
  70180. + if (right_child)
  70181. + zput(right_child);
  70182. + if (right_zloaded_here)
  70183. + zrelse(right_lh.node);
  70184. + done_lh(&right_lh);
  70185. +
  70186. + if (left_child)
  70187. + zput(left_child);
  70188. + if (left_zloaded_here)
  70189. + zrelse(locked_left_neighbor);
  70190. + done_lh(&left_lh);
  70191. + return result;
  70192. +}
  70193. +
  70194. +/* this is used to remove part of node content between coordinates @from and @to. Units to which @from and @to are set
  70195. + are to be cut completely */
  70196. +/* for try_to_merge_with_left, delete_copied, reiser4_delete_node */
  70197. +int cut_node_content(coord_t * from, coord_t * to, const reiser4_key * from_key, /* first key to be removed */
  70198. + const reiser4_key * to_key, /* last key to be removed */
  70199. + reiser4_key *
  70200. + smallest_removed /* smallest key actually removed */ )
  70201. +{
  70202. + int result;
  70203. + carry_pool *pool;
  70204. + carry_level *lowest_level;
  70205. + carry_cut_data *cut_data;
  70206. + carry_op *op;
  70207. +
  70208. + assert("vs-1715", coord_compare(from, to) != COORD_CMP_ON_RIGHT);
  70209. +
  70210. + pool =
  70211. + init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
  70212. + sizeof(*cut_data));
  70213. + if (IS_ERR(pool))
  70214. + return PTR_ERR(pool);
  70215. + lowest_level = (carry_level *) (pool + 1);
  70216. + init_carry_level(lowest_level, pool);
  70217. +
  70218. + op = reiser4_post_carry(lowest_level, COP_CUT, from->node, 0);
  70219. + assert("vs-1509", op != 0);
  70220. + if (IS_ERR(op)) {
  70221. + done_carry_pool(pool);
  70222. + return PTR_ERR(op);
  70223. + }
  70224. +
  70225. + cut_data = (carry_cut_data *) (lowest_level + 3);
  70226. + cut_data->params.from = from;
  70227. + cut_data->params.to = to;
  70228. + cut_data->params.from_key = from_key;
  70229. + cut_data->params.to_key = to_key;
  70230. + cut_data->params.smallest_removed = smallest_removed;
  70231. +
  70232. + op->u.cut_or_kill.is_cut = 1;
  70233. + op->u.cut_or_kill.u.cut = cut_data;
  70234. +
  70235. + result = reiser4_carry(lowest_level, NULL);
  70236. + done_carry_pool(pool);
  70237. +
  70238. + return result;
  70239. +}
  70240. +
  70241. +/* cut part of the node
  70242. +
  70243. + Cut part or whole content of node.
  70244. +
  70245. + cut data between @from and @to of @from->node and call carry() to make
  70246. + corresponding changes in the tree. @from->node may become empty. If so -
  70247. + pointer to it will be removed. Neighboring nodes are not changed. Smallest
  70248. + removed key is stored in @smallest_removed
  70249. +
  70250. +*/
  70251. +int kill_node_content(coord_t * from, /* coord of the first unit/item that will be eliminated */
  70252. + coord_t * to, /* coord of the last unit/item that will be eliminated */
  70253. + const reiser4_key * from_key, /* first key to be removed */
  70254. + const reiser4_key * to_key, /* last key to be removed */
  70255. + reiser4_key * smallest_removed, /* smallest key actually removed */
  70256. + znode * locked_left_neighbor, /* this is set when kill_node_content is called with left neighbor
  70257. + * locked (in squalloc_right_twig_cut, namely) */
  70258. + struct inode *inode, /* inode of file whose item (or its part) is to be killed. This is necessary to
  70259. + invalidate pages together with item pointing to them */
  70260. + int truncate)
  70261. +{ /* this call is made for file truncate */
  70262. + int result;
  70263. + carry_pool *pool;
  70264. + carry_level *lowest_level;
  70265. + carry_kill_data *kdata;
  70266. + lock_handle *left_child;
  70267. + lock_handle *right_child;
  70268. + carry_op *op;
  70269. +
  70270. + assert("umka-328", from != NULL);
  70271. + assert("vs-316", !node_is_empty(from->node));
  70272. + assert("nikita-1812", coord_is_existing_unit(from)
  70273. + && coord_is_existing_unit(to));
  70274. +
  70275. + /* allocate carry_pool, 3 carry_level-s, carry_kill_data and structures for kill_hook_extent */
  70276. + pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
  70277. + sizeof(carry_kill_data) +
  70278. + 2 * sizeof(lock_handle) +
  70279. + 5 * sizeof(reiser4_key) + 2 * sizeof(coord_t));
  70280. + if (IS_ERR(pool))
  70281. + return PTR_ERR(pool);
  70282. +
  70283. + lowest_level = (carry_level *) (pool + 1);
  70284. + init_carry_level(lowest_level, pool);
  70285. +
  70286. + kdata = (carry_kill_data *) (lowest_level + 3);
  70287. + left_child = (lock_handle *) (kdata + 1);
  70288. + right_child = left_child + 1;
  70289. +
  70290. + init_lh(left_child);
  70291. + init_lh(right_child);
  70292. +
  70293. + kdata->params.from = from;
  70294. + kdata->params.to = to;
  70295. + kdata->params.from_key = from_key;
  70296. + kdata->params.to_key = to_key;
  70297. + kdata->params.smallest_removed = smallest_removed;
  70298. + kdata->params.truncate = truncate;
  70299. + kdata->flags = 0;
  70300. + kdata->inode = inode;
  70301. + kdata->left = left_child;
  70302. + kdata->right = right_child;
  70303. + /* memory for 5 reiser4_key and 2 coord_t will be used in kill_hook_extent */
  70304. + kdata->buf = (char *)(right_child + 1);
  70305. +
  70306. + if (znode_get_level(from->node) == TWIG_LEVEL && item_is_extent(from)) {
  70307. + /* left child of extent item may have to get updated right
  70308. + delimiting key and to get linked with right child of extent
  70309. + @from if it will be removed completely */
  70310. + result = prepare_twig_kill(kdata, locked_left_neighbor);
  70311. + if (result) {
  70312. + done_children(kdata);
  70313. + done_carry_pool(pool);
  70314. + return result;
  70315. + }
  70316. + }
  70317. +
  70318. + op = reiser4_post_carry(lowest_level, COP_CUT, from->node, 0);
  70319. + if (IS_ERR(op) || (op == NULL)) {
  70320. + done_children(kdata);
  70321. + done_carry_pool(pool);
  70322. + return RETERR(op ? PTR_ERR(op) : -EIO);
  70323. + }
  70324. +
  70325. + op->u.cut_or_kill.is_cut = 0;
  70326. + op->u.cut_or_kill.u.kill = kdata;
  70327. +
  70328. + result = reiser4_carry(lowest_level, NULL);
  70329. +
  70330. + done_children(kdata);
  70331. + done_carry_pool(pool);
  70332. + return result;
  70333. +}
  70334. +
  70335. +void
  70336. +fake_kill_hook_tail(struct inode *inode, loff_t start, loff_t end, int truncate)
  70337. +{
  70338. + if (reiser4_inode_get_flag(inode, REISER4_HAS_MMAP)) {
  70339. + pgoff_t start_pg, end_pg;
  70340. +
  70341. + start_pg = start >> PAGE_SHIFT;
  70342. + end_pg = (end - 1) >> PAGE_SHIFT;
  70343. +
  70344. + if ((start & (PAGE_SIZE - 1)) == 0) {
  70345. + /*
  70346. + * kill up to the page boundary.
  70347. + */
  70348. + assert("vs-123456", start_pg == end_pg);
  70349. + reiser4_invalidate_pages(inode->i_mapping, start_pg, 1,
  70350. + truncate);
  70351. + } else if (start_pg != end_pg) {
  70352. + /*
  70353. + * page boundary is within killed portion of node.
  70354. + */
  70355. + assert("vs-654321", end_pg - start_pg == 1);
  70356. + reiser4_invalidate_pages(inode->i_mapping, end_pg,
  70357. + end_pg - start_pg, 1);
  70358. + }
  70359. + }
  70360. + inode_sub_bytes(inode, end - start);
  70361. +}
  70362. +
  70363. +/**
  70364. + * Delete whole @node from the reiser4 tree without loading it.
  70365. + *
  70366. + * @left: locked left neighbor,
  70367. + * @node: node to be deleted,
  70368. + * @smallest_removed: leftmost key of deleted node,
  70369. + * @object: inode pointer, if we truncate a file body.
  70370. + * @truncate: true if called for file truncate.
  70371. + *
  70372. + * @return: 0 if success, error code otherwise.
  70373. + *
  70374. + * NOTE: if @object!=NULL we assume that @smallest_removed != NULL and it
  70375. + * contains the right value of the smallest removed key from the previous
  70376. + * cut_worker() iteration. This is needed for proper accounting of
  70377. + * "i_blocks" and "i_bytes" fields of the @object.
  70378. + */
  70379. +int reiser4_delete_node(znode * node, reiser4_key * smallest_removed,
  70380. + struct inode *object, int truncate)
  70381. +{
  70382. + lock_handle parent_lock;
  70383. + coord_t cut_from;
  70384. + coord_t cut_to;
  70385. + reiser4_tree *tree;
  70386. + int ret;
  70387. +
  70388. + assert("zam-937", node != NULL);
  70389. + assert("zam-933", znode_is_write_locked(node));
  70390. + assert("zam-999", smallest_removed != NULL);
  70391. +
  70392. + init_lh(&parent_lock);
  70393. +
  70394. + ret = reiser4_get_parent(&parent_lock, node, ZNODE_WRITE_LOCK);
  70395. + if (ret)
  70396. + return ret;
  70397. +
  70398. + assert("zam-934", !znode_above_root(parent_lock.node));
  70399. +
  70400. + ret = zload(parent_lock.node);
  70401. + if (ret)
  70402. + goto failed_nozrelse;
  70403. +
  70404. + ret = find_child_ptr(parent_lock.node, node, &cut_from);
  70405. + if (ret)
  70406. + goto failed;
  70407. +
  70408. + /* decrement child counter and set parent pointer to NULL before
  70409. + deleting the list from parent node because of checks in
  70410. + internal_kill_item_hook (we can delete the last item from the parent
  70411. + node, the parent node is going to be deleted and its c_count should
  70412. + be zero). */
  70413. +
  70414. + tree = znode_get_tree(node);
  70415. + write_lock_tree(tree);
  70416. + init_parent_coord(&node->in_parent, NULL);
  70417. + --parent_lock.node->c_count;
  70418. + write_unlock_tree(tree);
  70419. +
  70420. + assert("zam-989", item_is_internal(&cut_from));
  70421. +
  70422. + /* @node should be deleted after unlocking. */
  70423. + ZF_SET(node, JNODE_HEARD_BANSHEE);
  70424. +
  70425. + /* remove a pointer from the parent node to the node being deleted. */
  70426. + coord_dup(&cut_to, &cut_from);
  70427. + /* FIXME: shouldn't this be kill_node_content */
  70428. + ret = cut_node_content(&cut_from, &cut_to, NULL, NULL, NULL);
  70429. + if (ret)
  70430. + /* FIXME(Zam): Should we re-connect the node to its parent if
  70431. + * cut_node fails? */
  70432. + goto failed;
  70433. +
  70434. + {
  70435. + reiser4_tree *tree = current_tree;
  70436. + __u64 start_offset = 0, end_offset = 0;
  70437. +
  70438. + read_lock_tree(tree);
  70439. + write_lock_dk(tree);
  70440. + if (object) {
  70441. + /* We use @smallest_removed and the left delimiting of
  70442. + * the current node for @object->i_blocks, i_bytes
  70443. + * calculation. We assume that the items after the
  70444. + * *@smallest_removed key have been deleted from the
  70445. + * file body. */
  70446. + start_offset = get_key_offset(znode_get_ld_key(node));
  70447. + end_offset = get_key_offset(smallest_removed);
  70448. + }
  70449. +
  70450. + assert("zam-1021", znode_is_connected(node));
  70451. + if (node->left)
  70452. + znode_set_rd_key(node->left, znode_get_rd_key(node));
  70453. +
  70454. + *smallest_removed = *znode_get_ld_key(node);
  70455. +
  70456. + write_unlock_dk(tree);
  70457. + read_unlock_tree(tree);
  70458. +
  70459. + if (object) {
  70460. + /* we used to perform actions which are to be performed on items on their removal from tree in
  70461. + special item method - kill_hook. Here for optimization reasons we avoid reading node
  70462. + containing item we remove and can not call item's kill hook. Instead we call function which
  70463. + does exactly the same things as tail kill hook in assumption that node we avoid reading
  70464. + contains only one item and that item is a tail one. */
  70465. + fake_kill_hook_tail(object, start_offset, end_offset,
  70466. + truncate);
  70467. + }
  70468. + }
  70469. + failed:
  70470. + zrelse(parent_lock.node);
  70471. + failed_nozrelse:
  70472. + done_lh(&parent_lock);
  70473. +
  70474. + return ret;
  70475. +}
  70476. +
  70477. +static int can_delete(const reiser4_key *key, znode *node)
  70478. +{
  70479. + int result;
  70480. +
  70481. + read_lock_dk(current_tree);
  70482. + result = keyle(key, znode_get_ld_key(node));
  70483. + read_unlock_dk(current_tree);
  70484. + return result;
  70485. +}
  70486. +
  70487. +/**
  70488. + * This subroutine is not optimal but implementation seems to
  70489. + * be easier.
  70490. + *
  70491. + * @tap: the point deletion process begins from,
  70492. + * @from_key: the beginning of the deleted key range,
  70493. + * @to_key: the end of the deleted key range,
  70494. + * @smallest_removed: the smallest removed key,
  70495. + * @truncate: true if called for file truncate.
  70496. + * @progress: return true if a progress in file items deletions was made,
  70497. + * @smallest_removed value is actual in that case.
  70498. + *
  70499. + * @return: 0 if success, error code otherwise, -E_REPEAT means that long
  70500. + * reiser4_cut_tree operation was interrupted for allowing atom commit.
  70501. + */
  70502. +int
  70503. +cut_tree_worker_common(tap_t * tap, const reiser4_key * from_key,
  70504. + const reiser4_key * to_key,
  70505. + reiser4_key * smallest_removed, struct inode *object,
  70506. + int truncate, int *progress)
  70507. +{
  70508. + lock_handle next_node_lock;
  70509. + coord_t left_coord;
  70510. + int result;
  70511. +
  70512. + assert("zam-931", tap->coord->node != NULL);
  70513. + assert("zam-932", znode_is_write_locked(tap->coord->node));
  70514. +
  70515. + *progress = 0;
  70516. + init_lh(&next_node_lock);
  70517. +
  70518. + while (1) {
  70519. + znode *node; /* node from which items are cut */
  70520. + node_plugin *nplug; /* node plugin for @node */
  70521. +
  70522. + node = tap->coord->node;
  70523. +
  70524. + /* Move next_node_lock to the next node on the left. */
  70525. + result =
  70526. + reiser4_get_left_neighbor(&next_node_lock, node,
  70527. + ZNODE_WRITE_LOCK,
  70528. + GN_CAN_USE_UPPER_LEVELS);
  70529. + if (result != 0 && result != -E_NO_NEIGHBOR)
  70530. + break;
  70531. + /* Check can we delete the node as a whole. */
  70532. + if (*progress && znode_get_level(node) == LEAF_LEVEL &&
  70533. + can_delete(from_key, node)) {
  70534. + result = reiser4_delete_node(node, smallest_removed,
  70535. + object, truncate);
  70536. + } else {
  70537. + result = reiser4_tap_load(tap);
  70538. + if (result)
  70539. + return result;
  70540. +
  70541. + /* Prepare the second (right) point for cut_node() */
  70542. + if (*progress)
  70543. + coord_init_last_unit(tap->coord, node);
  70544. +
  70545. + else if (item_plugin_by_coord(tap->coord)->b.lookup ==
  70546. + NULL)
  70547. + /* set rightmost unit for the items without lookup method */
  70548. + tap->coord->unit_pos =
  70549. + coord_last_unit_pos(tap->coord);
  70550. +
  70551. + nplug = node->nplug;
  70552. +
  70553. + assert("vs-686", nplug);
  70554. + assert("vs-687", nplug->lookup);
  70555. +
  70556. + /* left_coord is leftmost unit cut from @node */
  70557. + result = nplug->lookup(node, from_key,
  70558. + FIND_MAX_NOT_MORE_THAN,
  70559. + &left_coord);
  70560. +
  70561. + if (IS_CBKERR(result))
  70562. + break;
  70563. +
  70564. + /* adjust coordinates so that they are set to existing units */
  70565. + if (coord_set_to_right(&left_coord)
  70566. + || coord_set_to_left(tap->coord)) {
  70567. + result = 0;
  70568. + break;
  70569. + }
  70570. +
  70571. + if (coord_compare(&left_coord, tap->coord) ==
  70572. + COORD_CMP_ON_RIGHT) {
  70573. + /* keys from @from_key to @to_key are not in the tree */
  70574. + result = 0;
  70575. + break;
  70576. + }
  70577. +
  70578. + if (left_coord.item_pos != tap->coord->item_pos) {
  70579. + /* do not allow to cut more than one item. It is added to solve problem of truncating
  70580. + partially converted files. If file is partially converted there may exist a twig node
  70581. + containing both internal item or items pointing to leaf nodes with formatting items
  70582. + and extent item. We do not want to kill internal items being at twig node here
  70583. + because cut_tree_worker assumes killing them from level level */
  70584. + coord_dup(&left_coord, tap->coord);
  70585. + assert("vs-1652",
  70586. + coord_is_existing_unit(&left_coord));
  70587. + left_coord.unit_pos = 0;
  70588. + }
  70589. +
  70590. + /* cut data from one node */
  70591. + /* *smallest_removed = *reiser4_min_key(); */
  70592. + result =
  70593. + kill_node_content(&left_coord, tap->coord, from_key,
  70594. + to_key, smallest_removed,
  70595. + next_node_lock.node, object,
  70596. + truncate);
  70597. + reiser4_tap_relse(tap);
  70598. + }
  70599. + if (result)
  70600. + break;
  70601. +
  70602. + ++(*progress);
  70603. +
  70604. + /* Check whether all items with keys >= from_key were removed
  70605. + * from the tree. */
  70606. + if (keyle(smallest_removed, from_key))
  70607. + /* result = 0; */
  70608. + break;
  70609. +
  70610. + if (next_node_lock.node == NULL)
  70611. + break;
  70612. +
  70613. + result = reiser4_tap_move(tap, &next_node_lock);
  70614. + done_lh(&next_node_lock);
  70615. + if (result)
  70616. + break;
  70617. +
  70618. + /* Break long reiser4_cut_tree operation (deletion of a large
  70619. + file) if atom requires commit. */
  70620. + if (*progress > CUT_TREE_MIN_ITERATIONS
  70621. + && current_atom_should_commit()) {
  70622. + result = -E_REPEAT;
  70623. + break;
  70624. + }
  70625. + }
  70626. + done_lh(&next_node_lock);
  70627. + /* assert("vs-301", !keyeq(&smallest_removed, reiser4_min_key())); */
  70628. + return result;
  70629. +}
  70630. +
  70631. +/* there is a fundamental problem with optimizing deletes: VFS does it
  70632. + one file at a time. Another problem is that if an item can be
  70633. + anything, then deleting items must be done one at a time. It just
   seems clean to write this to specify a from and a to key, and cut
  70635. + everything between them though. */
  70636. +
  70637. +/* use this function with care if deleting more than what is part of a single file. */
  70638. +/* do not use this when cutting a single item, it is suboptimal for that */
  70639. +
  70640. +/* You are encouraged to write plugin specific versions of this. It
  70641. + cannot be optimal for all plugins because it works item at a time,
  70642. + and some plugins could sometimes work node at a time. Regular files
  70643. + however are not optimizable to work node at a time because of
  70644. + extents needing to free the blocks they point to.
  70645. +
  70646. + Optimizations compared to v3 code:
  70647. +
  70648. + It does not balance (that task is left to memory pressure code).
  70649. +
  70650. + Nodes are deleted only if empty.
  70651. +
  70652. + Uses extents.
  70653. +
  70654. + Performs read-ahead of formatted nodes whose contents are part of
  70655. + the deletion.
  70656. +*/
  70657. +
  70658. +/**
  70659. + * Delete everything from the reiser4 tree between two keys: @from_key and
  70660. + * @to_key.
  70661. + *
  70662. + * @from_key: the beginning of the deleted key range,
  70663. + * @to_key: the end of the deleted key range,
  70664. + * @smallest_removed: the smallest removed key,
  70665. + * @object: owner of cutting items.
  70666. + * @truncate: true if called for file truncate.
 * @progress: return true if progress in file item deletion was made;
 * the @smallest_removed value is valid in that case.
  70669. + *
  70670. + * @return: 0 if success, error code otherwise, -E_REPEAT means that long cut_tree
 * operation was interrupted for allowing atom commit.
  70672. + */
  70673. +
int reiser4_cut_tree_object(reiser4_tree * tree, const reiser4_key * from_key,
			    const reiser4_key * to_key,
			    reiser4_key * smallest_removed_p,
			    struct inode *object, int truncate, int *progress)
{
	lock_handle lock;
	int result;
	tap_t tap;
	coord_t right_coord;
	/* scratch key used when the caller passes smallest_removed_p == NULL */
	reiser4_key smallest_removed;
	/* worker routine: plugin-provided for @object, or the common one */
	int (*cut_tree_worker) (tap_t *, const reiser4_key *,
				const reiser4_key *, reiser4_key *,
				struct inode *, int, int *);
	STORE_COUNTERS;

	assert("umka-329", tree != NULL);
	assert("umka-330", from_key != NULL);
	assert("umka-331", to_key != NULL);
	assert("zam-936", keyle(from_key, to_key));

	if (smallest_removed_p == NULL)
		smallest_removed_p = &smallest_removed;

	init_lh(&lock);

	do {
		/* Find rightmost item to cut away from the tree. */
		result = reiser4_object_lookup(object, to_key, &right_coord,
					       &lock, ZNODE_WRITE_LOCK,
					       FIND_MAX_NOT_MORE_THAN,
					       TWIG_LEVEL, LEAF_LEVEL,
					       CBK_UNIQUE, NULL /*ra_info */);
		if (result != CBK_COORD_FOUND)
			break;
		/* pick worker: file plugin may override the common variant */
		if (object == NULL
		    || inode_file_plugin(object)->cut_tree_worker == NULL)
			cut_tree_worker = cut_tree_worker_common;
		else
			cut_tree_worker =
			    inode_file_plugin(object)->cut_tree_worker;
		/* tap takes over @right_coord/@lock for the worker's walk */
		reiser4_tap_init(&tap, &right_coord, &lock, ZNODE_WRITE_LOCK);
		result =
		    cut_tree_worker(&tap, from_key, to_key, smallest_removed_p,
				    object, truncate, progress);
		reiser4_tap_done(&tap);

		reiser4_preempt_point();

	} while (0);

	done_lh(&lock);

	if (result) {
		/* translate special error codes into the documented contract */
		switch (result) {
		case -E_NO_NEIGHBOR:
			/* nothing left to cut: report success */
			result = 0;
			break;
		case -E_DEADLOCK:
			result = -E_REPEAT;
			/* fall through: -E_REPEAT is returned to the caller */
		case -E_REPEAT:
		case -ENOMEM:
		case -ENOENT:
			break;
		default:
			warning("nikita-2861", "failure: %i", result);
		}
	}

	CHECK_COUNTERS;
	return result;
}
  70745. +
  70746. +/* repeat reiser4_cut_tree_object until everything is deleted.
  70747. + * unlike cut_file_items, it does not end current transaction if -E_REPEAT
  70748. + * is returned by cut_tree_object. */
  70749. +int reiser4_cut_tree(reiser4_tree * tree, const reiser4_key * from,
  70750. + const reiser4_key * to, struct inode *inode, int truncate)
  70751. +{
  70752. + int result;
  70753. + int progress;
  70754. +
  70755. + do {
  70756. + result = reiser4_cut_tree_object(tree, from, to, NULL,
  70757. + inode, truncate, &progress);
  70758. + } while (result == -E_REPEAT);
  70759. +
  70760. + return result;
  70761. +}
  70762. +
  70763. +/* finishing reiser4 initialization */
  70764. +int reiser4_init_tree(reiser4_tree * tree /* pointer to structure being
  70765. + * initialized */ ,
  70766. + const reiser4_block_nr * root_block /* address of a root block
  70767. + * on a disk */ ,
  70768. + tree_level height /* height of a tree */ ,
  70769. + node_plugin * nplug /* default node plugin */ )
  70770. +{
  70771. + int result;
  70772. +
  70773. + assert("nikita-306", tree != NULL);
  70774. + assert("nikita-307", root_block != NULL);
  70775. + assert("nikita-308", height > 0);
  70776. + assert("nikita-309", nplug != NULL);
  70777. + assert("zam-587", tree->super != NULL);
  70778. + assert("edward-171", get_current_context() != NULL);
  70779. + /*
  70780. + * We'll perform costly memory allocations for znode hash table, etc.
  70781. + * So, set proper allocation flags
  70782. + */
  70783. + get_current_context()->gfp_mask |= (__GFP_NOWARN);
  70784. +
  70785. + tree->root_block = *root_block;
  70786. + tree->height = height;
  70787. + tree->estimate_one_insert = calc_estimate_one_insert(height);
  70788. + tree->nplug = nplug;
  70789. +
  70790. + tree->znode_epoch = 1ull;
  70791. +
  70792. + cbk_cache_init(&tree->cbk_cache);
  70793. +
  70794. + result = znodes_tree_init(tree);
  70795. + if (result == 0)
  70796. + result = jnodes_tree_init(tree);
  70797. + if (result == 0) {
  70798. + tree->uber = zget(tree, &UBER_TREE_ADDR, NULL, 0,
  70799. + reiser4_ctx_gfp_mask_get());
  70800. + if (IS_ERR(tree->uber)) {
  70801. + result = PTR_ERR(tree->uber);
  70802. + tree->uber = NULL;
  70803. + }
  70804. + }
  70805. + return result;
  70806. +}
  70807. +
  70808. +/* release resources associated with @tree */
  70809. +void reiser4_done_tree(reiser4_tree * tree /* tree to release */ )
  70810. +{
  70811. + if (tree == NULL)
  70812. + return;
  70813. +
  70814. + if (tree->uber != NULL) {
  70815. + zput(tree->uber);
  70816. + tree->uber = NULL;
  70817. + }
  70818. + znodes_tree_done(tree);
  70819. + jnodes_tree_done(tree);
  70820. + cbk_cache_done(&tree->cbk_cache);
  70821. +}
  70822. +
  70823. +/* Make Linus happy.
  70824. + Local variables:
  70825. + c-indentation-style: "K&R"
  70826. + mode-name: "LC"
  70827. + c-basic-offset: 8
  70828. + tab-width: 8
  70829. + fill-column: 120
  70830. + scroll-step: 1
  70831. + End:
  70832. +*/
  70833. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/tree.h linux-5.16.14/fs/reiser4/tree.h
  70834. --- linux-5.16.14.orig/fs/reiser4/tree.h 1970-01-01 01:00:00.000000000 +0100
  70835. +++ linux-5.16.14/fs/reiser4/tree.h 2022-03-12 13:26:19.689892818 +0100
  70836. @@ -0,0 +1,579 @@
  70837. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  70838. + * reiser4/README */
  70839. +
  70840. +/* Tree operations. See fs/reiser4/tree.c for comments */
  70841. +
  70842. +#if !defined( __REISER4_TREE_H__ )
  70843. +#define __REISER4_TREE_H__
  70844. +
  70845. +#include "forward.h"
  70846. +#include "debug.h"
  70847. +#include "dformat.h"
  70848. +#include "plugin/node/node.h"
  70849. +#include "plugin/plugin.h"
  70850. +#include "znode.h"
  70851. +#include "tap.h"
  70852. +
  70853. +#include <linux/types.h> /* for __u?? */
  70854. +#include <linux/fs.h> /* for struct super_block */
  70855. +#include <linux/spinlock.h>
  70856. +#include <linux/sched.h> /* for struct task_struct */
  70857. +
  70858. +/* fictive block number never actually used */
  70859. +extern const reiser4_block_nr UBER_TREE_ADDR;
  70860. +
  70861. +/* &cbk_cache_slot - entry in a coord cache.
  70862. +
  70863. + This is entry in a coord_by_key (cbk) cache, represented by
  70864. + &cbk_cache.
  70865. +
  70866. +*/
typedef struct cbk_cache_slot {
	/* cached node: result of a recent successful tree lookup */
	znode *node;
	/* linkage to the next cbk cache slot in a LRU order */
	struct list_head lru;
} cbk_cache_slot;
  70873. +
  70874. +/* &cbk_cache - coord cache. This is part of reiser4_tree.
  70875. +
  70876. + cbk_cache is supposed to speed up tree lookups by caching results of recent
  70877. + successful lookups (we don't cache negative results as dentry cache
  70878. + does). Cache consists of relatively small number of entries kept in a LRU
  70879. + order. Each entry (&cbk_cache_slot) contains a pointer to znode, from
  70880. + which we can obtain a range of keys that covered by this znode. Before
  70881. + embarking into real tree traversal we scan cbk_cache slot by slot and for
  70882. + each slot check whether key we are looking for is between minimal and
  70883. + maximal keys for node pointed to by this slot. If no match is found, real
  70884. + tree traversal is performed and if result is successful, appropriate entry
  70885. + is inserted into cache, possibly pulling least recently used entry out of
  70886. + it.
  70887. +
  70888. + Tree spin lock is used to protect coord cache. If contention for this
  70889. + lock proves to be too high, more finer grained locking can be added.
  70890. +
  70891. + Invariants involving parts of this data-type:
  70892. +
  70893. + [cbk-cache-invariant]
  70894. +*/
typedef struct cbk_cache {
	/* serializator for cache accesses */
	rwlock_t guard;
	/* number of entries in the ->slot array */
	int nr_slots;
	/* head of LRU list of cache slots */
	struct list_head lru;
	/* actual array of slots */
	cbk_cache_slot *slot;
} cbk_cache;
  70904. +
/* level_lookup_result - possible outcome of looking up key at some level.
   This is used by coord_by_key when traversing tree downward. */
typedef enum {
	/* continue to the next (lower) level */
	LOOKUP_CONT,
	/* done. Either required item was found, or we can prove it
	   doesn't exist, or some error occurred. */
	LOOKUP_DONE,
	/* restart traversal from the root. Infamous "repetition". */
	LOOKUP_REST,
	/* sentinel: this should never appear as an actual outcome */
	LOOKUP_INVAL
} level_lookup_result;
  70918. +
  70919. +/* This is representation of internal reiser4 tree where all file-system
  70920. + data and meta-data are stored. This structure is passed to all tree
  70921. + manipulation functions. It's different from the super block because:
  70922. + we don't want to limit ourselves to strictly one to one mapping
  70923. + between super blocks and trees, and, because they are logically
  70924. + different: there are things in a super block that have no relation to
  70925. + the tree (bitmaps, journalling area, mount options, etc.) and there
  70926. + are things in a tree that bear no relation to the super block, like
  70927. + tree of znodes.
  70928. +
  70929. + At this time, there is only one tree
  70930. + per filesystem, and this struct is part of the super block. We only
  70931. + call the super block the super block for historical reasons (most
  70932. + other filesystems call the per filesystem metadata the super block).
  70933. +*/
  70934. +
struct reiser4_tree {
	/* block_nr == 0 is fake znode. Write lock it, while changing
	   tree height. */
	/* disk address of root node of a tree */
	reiser4_block_nr root_block;

	/* level of the root node. If this is 1, tree consists of root
	   node only */
	tree_level height;

	/*
	 * this is cached here to avoid calling plugins through function
	 * dereference all the time.
	 */
	__u64 estimate_one_insert;

	/* cache of recent tree lookup results */
	cbk_cache cbk_cache;

	/* hash table to look up znodes by block number. */
	z_hash_table zhash_table;
	/* hash table for znodes with fake (unallocated) block numbers */
	z_hash_table zfake_table;
	/* hash table to look up jnodes by inode and offset. */
	j_hash_table jhash_table;

	/* lock protecting:
	   - parent pointers,
	   - sibling pointers,
	   - znode hash table
	   - coord cache
	 */
	/* NOTE: The "giant" tree lock can be replaced by more spin locks,
	   hoping they will be less contended. We can use one spin lock per
	   znode hash bucket. With the addition of some code complexity,
	   sibling pointers can be protected by both znode spin locks. However,
	   before this more SMP-scalable locking change is adopted it should be
	   tested on n-way (n > 4) SMP machines. Current 4-way machine tests do
	   not show that the tree lock is contended or a bottleneck
	   (2003.07.25). */

	rwlock_t tree_lock;

	/* lock protecting delimiting keys */
	rwlock_t dk_lock;

	/* spin lock protecting znode_epoch */
	spinlock_t epoch_lock;
	/* version stamp used to mark znode updates. See seal.[ch] for more
	 * information. */
	__u64 znode_epoch;

	/* fake znode sitting above the root; see UBER_TREE_ADDR */
	znode *uber;
	/* default node plugin (see reiser4_init_tree()) */
	node_plugin *nplug;
	/* super block this tree belongs to */
	struct super_block *super;
	struct {
		/* carry flags used for insertion of new nodes */
		__u32 new_node_flags;
		/* carry flags used for insertion of new extents */
		__u32 new_extent_flags;
		/* carry flags used for paste operations */
		__u32 paste_flags;
		/* carry flags used for insert operations */
		__u32 insert_flags;
	} carry;
};
  70999. +
  71000. +extern int reiser4_init_tree(reiser4_tree * tree,
  71001. + const reiser4_block_nr * root_block,
  71002. + tree_level height, node_plugin * default_plugin);
  71003. +extern void reiser4_done_tree(reiser4_tree * tree);
  71004. +
/* cbk flags: options for coord_by_key() */
typedef enum {
	/* coord_by_key() is called for insertion. This is necessary because
	   of extents being located at the twig level. For explanation, see
	   comment just above is_next_item_internal().
	 */
	CBK_FOR_INSERT = (1 << 0),
	/* coord_by_key() is called with key that is known to be unique */
	CBK_UNIQUE = (1 << 1),
	/* coord_by_key() can trust delimiting keys. This option is not user
	   accessible. coord_by_key() will set it automatically. It will be
	   only cleared by special-case in extents-on-the-twig-level handling
	   where it is necessary to insert item with a key smaller than
	   leftmost key in a node. This is necessary because of extents being
	   located at the twig level. For explanation, see comment just above
	   is_next_item_internal().
	 */
	CBK_TRUST_DK = (1 << 2),
	CBK_READA = (1 << 3),	/* original: readahead leaves which contain items of certain file */
	CBK_READDIR_RA = (1 << 4),	/* readdir: readahead whole directory and all its stat datas */
	CBK_DKSET = (1 << 5),
	CBK_EXTENDED_COORD = (1 << 6),	/* coord_t is actually an extended
					 * coord -- NOTE(review): original
					 * comment was truncated here; confirm
					 * against coord users */
	CBK_IN_CACHE = (1 << 7),	/* node is already in cache */
	CBK_USE_CRABLOCK = (1 << 8)	/* use crab_lock in stead of long term
					 * lock */
} cbk_flags;
  71031. +
/* insertion outcome. IBK = insert by key */
typedef enum {
	IBK_INSERT_OK = 0,	/* insertion succeeded */
	IBK_ALREADY_EXISTS = -EEXIST,	/* key already present in the tree */
	IBK_IO_ERROR = -EIO,	/* I/O error during insertion */
	IBK_NO_SPACE = -E_NODE_FULL,	/* no free space in target node */
	IBK_OOM = -ENOMEM	/* memory allocation failure */
} insert_result;
  71040. +
  71041. +#define IS_CBKERR(err) ((err) != CBK_COORD_FOUND && (err) != CBK_COORD_NOTFOUND)
  71042. +
  71043. +typedef int (*tree_iterate_actor_t) (reiser4_tree * tree, coord_t * coord,
  71044. + lock_handle * lh, void *arg);
  71045. +extern int reiser4_iterate_tree(reiser4_tree * tree, coord_t * coord,
  71046. + lock_handle * lh,
  71047. + tree_iterate_actor_t actor, void *arg,
  71048. + znode_lock_mode mode, int through_units_p);
  71049. +extern int get_uber_znode(reiser4_tree * tree, znode_lock_mode mode,
  71050. + znode_lock_request pri, lock_handle * lh);
  71051. +
/* return node plugin of @node */
static inline node_plugin *node_plugin_by_node(const znode *
					       node /* node to query */ )
{
	assert("vs-213", node != NULL);
	/* the node must be loaded before its plugin can be queried */
	assert("vs-214", znode_is_loaded(node));

	return node->nplug;
}
  71061. +
/* number of items in @node */
static inline pos_in_node_t node_num_items(const znode * node)
{
	assert("nikita-2754", znode_is_loaded(node));
	/* cached counter must agree with what the node plugin reports */
	assert("nikita-2468",
	       node_plugin_by_node(node)->num_of_items(node) == node->nr_items);

	return node->nr_items;
}
  71071. +
/* Return the number of items at the node @coord points into. Asserts
   coord->node != NULL. */
static inline unsigned coord_num_items(const coord_t * coord)
{
	assert("jmacd-9805", coord->node != NULL);

	return node_num_items(coord->node);
}
  71080. +
  71081. +/* true if @node is empty */
  71082. +static inline int node_is_empty(const znode * node)
  71083. +{
  71084. + return node_num_items(node) == 0;
  71085. +}
  71086. +
/* outcome of a node shift operation */
typedef enum {
	SHIFTED_SOMETHING = 0,	/* shift succeeded */
	SHIFT_NO_SPACE = -E_NODE_FULL,	/* no free space in target node */
	SHIFT_IO_ERROR = -EIO,	/* I/O error during shift */
	SHIFT_OOM = -ENOMEM,	/* memory allocation failure */
} shift_result;
  71093. +
  71094. +extern node_plugin *node_plugin_by_coord(const coord_t * coord);
  71095. +extern int is_coord_in_node(const coord_t * coord);
  71096. +extern int key_in_node(const reiser4_key *, const coord_t *);
  71097. +extern void coord_item_move_to(coord_t * coord, int items);
  71098. +extern void coord_unit_move_to(coord_t * coord, int units);
  71099. +
  71100. +/* there are two types of repetitive accesses (ra): intra-syscall
  71101. + (local) and inter-syscall (global). Local ra is used when
  71102. + during single syscall we add/delete several items and units in the
  71103. + same place in a tree. Note that plan-A fragments local ra by
  71104. + separating stat-data and file body in key-space. Global ra is
  71105. + used when user does repetitive modifications in the same place in a
  71106. + tree.
  71107. +
  71108. + Our ra implementation serves following purposes:
  71109. + 1 it affects balancing decisions so that next operation in a row
  71110. + can be performed faster;
  71111. + 2 it affects lower-level read-ahead in page-cache;
  71112. + 3 it allows to avoid unnecessary lookups by maintaining some state
  71113. + across several operations (this is only for local ra);
  71114. + 4 it leaves room for lazy-micro-balancing: when we start a sequence of
  71115. + operations they are performed without actually doing any intra-node
  71116. + shifts, until we finish sequence or scope of sequence leaves
  71117. + current node, only then we really pack node (local ra only).
  71118. +*/
  71119. +
  71120. +/* another thing that can be useful is to keep per-tree and/or
  71121. + per-process cache of recent lookups. This cache can be organised as a
  71122. + list of block numbers of formatted nodes sorted by starting key in
  71123. + this node. Balancings should invalidate appropriate parts of this
  71124. + cache.
  71125. +*/
  71126. +
  71127. +lookup_result coord_by_key(reiser4_tree * tree, const reiser4_key * key,
  71128. + coord_t * coord, lock_handle * handle,
  71129. + znode_lock_mode lock, lookup_bias bias,
  71130. + tree_level lock_level, tree_level stop_level,
  71131. + __u32 flags, ra_info_t *);
  71132. +
  71133. +lookup_result reiser4_object_lookup(struct inode *object,
  71134. + const reiser4_key * key,
  71135. + coord_t * coord,
  71136. + lock_handle * lh,
  71137. + znode_lock_mode lock_mode,
  71138. + lookup_bias bias,
  71139. + tree_level lock_level,
  71140. + tree_level stop_level,
  71141. + __u32 flags, ra_info_t * info);
  71142. +
  71143. +insert_result insert_by_key(reiser4_tree * tree, const reiser4_key * key,
  71144. + reiser4_item_data * data, coord_t * coord,
  71145. + lock_handle * lh,
  71146. + tree_level stop_level, __u32 flags);
  71147. +insert_result insert_by_coord(coord_t * coord,
  71148. + reiser4_item_data * data, const reiser4_key * key,
  71149. + lock_handle * lh, __u32);
  71150. +insert_result insert_extent_by_coord(coord_t * coord,
  71151. + reiser4_item_data * data,
  71152. + const reiser4_key * key, lock_handle * lh);
  71153. +int cut_node_content(coord_t * from, coord_t * to, const reiser4_key * from_key,
  71154. + const reiser4_key * to_key,
  71155. + reiser4_key * smallest_removed);
  71156. +int kill_node_content(coord_t * from, coord_t * to,
  71157. + const reiser4_key * from_key, const reiser4_key * to_key,
  71158. + reiser4_key * smallest_removed,
  71159. + znode * locked_left_neighbor, struct inode *inode,
  71160. + int truncate);
  71161. +
  71162. +int reiser4_resize_item(coord_t * coord, reiser4_item_data * data,
  71163. + reiser4_key * key, lock_handle * lh, cop_insert_flag);
  71164. +int insert_into_item(coord_t * coord, lock_handle * lh, const reiser4_key * key,
  71165. + reiser4_item_data * data, unsigned);
  71166. +int reiser4_insert_flow(coord_t * coord, lock_handle * lh, flow_t * f);
  71167. +int find_new_child_ptr(znode * parent, znode * child, znode * left,
  71168. + coord_t * result);
  71169. +
  71170. +int shift_right_of_but_excluding_insert_coord(coord_t * insert_coord);
  71171. +int shift_left_of_and_including_insert_coord(coord_t * insert_coord);
  71172. +
  71173. +void fake_kill_hook_tail(struct inode *, loff_t start, loff_t end, int);
  71174. +
  71175. +extern int cut_tree_worker_common(tap_t *, const reiser4_key *,
  71176. + const reiser4_key *, reiser4_key *,
  71177. + struct inode *, int, int *);
  71178. +extern int reiser4_cut_tree_object(reiser4_tree *, const reiser4_key *,
  71179. + const reiser4_key *, reiser4_key *,
  71180. + struct inode *, int, int *);
  71181. +extern int reiser4_cut_tree(reiser4_tree * tree, const reiser4_key * from,
  71182. + const reiser4_key * to, struct inode *, int);
  71183. +
  71184. +extern int reiser4_delete_node(znode *, reiser4_key *, struct inode *, int);
  71185. +extern int check_tree_pointer(const coord_t * pointer, const znode * child);
  71186. +extern int find_new_child_ptr(znode * parent, znode * child UNUSED_ARG,
  71187. + znode * left, coord_t * result);
  71188. +extern int find_child_ptr(znode * parent, znode * child, coord_t * result);
  71189. +extern int set_child_delimiting_keys(znode * parent, const coord_t * in_parent,
  71190. + znode * child);
  71191. +extern znode *child_znode(const coord_t * in_parent, znode * parent,
  71192. + int incore_p, int setup_dkeys_p);
  71193. +
  71194. +extern int cbk_cache_init(cbk_cache * cache);
  71195. +extern void cbk_cache_done(cbk_cache * cache);
  71196. +extern void cbk_cache_invalidate(const znode * node, reiser4_tree * tree);
  71197. +
  71198. +extern char *sprint_address(const reiser4_block_nr * block);
  71199. +
  71200. +#if REISER4_DEBUG
  71201. +extern void print_coord_content(const char *prefix, coord_t * p);
  71202. +extern void reiser4_print_address(const char *prefix,
  71203. + const reiser4_block_nr * block);
  71204. +extern void print_tree_rec(const char *prefix, reiser4_tree * tree,
  71205. + __u32 flags);
  71206. +extern void check_dkeys(znode *node);
  71207. +#else
  71208. +#define print_coord_content(p, c) noop
  71209. +#define reiser4_print_address(p, b) noop
  71210. +#endif
  71211. +
  71212. +extern void forget_znode(lock_handle * handle);
  71213. +extern int deallocate_znode(znode * node);
  71214. +
  71215. +extern int is_disk_addr_unallocated(const reiser4_block_nr * addr);
  71216. +
/* struct used internally to pack all numerous arguments of tree lookup.
   Used to avoid passing a lot of arguments to helper functions. */
typedef struct cbk_handle {
	/* tree we are in */
	reiser4_tree *tree;
	/* key we are going after */
	const reiser4_key *key;
	/* coord we will store result in */
	coord_t *coord;
	/* type of lock to take on target node */
	znode_lock_mode lock_mode;
	/* lookup bias. See comments at the declaration of lookup_bias */
	lookup_bias bias;
	/* lock level: level starting from which tree traversal starts taking
	 * write locks. */
	tree_level lock_level;
	/* level where search will stop. Either item will be found between
	   lock_level and stop_level, or CBK_COORD_NOTFOUND will be
	   returned.
	 */
	tree_level stop_level;
	/* level we are currently at */
	tree_level level;
	/* block number of @active node. Tree traversal operates on two
	   nodes: active and parent. */
	reiser4_block_nr block;
	/* error message to be printed by the caller, if any */
	const char *error;
	/* result passed back to caller */
	int result;
	/* lock handles for active and parent */
	lock_handle *parent_lh;
	lock_handle *active_lh;
	/* delimiting keys (ld = left, rd = right) of the node being
	   traversed -- presumably; confirm against cbk users */
	reiser4_key ld_key;
	reiser4_key rd_key;
	/* flags, passed to the cbk routine. Bits of this bitmask are defined
	   in tree.h:cbk_flags enum. */
	__u32 flags;
	/* optional read-ahead information; NULL is accepted by callers */
	ra_info_t *ra_info;
	/* inode the lookup is performed on behalf of, if any */
	struct inode *object;
} cbk_handle;
  71258. +
  71259. +extern znode_lock_mode cbk_lock_mode(tree_level level, cbk_handle * h);
  71260. +
  71261. +/* eottl.c */
  71262. +extern int handle_eottl(cbk_handle *h, int *outcome);
  71263. +
  71264. +int lookup_multikey(cbk_handle * handle, int nr_keys);
  71265. +int lookup_couple(reiser4_tree * tree,
  71266. + const reiser4_key * key1, const reiser4_key * key2,
  71267. + coord_t * coord1, coord_t * coord2,
  71268. + lock_handle * lh1, lock_handle * lh2,
  71269. + znode_lock_mode lock_mode, lookup_bias bias,
  71270. + tree_level lock_level, tree_level stop_level, __u32 flags,
  71271. + int *result1, int *result2);
  71272. +
/* take the "giant" tree rwlock for reading; pairs with read_unlock_tree() */
static inline void read_lock_tree(reiser4_tree *tree)
{
	/* check that tree is not locked */
	assert("", (LOCK_CNT_NIL(rw_locked_tree) &&
		    LOCK_CNT_NIL(read_locked_tree) &&
		    LOCK_CNT_NIL(write_locked_tree)));
	/* check that spinlocks of lower priorities are not held */
	assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
		    LOCK_CNT_NIL(rw_locked_dk) &&
		    LOCK_CNT_NIL(spin_locked_stack)));

	read_lock(&(tree->tree_lock));

	/* update per-thread lock ownership counters */
	LOCK_CNT_INC(read_locked_tree);
	LOCK_CNT_INC(rw_locked_tree);
	LOCK_CNT_INC(spin_locked);
}
  71290. +
/* release the tree rwlock taken by read_lock_tree() */
static inline void read_unlock_tree(reiser4_tree *tree)
{
	assert("nikita-1375", LOCK_CNT_GTZ(read_locked_tree));
	assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_tree));
	/* NOTE(review): assert label "nikita-1376" is reused on the next
	   check; labels are normally unique in this codebase */
	assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));

	LOCK_CNT_DEC(read_locked_tree);
	LOCK_CNT_DEC(rw_locked_tree);
	LOCK_CNT_DEC(spin_locked);

	read_unlock(&(tree->tree_lock));
}
  71303. +
  71304. +static inline void write_lock_tree(reiser4_tree *tree)
  71305. +{
  71306. + /* check that tree is not locked */
  71307. + assert("", (LOCK_CNT_NIL(rw_locked_tree) &&
  71308. + LOCK_CNT_NIL(read_locked_tree) &&
  71309. + LOCK_CNT_NIL(write_locked_tree)));
  71310. + /* check that spinlocks of lower priorities are not held */
  71311. + assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
  71312. + LOCK_CNT_NIL(rw_locked_dk) &&
  71313. + LOCK_CNT_NIL(spin_locked_stack)));
  71314. +
  71315. + write_lock(&(tree->tree_lock));
  71316. +
  71317. + LOCK_CNT_INC(write_locked_tree);
  71318. + LOCK_CNT_INC(rw_locked_tree);
  71319. + LOCK_CNT_INC(spin_locked);
  71320. +}
  71321. +
  71322. +static inline void write_unlock_tree(reiser4_tree *tree)
  71323. +{
  71324. + assert("nikita-1375", LOCK_CNT_GTZ(write_locked_tree));
  71325. + assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_tree));
  71326. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  71327. +
  71328. + LOCK_CNT_DEC(write_locked_tree);
  71329. + LOCK_CNT_DEC(rw_locked_tree);
  71330. + LOCK_CNT_DEC(spin_locked);
  71331. +
  71332. + write_unlock(&(tree->tree_lock));
  71333. +}
  71334. +
  71335. +static inline void read_lock_dk(reiser4_tree *tree)
  71336. +{
  71337. + /* check that dk is not locked */
  71338. + assert("", (LOCK_CNT_NIL(rw_locked_dk) &&
  71339. + LOCK_CNT_NIL(read_locked_dk) &&
  71340. + LOCK_CNT_NIL(write_locked_dk)));
  71341. + /* check that spinlocks of lower priorities are not held */
  71342. + assert("", LOCK_CNT_NIL(spin_locked_stack));
  71343. +
  71344. + read_lock(&((tree)->dk_lock));
  71345. +
  71346. + LOCK_CNT_INC(read_locked_dk);
  71347. + LOCK_CNT_INC(rw_locked_dk);
  71348. + LOCK_CNT_INC(spin_locked);
  71349. +}
  71350. +
  71351. +static inline void read_unlock_dk(reiser4_tree *tree)
  71352. +{
  71353. + assert("nikita-1375", LOCK_CNT_GTZ(read_locked_dk));
  71354. + assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_dk));
  71355. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  71356. +
  71357. + LOCK_CNT_DEC(read_locked_dk);
  71358. + LOCK_CNT_DEC(rw_locked_dk);
  71359. + LOCK_CNT_DEC(spin_locked);
  71360. +
  71361. + read_unlock(&(tree->dk_lock));
  71362. +}
  71363. +
  71364. +static inline void write_lock_dk(reiser4_tree *tree)
  71365. +{
  71366. + /* check that dk is not locked */
  71367. + assert("", (LOCK_CNT_NIL(rw_locked_dk) &&
  71368. + LOCK_CNT_NIL(read_locked_dk) &&
  71369. + LOCK_CNT_NIL(write_locked_dk)));
  71370. + /* check that spinlocks of lower priorities are not held */
  71371. + assert("", LOCK_CNT_NIL(spin_locked_stack));
  71372. +
  71373. + write_lock(&((tree)->dk_lock));
  71374. +
  71375. + LOCK_CNT_INC(write_locked_dk);
  71376. + LOCK_CNT_INC(rw_locked_dk);
  71377. + LOCK_CNT_INC(spin_locked);
  71378. +}
  71379. +
  71380. +static inline void write_unlock_dk(reiser4_tree *tree)
  71381. +{
  71382. + assert("nikita-1375", LOCK_CNT_GTZ(write_locked_dk));
  71383. + assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_dk));
  71384. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  71385. +
  71386. + LOCK_CNT_DEC(write_locked_dk);
  71387. + LOCK_CNT_DEC(rw_locked_dk);
  71388. + LOCK_CNT_DEC(spin_locked);
  71389. +
  71390. + write_unlock(&(tree->dk_lock));
  71391. +}
  71392. +
  71393. +/* estimate api. Implementation is in estimate.c */
  71394. +reiser4_block_nr estimate_one_insert_item(reiser4_tree *);
  71395. +reiser4_block_nr estimate_one_insert_into_item(reiser4_tree *);
  71396. +reiser4_block_nr estimate_insert_flow(tree_level);
  71397. +reiser4_block_nr estimate_one_item_removal(reiser4_tree *);
  71398. +reiser4_block_nr calc_estimate_one_insert(tree_level);
  71399. +reiser4_block_nr estimate_dirty_cluster(struct inode *);
  71400. +reiser4_block_nr estimate_insert_cluster(struct inode *);
  71401. +reiser4_block_nr estimate_update_cluster(struct inode *);
  71402. +
  71403. +/* __REISER4_TREE_H__ */
  71404. +#endif
  71405. +
  71406. +/* Make Linus happy.
  71407. + Local variables:
  71408. + c-indentation-style: "K&R"
  71409. + mode-name: "LC"
  71410. + c-basic-offset: 8
  71411. + tab-width: 8
  71412. + fill-column: 120
  71413. + scroll-step: 1
  71414. + End:
  71415. +*/
  71416. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/tree_mod.c linux-5.16.14/fs/reiser4/tree_mod.c
  71417. --- linux-5.16.14.orig/fs/reiser4/tree_mod.c 1970-01-01 01:00:00.000000000 +0100
  71418. +++ linux-5.16.14/fs/reiser4/tree_mod.c 2022-03-12 13:26:19.689892818 +0100
  71419. @@ -0,0 +1,387 @@
  71420. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  71421. + * reiser4/README */
  71422. +
  71423. +/*
  71424. + * Functions to add/delete new nodes to/from the tree.
  71425. + *
  71426. + * Functions from this file are used by carry (see carry*) to handle:
  71427. + *
  71428. + * . insertion of new formatted node into tree
  71429. + *
  71430. + * . addition of new tree root, increasing tree height
  71431. + *
  71432. + * . removing tree root, decreasing tree height
  71433. + *
  71434. + */
  71435. +
  71436. +#include "forward.h"
  71437. +#include "debug.h"
  71438. +#include "dformat.h"
  71439. +#include "key.h"
  71440. +#include "coord.h"
  71441. +#include "plugin/plugin.h"
  71442. +#include "jnode.h"
  71443. +#include "znode.h"
  71444. +#include "tree_mod.h"
  71445. +#include "block_alloc.h"
  71446. +#include "tree_walk.h"
  71447. +#include "tree.h"
  71448. +#include "super.h"
  71449. +
  71450. +#include <linux/err.h>
  71451. +
  71452. +static int add_child_ptr(znode * parent, znode * child);
  71453. +/* warning only issued if error is not -E_REPEAT */
  71454. +#define ewarning( error, ... ) \
  71455. + if( ( error ) != -E_REPEAT ) \
  71456. + warning( __VA_ARGS__ )
  71457. +
  71458. +/* allocate new node on the @level and immediately on the right of @brother. */
  71459. +znode * reiser4_new_node(znode * brother /* existing left neighbor
  71460. + * of new node */,
  71461. + tree_level level /* tree level at which new node is to
  71462. + * be allocated */)
  71463. +{
  71464. + znode *result;
  71465. + int retcode;
  71466. + reiser4_block_nr blocknr;
  71467. +
  71468. + assert("nikita-930", brother != NULL);
  71469. + assert("umka-264", level < REAL_MAX_ZTREE_HEIGHT);
  71470. +
  71471. + retcode = assign_fake_blocknr_formatted(&blocknr);
  71472. + if (retcode == 0) {
  71473. + result =
  71474. + zget(znode_get_tree(brother), &blocknr, NULL, level,
  71475. + reiser4_ctx_gfp_mask_get());
  71476. + if (IS_ERR(result)) {
  71477. + ewarning(PTR_ERR(result), "nikita-929",
  71478. + "Cannot allocate znode for carry: %li",
  71479. + PTR_ERR(result));
  71480. + return result;
  71481. + }
  71482. + /* cheap test, can be executed even when debugging is off */
  71483. + if (!znode_just_created(result)) {
  71484. + warning("nikita-2213",
  71485. + "Allocated already existing block: %llu",
  71486. + (unsigned long long)blocknr);
  71487. + zput(result);
  71488. + return ERR_PTR(RETERR(-EIO));
  71489. + }
  71490. +
  71491. + assert("nikita-931", result != NULL);
  71492. + result->nplug = znode_get_tree(brother)->nplug;
  71493. + assert("nikita-933", result->nplug != NULL);
  71494. +
  71495. + retcode = zinit_new(result, reiser4_ctx_gfp_mask_get());
  71496. + if (retcode == 0) {
  71497. + ZF_SET(result, JNODE_CREATED);
  71498. + zrelse(result);
  71499. + } else {
  71500. + zput(result);
  71501. + result = ERR_PTR(retcode);
  71502. + }
  71503. + } else {
  71504. + /* failure to allocate new node during balancing.
  71505. + This should never happen. Ever. Returning -E_REPEAT
  71506. + is not viable solution, because "out of disk space"
  71507. + is not a transient error that will go away by itself.
  71508. + */
  71509. + ewarning(retcode, "nikita-928",
  71510. + "Cannot allocate block for carry: %i", retcode);
  71511. + result = ERR_PTR(retcode);
  71512. + }
  71513. + assert("nikita-1071", result != NULL);
  71514. + return result;
  71515. +}
  71516. +
  71517. +/* allocate new root and add it to the tree
  71518. +
  71519. + This helper function is called by add_new_root().
  71520. +
  71521. +*/
  71522. +znode *reiser4_add_tree_root(znode * old_root /* existing tree root */ ,
  71523. + znode * fake /* "fake" znode */ )
  71524. +{
  71525. + reiser4_tree *tree = znode_get_tree(old_root);
  71526. + znode *new_root = NULL; /* to shut gcc up */
  71527. + int result;
  71528. +
  71529. + assert("nikita-1069", old_root != NULL);
  71530. + assert("umka-262", fake != NULL);
  71531. + assert("umka-263", tree != NULL);
  71532. +
  71533. + /* "fake" znode---one always hanging just above current root. This
  71534. + node is locked when new root is created or existing root is
  71535. + deleted. Downward tree traversal takes lock on it before taking
  71536. + lock on a root node. This avoids race conditions with root
  71537. + manipulations.
  71538. +
  71539. + */
  71540. + assert("nikita-1348", znode_above_root(fake));
  71541. + assert("nikita-1211", znode_is_root(old_root));
  71542. +
  71543. + result = 0;
  71544. + if (tree->height >= REAL_MAX_ZTREE_HEIGHT) {
  71545. + warning("nikita-1344", "Tree is too tall: %i", tree->height);
  71546. + /* ext2 returns -ENOSPC when it runs out of free inodes with a
  71547. + following comment (fs/ext2/ialloc.c:441): Is it really
  71548. + ENOSPC?
  71549. +
  71550. + -EXFULL? -EINVAL?
  71551. + */
  71552. + result = RETERR(-ENOSPC);
  71553. + } else {
  71554. + /* Allocate block for new root. It's not that
  71555. + important where it will be allocated, as root is
  71556. + almost always in memory. Moreover, allocate on
  71557. + flush can be going here.
  71558. + */
  71559. + assert("nikita-1448", znode_is_root(old_root));
  71560. + new_root = reiser4_new_node(fake, tree->height + 1);
  71561. + if (!IS_ERR(new_root) && (result = zload(new_root)) == 0) {
  71562. + lock_handle rlh;
  71563. +
  71564. + init_lh(&rlh);
  71565. + result =
  71566. + longterm_lock_znode(&rlh, new_root,
  71567. + ZNODE_WRITE_LOCK,
  71568. + ZNODE_LOCK_LOPRI);
  71569. + if (result == 0) {
  71570. + parent_coord_t *in_parent;
  71571. +
  71572. + znode_make_dirty(fake);
  71573. +
  71574. + /* new root is a child of "fake" node */
  71575. + write_lock_tree(tree);
  71576. +
  71577. + ++tree->height;
  71578. +
  71579. + /* recalculate max balance overhead */
  71580. + tree->estimate_one_insert =
  71581. + calc_estimate_one_insert(tree->height);
  71582. +
  71583. + tree->root_block = *znode_get_block(new_root);
  71584. + in_parent = &new_root->in_parent;
  71585. + init_parent_coord(in_parent, fake);
  71586. + /* manually insert new root into sibling
  71587. + * list. With this all nodes involved into
  71588. + * balancing are connected after balancing is
  71589. + * done---useful invariant to check. */
  71590. + sibling_list_insert_nolock(new_root, NULL);
  71591. + write_unlock_tree(tree);
  71592. +
  71593. + /* insert into new root pointer to the
  71594. + @old_root. */
  71595. + assert("nikita-1110",
  71596. + WITH_DATA(new_root,
  71597. + node_is_empty(new_root)));
  71598. + write_lock_dk(tree);
  71599. + znode_set_ld_key(new_root, reiser4_min_key());
  71600. + znode_set_rd_key(new_root, reiser4_max_key());
  71601. + write_unlock_dk(tree);
  71602. + if (REISER4_DEBUG) {
  71603. + ZF_CLR(old_root, JNODE_LEFT_CONNECTED);
  71604. + ZF_CLR(old_root, JNODE_RIGHT_CONNECTED);
  71605. + ZF_SET(old_root, JNODE_ORPHAN);
  71606. + }
  71607. + result = add_child_ptr(new_root, old_root);
  71608. + done_lh(&rlh);
  71609. + }
  71610. + zrelse(new_root);
  71611. + }
  71612. + }
  71613. + if (result != 0)
  71614. + new_root = ERR_PTR(result);
  71615. + return new_root;
  71616. +}
  71617. +
  71618. +/* build &reiser4_item_data for inserting child pointer
  71619. +
  71620. + Build &reiser4_item_data that can be later used to insert pointer to @child
  71621. + in its parent.
  71622. +
  71623. +*/
  71624. +void build_child_ptr_data(znode * child /* node pointer to which will be
  71625. + * inserted */ ,
  71626. + reiser4_item_data * data /* where to store result */ )
  71627. +{
  71628. + assert("nikita-1116", child != NULL);
  71629. + assert("nikita-1117", data != NULL);
  71630. +
  71631. + /*
  71632. + * NOTE: use address of child's blocknr as address of data to be
  71633. + * inserted. As result of this data gets into on-disk structure in cpu
  71634. + * byte order. internal's create_hook converts it to little endian byte
  71635. + * order.
  71636. + */
  71637. + data->data = (char *)znode_get_block(child);
  71638. + /* data -> data is kernel space */
  71639. + data->user = 0;
  71640. + data->length = sizeof(reiser4_block_nr);
  71641. + /* FIXME-VS: hardcoded internal item? */
  71642. +
  71643. + /* AUDIT: Is it possible that "item_plugin_by_id" may find nothing? */
  71644. + data->iplug = item_plugin_by_id(NODE_POINTER_ID);
  71645. +}
  71646. +
  71647. +/* add pointer to @child into empty @parent.
  71648. +
  71649. + This is used when pointer to old root is inserted into new root which is
  71650. + empty.
  71651. +*/
  71652. +static int add_child_ptr(znode * parent, znode * child)
  71653. +{
  71654. + coord_t coord;
  71655. + reiser4_item_data data;
  71656. + int result;
  71657. + reiser4_key key;
  71658. +
  71659. + assert("nikita-1111", parent != NULL);
  71660. + assert("nikita-1112", child != NULL);
  71661. + assert("nikita-1115",
  71662. + znode_get_level(parent) == znode_get_level(child) + 1);
  71663. +
  71664. + result = zload(parent);
  71665. + if (result != 0)
  71666. + return result;
  71667. + assert("nikita-1113", node_is_empty(parent));
  71668. + coord_init_first_unit(&coord, parent);
  71669. +
  71670. + build_child_ptr_data(child, &data);
  71671. + data.arg = NULL;
  71672. +
  71673. + read_lock_dk(znode_get_tree(parent));
  71674. + key = *znode_get_ld_key(child);
  71675. + read_unlock_dk(znode_get_tree(parent));
  71676. +
  71677. + result = node_plugin_by_node(parent)->create_item(&coord, &key, &data,
  71678. + NULL);
  71679. + znode_make_dirty(parent);
  71680. + zrelse(parent);
  71681. + return result;
  71682. +}
  71683. +
  71684. +/* actually remove tree root */
  71685. +static int reiser4_kill_root(reiser4_tree * tree /* tree from which root is
  71686. + * being removed */,
  71687. + znode * old_root /* root node that is being
  71688. + * removed */ ,
  71689. + znode * new_root /* new root---sole child of
  71690. + * @old_root */,
  71691. + const reiser4_block_nr * new_root_blk /* disk address of
  71692. + * @new_root */)
  71693. +{
  71694. + znode *uber;
  71695. + int result;
  71696. + lock_handle handle_for_uber;
  71697. +
  71698. + assert("umka-265", tree != NULL);
  71699. + assert("nikita-1198", new_root != NULL);
  71700. + assert("nikita-1199",
  71701. + znode_get_level(new_root) + 1 == znode_get_level(old_root));
  71702. +
  71703. + assert("nikita-1201", znode_is_write_locked(old_root));
  71704. +
  71705. + assert("nikita-1203",
  71706. + disk_addr_eq(new_root_blk, znode_get_block(new_root)));
  71707. +
  71708. + init_lh(&handle_for_uber);
  71709. + /* obtain and lock "fake" znode protecting changes in tree height. */
  71710. + result = get_uber_znode(tree, ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI,
  71711. + &handle_for_uber);
  71712. + if (result == 0) {
  71713. + uber = handle_for_uber.node;
  71714. +
  71715. + znode_make_dirty(uber);
  71716. +
  71717. + /* don't take a long-term lock on @new_root. Take a spinlock. */
  71718. +
  71719. + write_lock_tree(tree);
  71720. +
  71721. + tree->root_block = *new_root_blk;
  71722. + --tree->height;
  71723. +
  71724. + /* recalculate max balance overhead */
  71725. + tree->estimate_one_insert =
  71726. + calc_estimate_one_insert(tree->height);
  71727. +
  71728. + assert("nikita-1202",
  71729. + tree->height == znode_get_level(new_root));
  71730. +
  71731. + /* new root is child on "fake" node */
  71732. + init_parent_coord(&new_root->in_parent, uber);
  71733. + ++uber->c_count;
  71734. +
  71735. + /* sibling_list_insert_nolock(new_root, NULL); */
  71736. + write_unlock_tree(tree);
  71737. +
  71738. + /* reinitialise old root. */
  71739. + result = init_znode(ZJNODE(old_root));
  71740. + znode_make_dirty(old_root);
  71741. + if (result == 0) {
  71742. + assert("nikita-1279", node_is_empty(old_root));
  71743. + ZF_SET(old_root, JNODE_HEARD_BANSHEE);
  71744. + old_root->c_count = 0;
  71745. + }
  71746. + }
  71747. + done_lh(&handle_for_uber);
  71748. +
  71749. + return result;
  71750. +}
  71751. +
  71752. +/* remove tree root
  71753. +
  71754. + This function removes tree root, decreasing tree height by one. Tree root
  71755. + and its only child (that is going to become new tree root) are write locked
  71756. + at the entry.
  71757. +
  71758. + To remove tree root we need to take lock on special "fake" znode that
  71759. + protects changes of tree height. See comments in reiser4_add_tree_root() for
  71760. + more on this.
  71761. +
  71762. + Also parent pointers have to be updated in
  71763. + old and new root. To simplify code, function is split into two parts: outer
  71764. + reiser4_kill_tree_root() collects all necessary arguments and calls
  71765. + reiser4_kill_root() to do the actual job.
  71766. +
  71767. +*/
  71768. +int reiser4_kill_tree_root(znode * old_root /* tree root that we are
  71769. + removing*/)
  71770. +{
  71771. + int result;
  71772. + coord_t down_link;
  71773. + znode *new_root;
  71774. + reiser4_tree *tree;
  71775. +
  71776. + assert("umka-266", current_tree != NULL);
  71777. + assert("nikita-1194", old_root != NULL);
  71778. + assert("nikita-1196", znode_is_root(old_root));
  71779. + assert("nikita-1200", node_num_items(old_root) == 1);
  71780. + assert("nikita-1401", znode_is_write_locked(old_root));
  71781. +
  71782. + coord_init_first_unit(&down_link, old_root);
  71783. +
  71784. + tree = znode_get_tree(old_root);
  71785. + new_root = child_znode(&down_link, old_root, 0, 1);
  71786. + if (!IS_ERR(new_root)) {
  71787. + result =
  71788. + reiser4_kill_root(tree, old_root, new_root,
  71789. + znode_get_block(new_root));
  71790. + zput(new_root);
  71791. + } else
  71792. + result = PTR_ERR(new_root);
  71793. +
  71794. + return result;
  71795. +}
  71796. +
  71797. +/* Make Linus happy.
  71798. + Local variables:
  71799. + c-indentation-style: "K&R"
  71800. + mode-name: "LC"
  71801. + c-basic-offset: 8
  71802. + tab-width: 8
  71803. + fill-column: 120
  71804. + scroll-step: 1
  71805. + End:
  71806. +*/
  71807. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/tree_mod.h linux-5.16.14/fs/reiser4/tree_mod.h
  71808. --- linux-5.16.14.orig/fs/reiser4/tree_mod.h 1970-01-01 01:00:00.000000000 +0100
  71809. +++ linux-5.16.14/fs/reiser4/tree_mod.h 2022-03-12 13:26:19.689892818 +0100
  71810. @@ -0,0 +1,29 @@
  71811. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  71812. + * reiser4/README */
  71813. +
  71814. +/* Functions to add/delete new nodes to/from the tree. See tree_mod.c for
  71815. + * comments. */
  71816. +
  71817. +#if !defined( __REISER4_TREE_MOD_H__ )
  71818. +#define __REISER4_TREE_MOD_H__
  71819. +
  71820. +#include "forward.h"
  71821. +
  71822. +znode *reiser4_new_node(znode * brother, tree_level level);
  71823. +znode *reiser4_add_tree_root(znode * old_root, znode * fake);
  71824. +int reiser4_kill_tree_root(znode * old_root);
  71825. +void build_child_ptr_data(znode * child, reiser4_item_data * data);
  71826. +
  71827. +/* __REISER4_TREE_MOD_H__ */
  71828. +#endif
  71829. +
  71830. +/* Make Linus happy.
  71831. + Local variables:
  71832. + c-indentation-style: "K&R"
  71833. + mode-name: "LC"
  71834. + c-basic-offset: 8
  71835. + tab-width: 8
  71836. + fill-column: 120
  71837. + scroll-step: 1
  71838. + End:
  71839. +*/
  71840. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/tree_walk.c linux-5.16.14/fs/reiser4/tree_walk.c
  71841. --- linux-5.16.14.orig/fs/reiser4/tree_walk.c 1970-01-01 01:00:00.000000000 +0100
  71842. +++ linux-5.16.14/fs/reiser4/tree_walk.c 2022-03-12 13:26:19.690892821 +0100
  71843. @@ -0,0 +1,927 @@
  71844. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  71845. + * reiser4/README */
  71846. +
  71847. +/* Routines and macros to:
  71848. +
  71849. + get_left_neighbor()
  71850. +
  71851. + get_right_neighbor()
  71852. +
  71853. + get_parent()
  71854. +
  71855. + get_first_child()
  71856. +
  71857. + get_last_child()
  71858. +
  71859. + various routines to walk the whole tree and do things to it like
  71860. + repack it, or move it to tertiary storage. Please make them as
  71861. + generic as is reasonable.
  71862. +
  71863. +*/
  71864. +
  71865. +#include "forward.h"
  71866. +#include "debug.h"
  71867. +#include "dformat.h"
  71868. +#include "coord.h"
  71869. +#include "plugin/item/item.h"
  71870. +#include "jnode.h"
  71871. +#include "znode.h"
  71872. +#include "tree_walk.h"
  71873. +#include "tree.h"
  71874. +#include "super.h"
  71875. +
  71876. +/* These macros are used internally in tree_walk.c in attempt to make
  71877. + lock_neighbor() code usable to build lock_parent(), lock_right_neighbor,
  71878. + lock_left_neighbor */
  71879. +#define GET_NODE_BY_PTR_OFFSET(node, off) (*(znode**)(((unsigned long)(node)) + (off)))
  71880. +#define FIELD_OFFSET(name) offsetof(znode, name)
  71881. +#define PARENT_PTR_OFFSET FIELD_OFFSET(in_parent.node)
  71882. +#define LEFT_PTR_OFFSET FIELD_OFFSET(left)
  71883. +#define RIGHT_PTR_OFFSET FIELD_OFFSET(right)
  71884. +
  71885. +/* This is the generic procedure to get and lock `generic' neighbor (left or
  71886. + right neighbor or parent). It implements common algorithm for all cases of
  71887. + getting lock on neighbor node, only znode structure field is different in
  71888. + each case. This is parameterized by ptr_offset argument, which is byte
  71889. + offset for the pointer to the desired neighbor within the current node's
  71890. + znode structure. This function should be called with the tree lock held */
  71891. +static int lock_neighbor(
  71892. + /* resulting lock handle */
  71893. + lock_handle * result,
  71894. + /* znode to lock */
  71895. + znode * node,
  71896. + /* pointer to neighbor (or parent) znode field offset, in bytes from
  71897. + the base address of znode structure */
  71898. + int ptr_offset,
  71899. + /* lock mode for longterm_lock_znode call */
  71900. + znode_lock_mode mode,
  71901. + /* lock request for longterm_lock_znode call */
  71902. + znode_lock_request req,
  71903. + /* GN_* flags */
  71904. + int flags, int rlocked)
  71905. +{
  71906. + reiser4_tree *tree = znode_get_tree(node);
  71907. + znode *neighbor;
  71908. + int ret;
  71909. +
  71910. + assert("umka-236", node != NULL);
  71911. + assert("umka-237", tree != NULL);
  71912. + assert_rw_locked(&(tree->tree_lock));
  71913. +
  71914. + if (flags & GN_TRY_LOCK)
  71915. + req |= ZNODE_LOCK_NONBLOCK;
  71916. + if (flags & GN_SAME_ATOM)
  71917. + req |= ZNODE_LOCK_DONT_FUSE;
  71918. +
  71919. + /* get neighbor's address by using of sibling link, quit while loop
  71920. + (and return) if link is not available. */
  71921. + while (1) {
  71922. + neighbor = GET_NODE_BY_PTR_OFFSET(node, ptr_offset);
  71923. +
  71924. + /* return -E_NO_NEIGHBOR if parent or side pointer is NULL or if
  71925. + * node pointed by it is not connected.
  71926. + *
  71927. + * However, GN_ALLOW_NOT_CONNECTED option masks "connected"
  71928. + * check and allows passing reference to not connected znode to
  71929. + * subsequent longterm_lock_znode() call. This kills possible
  71930. + * busy loop if we are trying to get longterm lock on locked but
  71931. + * not yet connected parent node. */
  71932. + if (neighbor == NULL || !((flags & GN_ALLOW_NOT_CONNECTED)
  71933. + || znode_is_connected(neighbor))) {
  71934. + return RETERR(-E_NO_NEIGHBOR);
  71935. + }
  71936. +
  71937. + /* protect it from deletion. */
  71938. + zref(neighbor);
  71939. +
  71940. + rlocked ? read_unlock_tree(tree) : write_unlock_tree(tree);
  71941. +
  71942. + ret = longterm_lock_znode(result, neighbor, mode, req);
  71943. +
  71944. + /* The lock handle obtains its own reference, release the one from above. */
  71945. + zput(neighbor);
  71946. +
  71947. + rlocked ? read_lock_tree(tree) : write_lock_tree(tree);
  71948. +
  71949. + /* restart if node we got reference to is being
  71950. + invalidated. we should not get reference to this node
  71951. + again. */
  71952. + if (ret == -EINVAL)
  71953. + continue;
  71954. + if (ret)
  71955. + return ret;
  71956. +
  71957. + /* check if neighbor link still points to just locked znode;
  71958. + the link could have been changed while the process slept. */
  71959. + if (neighbor == GET_NODE_BY_PTR_OFFSET(node, ptr_offset))
  71960. + return 0;
  71961. +
  71962. + /* znode was locked by mistake; unlock it and restart locking
  71963. + process from beginning. */
  71964. + rlocked ? read_unlock_tree(tree) : write_unlock_tree(tree);
  71965. + longterm_unlock_znode(result);
  71966. + rlocked ? read_lock_tree(tree) : write_lock_tree(tree);
  71967. + }
  71968. +}
  71969. +
  71970. +/* get parent node with longterm lock, accepts GN* flags. */
  71971. +int reiser4_get_parent_flags(lock_handle * lh /* resulting lock handle */ ,
  71972. + znode * node /* child node */ ,
  71973. + znode_lock_mode mode
  71974. + /* type of lock: read or write */ ,
  71975. + int flags /* GN_* flags */ )
  71976. +{
  71977. + int result;
  71978. +
  71979. + read_lock_tree(znode_get_tree(node));
  71980. + result = lock_neighbor(lh, node, PARENT_PTR_OFFSET, mode,
  71981. + ZNODE_LOCK_HIPRI, flags, 1);
  71982. + read_unlock_tree(znode_get_tree(node));
  71983. + return result;
  71984. +}
  71985. +
  71986. +/* wrapper function to lock right or left neighbor depending on GN_GO_LEFT
  71987. + bit in @flags parameter */
  71988. +/* Audited by: umka (2002.06.14) */
  71989. +static inline int
  71990. +lock_side_neighbor(lock_handle * result,
  71991. + znode * node, znode_lock_mode mode, int flags, int rlocked)
  71992. +{
  71993. + int ret;
  71994. + int ptr_offset;
  71995. + znode_lock_request req;
  71996. +
  71997. + if (flags & GN_GO_LEFT) {
  71998. + ptr_offset = LEFT_PTR_OFFSET;
  71999. + req = ZNODE_LOCK_LOPRI;
  72000. + } else {
  72001. + ptr_offset = RIGHT_PTR_OFFSET;
  72002. + req = ZNODE_LOCK_HIPRI;
  72003. + }
  72004. +
  72005. + ret =
  72006. + lock_neighbor(result, node, ptr_offset, mode, req, flags, rlocked);
  72007. +
  72008. + if (ret == -E_NO_NEIGHBOR) /* if we walk left or right -E_NO_NEIGHBOR does not
  72009. + * guarantee that neighbor is absent in the
  72010. + * tree; in this case we return -ENOENT --
  72011. + * means neighbor at least not found in
  72012. + * cache */
  72013. + return RETERR(-ENOENT);
  72014. +
  72015. + return ret;
  72016. +}
  72017. +
  72018. +#if REISER4_DEBUG
  72019. +
  72020. +int check_sibling_list(znode * node)
  72021. +{
  72022. + znode *scan;
  72023. + znode *next;
  72024. +
  72025. + assert("nikita-3283", LOCK_CNT_GTZ(write_locked_tree));
  72026. +
  72027. + if (node == NULL)
  72028. + return 1;
  72029. +
  72030. + if (ZF_ISSET(node, JNODE_RIP))
  72031. + return 1;
  72032. +
  72033. + assert("nikita-3270", node != NULL);
  72034. + assert_rw_write_locked(&(znode_get_tree(node)->tree_lock));
  72035. +
  72036. + for (scan = node; znode_is_left_connected(scan); scan = next) {
  72037. + next = scan->left;
  72038. + if (next != NULL && !ZF_ISSET(next, JNODE_RIP)) {
  72039. + assert("nikita-3271", znode_is_right_connected(next));
  72040. + assert("nikita-3272", next->right == scan);
  72041. + } else
  72042. + break;
  72043. + }
  72044. + for (scan = node; znode_is_right_connected(scan); scan = next) {
  72045. + next = scan->right;
  72046. + if (next != NULL && !ZF_ISSET(next, JNODE_RIP)) {
  72047. + assert("nikita-3273", znode_is_left_connected(next));
  72048. + assert("nikita-3274", next->left == scan);
  72049. + } else
  72050. + break;
  72051. + }
  72052. + return 1;
  72053. +}
  72054. +
  72055. +#endif
  72056. +
  72057. +/* Znode sibling pointers maintenance. */
  72058. +
  72059. +/* Znode sibling pointers are established between any neighboring nodes which are
  72060. + in cache. There are two znode state bits (JNODE_LEFT_CONNECTED,
  72061. + JNODE_RIGHT_CONNECTED), if left or right sibling pointer contains actual
  72062. + value (even NULL), the corresponding JNODE_*_CONNECTED bit is set.
  72063. +
  72064. + Reiser4 tree operations which may allocate new znodes (CBK, tree balancing)
  72065. + take care about searching (hash table lookup may be required) of znode
  72066. + neighbors, establishing sibling pointers between them and setting
  72067. + JNODE_*_CONNECTED state bits. */
  72068. +
  72069. +/* adjusting of sibling pointers and `connected' states for two
  72070. + neighbors; works if one neighbor is NULL (was not found). */
  72071. +
  72072. +/* FIXME-VS: this is unstatic-ed to use in tree.c in prepare_twig_cut */
  72073. +void link_left_and_right(znode * left, znode * right)
  72074. +{
  72075. + assert("nikita-3275", check_sibling_list(left));
  72076. + assert("nikita-3275", check_sibling_list(right));
  72077. +
  72078. + if (left != NULL) {
  72079. + if (left->right == NULL) {
  72080. + left->right = right;
  72081. + ZF_SET(left, JNODE_RIGHT_CONNECTED);
  72082. +
  72083. + ON_DEBUG(left->right_version =
  72084. + atomic_inc_return(&delim_key_version);
  72085. + );
  72086. +
  72087. + } else if (ZF_ISSET(left->right, JNODE_HEARD_BANSHEE)
  72088. + && left->right != right) {
  72089. +
  72090. + ON_DEBUG(left->right->left_version =
  72091. + atomic_inc_return(&delim_key_version);
  72092. + left->right_version =
  72093. + atomic_inc_return(&delim_key_version););
  72094. +
  72095. + left->right->left = NULL;
  72096. + left->right = right;
  72097. + ZF_SET(left, JNODE_RIGHT_CONNECTED);
  72098. + } else
  72099. + /*
  72100. + * there is a race condition in renew_sibling_link()
  72101. + * and assertions below check that it is only one
  72102. + * there. Thread T1 calls renew_sibling_link() without
  72103. + * GN_NO_ALLOC flag. zlook() doesn't find neighbor
  72104. + * node, but before T1 gets to the
  72105. + * link_left_and_right(), another thread T2 creates
  72106. + * neighbor node and connects it. check for
  72107. + * left->right == NULL above protects T1 from
  72108. + * overwriting correct left->right pointer installed
  72109. + * by T2.
  72110. + */
  72111. + assert("nikita-3302",
  72112. + right == NULL || left->right == right);
  72113. + }
  72114. + if (right != NULL) {
  72115. + if (right->left == NULL) {
  72116. + right->left = left;
  72117. + ZF_SET(right, JNODE_LEFT_CONNECTED);
  72118. +
  72119. + ON_DEBUG(right->left_version =
  72120. + atomic_inc_return(&delim_key_version);
  72121. + );
  72122. +
  72123. + } else if (ZF_ISSET(right->left, JNODE_HEARD_BANSHEE)
  72124. + && right->left != left) {
  72125. +
  72126. + ON_DEBUG(right->left->right_version =
  72127. + atomic_inc_return(&delim_key_version);
  72128. + right->left_version =
  72129. + atomic_inc_return(&delim_key_version););
  72130. +
  72131. + right->left->right = NULL;
  72132. + right->left = left;
  72133. + ZF_SET(right, JNODE_LEFT_CONNECTED);
  72134. +
  72135. + } else
  72136. + assert("nikita-3303",
  72137. + left == NULL || right->left == left);
  72138. + }
  72139. + assert("nikita-3275", check_sibling_list(left));
  72140. + assert("nikita-3275", check_sibling_list(right));
  72141. +}
  72142. +
  72143. +/* Audited by: umka (2002.06.14) */
  72144. +static void link_znodes(znode * first, znode * second, int to_left)
  72145. +{
  72146. + if (to_left)
  72147. + link_left_and_right(second, first);
  72148. + else
  72149. + link_left_and_right(first, second);
  72150. +}
  72151. +
  72152. +/* gets the next coord unit position (to the left or to the right,
  72153. + depending on the GN_GO_LEFT bit in @flags) in the horizontal direction,
  72154. + even across a node boundary. Should be called under the tree lock; it
  72155. + guarantees absence of a sibling link on the parent level if
  72156. + lock_side_neighbor() fails with -ENOENT. */
  72157. +static int far_next_coord(coord_t * coord, lock_handle * handle, int flags)
  72158. +{
  72159. + int ret;
  72160. + znode *node;
  72161. + reiser4_tree *tree;
  72162. +
  72163. + assert("umka-243", coord != NULL);
  72164. + assert("umka-244", handle != NULL);
  72165. + assert("zam-1069", handle->node == NULL);
  72166. +
  72167. + ret =
  72168. + (flags & GN_GO_LEFT) ? coord_prev_unit(coord) :
  72169. + coord_next_unit(coord);
  72170. + if (!ret)
  72171. + return 0;
  72172. +
  72173. + ret =
  72174. + lock_side_neighbor(handle, coord->node, ZNODE_READ_LOCK, flags, 0);
  72175. + if (ret)
  72176. + return ret;
  72177. +
  72178. + node = handle->node;
  72179. + tree = znode_get_tree(node);
  72180. + write_unlock_tree(tree);
  72181. +
  72182. + coord_init_zero(coord);
  72183. +
  72184. + /* We avoid synchronous read here if it is specified by flag. */
  72185. + if ((flags & GN_ASYNC) && znode_page(handle->node) == NULL) {
  72186. + ret = jstartio(ZJNODE(handle->node));
  72187. + if (!ret)
  72188. + ret = -E_REPEAT;
  72189. + goto error_locked;
  72190. + }
  72191. +
  72192. + /* the corresponding zrelse() should be called by the clients of
  72193. +    far_next_coord(), at the place where this node gets unlocked. */
  72194. + ret = zload(handle->node);
  72195. + if (ret)
  72196. + goto error_locked;
  72197. +
  72198. + if (flags & GN_GO_LEFT)
  72199. + coord_init_last_unit(coord, node);
  72200. + else
  72201. + coord_init_first_unit(coord, node);
  72202. +
  72203. + if (0) {
  72204. + error_locked:
  72205. + longterm_unlock_znode(handle);
  72206. + }
  72207. + write_lock_tree(tree);
  72208. + return ret;
  72209. +}
  72210. +
  72211. +/* Very significant function which performs a step in horizontal direction
  72212. + when sibling pointer is not available. Actually, it is only function which
  72213. + does it.
  72214. + Note: this function does not restore locking status at exit;
  72215. + the caller should take care of proper unlocking and zrelse-ing */
  72216. +static int
  72217. +renew_sibling_link(coord_t * coord, lock_handle * handle, znode * child,
  72218. + tree_level level, int flags, int *nr_locked)
  72219. +{
  72220. + int ret;
  72221. + int to_left = flags & GN_GO_LEFT;
  72222. + reiser4_block_nr da;
  72223. + /* parent of the neighbor node; we set it to the parent until it is
  72224. +    detected that child and neighbor do not share a common parent */
  72225. + znode *side_parent = coord->node;
  72226. + reiser4_tree *tree = znode_get_tree(child);
  72227. + znode *neighbor = NULL;
  72228. +
  72229. + assert("umka-245", coord != NULL);
  72230. + assert("umka-246", handle != NULL);
  72231. + assert("umka-247", child != NULL);
  72232. + assert("umka-303", tree != NULL);
  72233. +
  72234. + init_lh(handle);
  72235. + write_lock_tree(tree);
  72236. + ret = far_next_coord(coord, handle, flags);
  72237. +
  72238. + if (ret) {
  72239. + if (ret != -ENOENT) {
  72240. + write_unlock_tree(tree);
  72241. + return ret;
  72242. + }
  72243. + } else {
  72244. + item_plugin *iplug;
  72245. +
  72246. + if (handle->node != NULL) {
  72247. + (*nr_locked)++;
  72248. + side_parent = handle->node;
  72249. + }
  72250. +
  72251. + /* does the coord object point to an internal item? We do not
  72252. +    support sibling pointers between znodes for formatted and
  72253. +    unformatted nodes and return -E_NO_NEIGHBOR in that case. */
  72254. + iplug = item_plugin_by_coord(coord);
  72255. + if (!item_is_internal(coord)) {
  72256. + link_znodes(child, NULL, to_left);
  72257. + write_unlock_tree(tree);
  72258. + /* we know there can't be formatted neighbor */
  72259. + return RETERR(-E_NO_NEIGHBOR);
  72260. + }
  72261. + write_unlock_tree(tree);
  72262. +
  72263. + iplug->s.internal.down_link(coord, NULL, &da);
  72264. +
  72265. + if (flags & GN_NO_ALLOC) {
  72266. + neighbor = zlook(tree, &da);
  72267. + } else {
  72268. + neighbor =
  72269. + zget(tree, &da, side_parent, level,
  72270. + reiser4_ctx_gfp_mask_get());
  72271. + }
  72272. +
  72273. + if (IS_ERR(neighbor)) {
  72274. + ret = PTR_ERR(neighbor);
  72275. + return ret;
  72276. + }
  72277. +
  72278. + if (neighbor)
  72279. + /* update delimiting keys */
  72280. + set_child_delimiting_keys(coord->node, coord, neighbor);
  72281. +
  72282. + write_lock_tree(tree);
  72283. + }
  72284. +
  72285. + if (likely(neighbor == NULL ||
  72286. + (znode_get_level(child) == znode_get_level(neighbor)
  72287. + && child != neighbor)))
  72288. + link_znodes(child, neighbor, to_left);
  72289. + else {
  72290. + warning("nikita-3532",
  72291. + "Sibling nodes on the different levels: %i != %i\n",
  72292. + znode_get_level(child), znode_get_level(neighbor));
  72293. + ret = RETERR(-EIO);
  72294. + }
  72295. +
  72296. + write_unlock_tree(tree);
  72297. +
  72298. + /* if GN_NO_ALLOC isn't set we keep reference to neighbor znode */
  72299. + if (neighbor != NULL && (flags & GN_NO_ALLOC))
  72300. + /* atomic_dec(&ZJNODE(neighbor)->x_count); */
  72301. + zput(neighbor);
  72302. +
  72303. + return ret;
  72304. +}
  72305. +
  72306. +/* This function is for establishing of one side relation. */
  72307. +/* Audited by: umka (2002.06.14) */
  72308. +static int connect_one_side(coord_t * coord, znode * node, int flags)
  72309. +{
  72310. + coord_t local;
  72311. + lock_handle handle;
  72312. + int nr_locked = 0; /* renew_sibling_link() increments this; start at 0 */
  72313. + int ret;
  72314. +
  72315. + assert("umka-248", coord != NULL);
  72316. + assert("umka-249", node != NULL);
  72317. +
  72318. + coord_dup_nocheck(&local, coord);
  72319. +
  72320. + init_lh(&handle);
  72321. +
  72322. + ret =
  72323. +     renew_sibling_link(&local, &handle, node, znode_get_level(node),
  72324. +        flags | GN_NO_ALLOC, &nr_locked);
  72325. +
  72326. + if (handle.node != NULL) {
  72327. + /* complementary operations for zload() and lock() in far_next_coord() */
  72328. + zrelse(handle.node);
  72329. + longterm_unlock_znode(&handle);
  72330. + }
  72331. +
  72332. + /* we catch error codes which are not interesting for us because we
  72333. +    run renew_sibling_link() only for znode connection. */
  72334. + if (ret == -ENOENT || ret == -E_NO_NEIGHBOR)
  72335. + return 0;
  72336. +
  72337. + return ret;
  72338. +}
  72339. +
  72340. +/* if @child is not in `connected' state, performs hash searches for left and
  72341. + right neighbor nodes and establishes horizontal sibling links */
  72342. +/* Audited by: umka (2002.06.14), umka (2002.06.15) */
  72343. +int connect_znode(coord_t * parent_coord, znode * child)
  72344. +{
  72345. + reiser4_tree *tree = znode_get_tree(child);
  72346. + int ret = 0;
  72347. +
  72348. + assert("zam-330", parent_coord != NULL);
  72349. + assert("zam-331", child != NULL);
  72350. + assert("zam-332", parent_coord->node != NULL);
  72351. + assert("umka-305", tree != NULL);
  72352. +
  72353. + /* it is trivial to `connect' root znode because it can't have
  72354. + neighbors */
  72355. + if (znode_above_root(parent_coord->node)) {
  72356. + child->left = NULL;
  72357. + child->right = NULL;
  72358. + ZF_SET(child, JNODE_LEFT_CONNECTED);
  72359. + ZF_SET(child, JNODE_RIGHT_CONNECTED);
  72360. +
  72361. + ON_DEBUG(child->left_version =
  72362. + atomic_inc_return(&delim_key_version);
  72363. + child->right_version =
  72364. + atomic_inc_return(&delim_key_version););
  72365. +
  72366. + return 0;
  72367. + }
  72368. +
  72369. + /* load parent node */
  72370. + coord_clear_iplug(parent_coord);
  72371. + ret = zload(parent_coord->node);
  72372. +
  72373. + if (ret != 0)
  72374. + return ret;
  72375. +
  72376. + /* protect `connected' state check by tree_lock */
  72377. + read_lock_tree(tree);
  72378. +
  72379. + if (!znode_is_right_connected(child)) {
  72380. + read_unlock_tree(tree);
  72381. + /* connect right (default is right) */
  72382. + ret = connect_one_side(parent_coord, child, GN_NO_ALLOC);
  72383. + if (ret)
  72384. + goto zrelse_and_ret;
  72385. +
  72386. + read_lock_tree(tree);
  72387. + }
  72388. +
  72389. + ret = znode_is_left_connected(child);
  72390. +
  72391. + read_unlock_tree(tree);
  72392. +
  72393. + if (!ret) {
  72394. + ret =
  72395. + connect_one_side(parent_coord, child,
  72396. + GN_NO_ALLOC | GN_GO_LEFT);
  72397. + } else
  72398. + ret = 0;
  72399. +
  72400. + zrelse_and_ret:
  72401. + zrelse(parent_coord->node);
  72402. +
  72403. + return ret;
  72404. +}
  72405. +
  72406. +/* this function is like renew_sibling_link() but allocates neighbor node if
  72407. + it doesn't exist and `connects' it. It may require making two steps in
  72408. + horizontal direction, first one for neighbor node finding/allocation,
  72409. + second one is for finding neighbor of neighbor to connect freshly allocated
  72410. + znode. */
  72411. +/* Audited by: umka (2002.06.14), umka (2002.06.15) */
  72412. +static int
  72413. +renew_neighbor(coord_t * coord, znode * node, tree_level level, int flags)
  72414. +{
  72415. + coord_t local;
  72416. + lock_handle empty[2];
  72417. + reiser4_tree *tree = znode_get_tree(node);
  72418. + znode *neighbor = NULL;
  72419. + int nr_locked = 0;
  72420. + int ret;
  72421. +
  72422. + assert("umka-250", coord != NULL);
  72423. + assert("umka-251", node != NULL);
  72424. + assert("umka-307", tree != NULL);
  72425. + assert("umka-308", level <= tree->height);
  72426. +
  72427. + /* umka (2002.06.14)
  72428. + Here probably should be a check for given "level" validness.
  72429. + Something like assert("xxx-yyy", level < REAL_MAX_ZTREE_HEIGHT);
  72430. + */
  72431. +
  72432. + coord_dup(&local, coord);
  72433. +
  72434. + ret =
  72435. + renew_sibling_link(&local, &empty[0], node, level,
  72436. + flags & ~GN_NO_ALLOC, &nr_locked);
  72437. + if (ret)
  72438. + goto out;
  72439. +
  72440. + /* tree lock is not needed here because we keep parent node(s) locked
  72441. + and reference to neighbor znode incremented */
  72442. + neighbor = (flags & GN_GO_LEFT) ? node->left : node->right;
  72443. +
  72444. + read_lock_tree(tree);
  72445. + ret = znode_is_connected(neighbor);
  72446. + read_unlock_tree(tree);
  72447. + if (ret) {
  72448. + ret = 0;
  72449. + goto out;
  72450. + }
  72451. +
  72452. + ret =
  72453. + renew_sibling_link(&local, &empty[nr_locked], neighbor, level,
  72454. + flags | GN_NO_ALLOC, &nr_locked);
  72455. + /* second renew_sibling_link() call is used for znode connection only,
  72456. + so we can live with these errors */
  72457. + if (-ENOENT == ret || -E_NO_NEIGHBOR == ret)
  72458. + ret = 0;
  72459. +
  72460. + out:
  72461. +
  72462. + for (--nr_locked; nr_locked >= 0; --nr_locked) {
  72463. + zrelse(empty[nr_locked].node);
  72464. + longterm_unlock_znode(&empty[nr_locked]);
  72465. + }
  72466. +
  72467. + if (neighbor != NULL)
  72468. + /* decrement znode reference counter without actually
  72469. + releasing it. */
  72470. + atomic_dec(&ZJNODE(neighbor)->x_count);
  72471. +
  72472. + return ret;
  72473. +}
  72474. +
  72475. +/*
  72476. + reiser4_get_neighbor() -- lock node's neighbor.
  72477. +
  72478. + reiser4_get_neighbor() locks node's neighbor (left or right one, depends on
  72479. + given parameter) using sibling link to it. If sibling link is not available
  72480. + (i.e. neighbor znode is not in cache) and flags allow read blocks, we go one
  72481. + level up for information about neighbor's disk address. We lock node's
  72482. + parent, if it is common parent for both 'node' and its neighbor, neighbor's
  72483. + disk address is in next (to left or to right) down link from link that points
  72484. + to original node. If not, we need to lock parent's neighbor, read its content
  72485. + and take first(last) downlink with neighbor's disk address. That locking
  72486. + could be done by using sibling link and lock_neighbor() function, if sibling
  72487. + link exists. In another case we have to go level up again until we find
  72488. + common parent or valid sibling link. Then go down
  72489. + allocating/connecting/locking/reading nodes until neighbor of first one is
  72490. + locked.
  72491. +
  72492. + @neighbor: result lock handle,
  72493. + @node: a node which we lock neighbor of,
  72494. + @lock_mode: lock mode {LM_READ, LM_WRITE},
  72495. + @flags: logical OR of {GN_*} (see description above) subset.
  72496. +
  72497. + @return: 0 if success, negative value if lock was impossible due to an error
  72498. + or lack of neighbor node.
  72499. +*/
  72500. +
  72501. +/* Audited by: umka (2002.06.14), umka (2002.06.15) */
  72502. +int
  72503. +reiser4_get_neighbor(lock_handle * neighbor, znode * node,
  72504. + znode_lock_mode lock_mode, int flags)
  72505. +{
  72506. + reiser4_tree *tree = znode_get_tree(node);
  72507. + lock_handle path[REAL_MAX_ZTREE_HEIGHT];
  72508. +
  72509. + coord_t coord;
  72510. +
  72511. + tree_level base_level;
  72512. + tree_level h = 0;
  72513. + int ret;
  72514. +
  72515. + assert("umka-252", tree != NULL);
  72516. + assert("umka-253", neighbor != NULL);
  72517. + assert("umka-254", node != NULL);
  72518. +
  72519. + base_level = znode_get_level(node);
  72520. +
  72521. + assert("umka-310", base_level <= tree->height);
  72522. +
  72523. + coord_init_zero(&coord);
  72524. +
  72525. + again:
  72526. + /* first, we try to use simple lock_neighbor() which requires sibling
  72527. + link existence */
  72528. + read_lock_tree(tree);
  72529. + ret = lock_side_neighbor(neighbor, node, lock_mode, flags, 1);
  72530. + read_unlock_tree(tree);
  72531. + if (!ret) {
  72532. + /* load znode content if it was specified */
  72533. + if (flags & GN_LOAD_NEIGHBOR) {
  72534. + ret = zload(node);
  72535. + if (ret)
  72536. + longterm_unlock_znode(neighbor);
  72537. + }
  72538. + return ret;
  72539. + }
  72540. +
  72541. + /* only -ENOENT means we may look upward and try to connect
  72542. + @node with its neighbor (if @flags allow us to do it) */
  72543. + if (ret != -ENOENT || !(flags & GN_CAN_USE_UPPER_LEVELS))
  72544. + return ret;
  72545. +
  72546. + /* before establishing of sibling link we lock parent node; it is
  72547. + required by renew_neighbor() to work. */
  72548. + init_lh(&path[0]);
  72549. + ret = reiser4_get_parent(&path[0], node, ZNODE_READ_LOCK);
  72550. + if (ret)
  72551. + return ret;
  72552. + if (znode_above_root(path[0].node)) {
  72553. + longterm_unlock_znode(&path[0]);
  72554. + return RETERR(-E_NO_NEIGHBOR);
  72555. + }
  72556. +
  72557. + while (1) {
  72558. + znode *child = (h == 0) ? node : path[h - 1].node;
  72559. + znode *parent = path[h].node;
  72560. +
  72561. + ret = zload(parent);
  72562. + if (ret)
  72563. + break;
  72564. +
  72565. + ret = find_child_ptr(parent, child, &coord);
  72566. +
  72567. + if (ret) {
  72568. + zrelse(parent);
  72569. + break;
  72570. + }
  72571. +
  72572. + /* try to establish missing sibling link */
  72573. + ret = renew_neighbor(&coord, child, h + base_level, flags);
  72574. +
  72575. + zrelse(parent);
  72576. +
  72577. + switch (ret) {
  72578. + case 0:
  72579. + /* unlocking of parent znode prevents simple
  72580. + deadlock situation */
  72581. + done_lh(&path[h]);
  72582. +
  72583. + /* depending on the tree level we are on, we repeat the
  72584. +    first locking attempt ... */
  72585. + if (h == 0)
  72586. + goto again;
  72587. +
  72588. + /* ... or repeat establishing of sibling link at
  72589. + one level below. */
  72590. + --h;
  72591. + break;
  72592. +
  72593. + case -ENOENT:
  72594. + /* sibling link is not available -- we go
  72595. + upward. */
  72596. + init_lh(&path[h + 1]);
  72597. + ret =
  72598. + reiser4_get_parent(&path[h + 1], parent,
  72599. + ZNODE_READ_LOCK);
  72600. + if (ret)
  72601. + goto fail;
  72602. + ++h;
  72603. + if (znode_above_root(path[h].node)) {
  72604. + ret = RETERR(-E_NO_NEIGHBOR);
  72605. + goto fail;
  72606. + }
  72607. + break;
  72608. +
  72609. + case -E_DEADLOCK:
  72610. + /* there was lock request from hi-pri locker. if
  72611. + it is possible we unlock last parent node and
  72612. + re-lock it again. */
  72613. + for (; reiser4_check_deadlock(); h--) {
  72614. + done_lh(&path[h]);
  72615. + if (h == 0)
  72616. + goto fail;
  72617. + }
  72618. +
  72619. + break;
  72620. +
  72621. + default: /* other errors. */
  72622. + goto fail;
  72623. + }
  72624. + }
  72625. + fail:
  72626. + ON_DEBUG(check_lock_node_data(node));
  72627. + ON_DEBUG(check_lock_data());
  72628. +
  72629. + /* unlock path */
  72630. + do {
  72631. + /* FIXME-Zam: when we get here from case -E_DEADLOCK's goto
  72632. + fail; path[0] is already done_lh-ed, therefore
  72633. + longterm_unlock_znode(&path[h]); is not applicable */
  72634. + done_lh(&path[h]);
  72635. + --h;
  72636. + } while (h + 1 != 0);
  72637. +
  72638. + return ret;
  72639. +}
  72640. +
  72641. +/* remove node from sibling list */
  72642. +/* Audited by: umka (2002.06.14) */
  72643. +void sibling_list_remove(znode * node)
  72644. +{
  72645. + reiser4_tree *tree;
  72646. +
  72647. + assert("umka-255", node != NULL); /* check @node before dereferencing it */
  72648. + tree = znode_get_tree(node);
  72649. + assert_rw_write_locked(&(tree->tree_lock));
  72650. + assert("nikita-3275", check_sibling_list(node));
  72651. +
  72652. + write_lock_dk(tree);
  72653. + if (znode_is_right_connected(node) && node->right != NULL &&
  72654. +     znode_is_left_connected(node) && node->left != NULL) {
  72655. + assert("zam-32245",
  72656. +        keyeq(znode_get_rd_key(node),
  72657. +       znode_get_ld_key(node->right)));
  72658. + znode_set_rd_key(node->left, znode_get_ld_key(node->right));
  72659. + }
  72660. + write_unlock_dk(tree);
  72661. +
  72662. + if (znode_is_right_connected(node) && node->right != NULL) {
  72663. + assert("zam-322", znode_is_left_connected(node->right));
  72664. + node->right->left = node->left;
  72665. + ON_DEBUG(node->right->left_version =
  72666. +   atomic_inc_return(&delim_key_version);
  72667. +     );
  72668. + }
  72669. + if (znode_is_left_connected(node) && node->left != NULL) {
  72670. + assert("zam-323", znode_is_right_connected(node->left));
  72671. + node->left->right = node->right;
  72672. + ON_DEBUG(node->left->right_version =
  72673. +   atomic_inc_return(&delim_key_version);
  72674. +     );
  72675. + }
  72676. +
  72677. + ZF_CLR(node, JNODE_LEFT_CONNECTED);
  72678. + ZF_CLR(node, JNODE_RIGHT_CONNECTED);
  72679. + ON_DEBUG(node->left = node->right = NULL;
  72680. +  node->left_version = atomic_inc_return(&delim_key_version);
  72681. +  node->right_version = atomic_inc_return(&delim_key_version););
  72682. + assert("nikita-3276", check_sibling_list(node));
  72683. +}
  72684. +
  72685. +/* disconnect node from sibling list */
  72686. +void sibling_list_drop(znode * node)
  72687. +{
  72688. + znode *right;
  72689. + znode *left;
  72690. +
  72691. + assert("nikita-2464", node != NULL);
  72692. + assert("nikita-3277", check_sibling_list(node));
  72693. +
  72694. + right = node->right;
  72695. + if (right != NULL) {
  72696. + assert("nikita-2465", znode_is_left_connected(right));
  72697. + right->left = NULL;
  72698. + ON_DEBUG(right->left_version =
  72699. + atomic_inc_return(&delim_key_version);
  72700. + );
  72701. + }
  72702. + left = node->left;
  72703. + if (left != NULL) {
  72704. + assert("zam-323", znode_is_right_connected(left));
  72705. + left->right = NULL;
  72706. + ON_DEBUG(left->right_version =
  72707. + atomic_inc_return(&delim_key_version);
  72708. + );
  72709. + }
  72710. + ZF_CLR(node, JNODE_LEFT_CONNECTED);
  72711. + ZF_CLR(node, JNODE_RIGHT_CONNECTED);
  72712. + ON_DEBUG(node->left = node->right = NULL;
  72713. + node->left_version = atomic_inc_return(&delim_key_version);
  72714. + node->right_version = atomic_inc_return(&delim_key_version););
  72715. +}
  72716. +
  72717. +/* Insert new node into sibling list. Regular balancing inserts new node
  72718. + after (at right side) existing and locked node (@before), except one case
  72719. + of adding new tree root node. @before should be NULL in that case. */
  72720. +void sibling_list_insert_nolock(znode * new, znode * before)
  72721. +{
  72722. + assert("zam-334", new != NULL);
  72723. + assert("nikita-3298", !znode_is_left_connected(new));
  72724. + assert("nikita-3299", !znode_is_right_connected(new));
  72725. + assert("nikita-3300", new->left == NULL);
  72726. + assert("nikita-3301", new->right == NULL);
  72727. + assert("nikita-3278", check_sibling_list(new));
  72728. + assert("nikita-3279", check_sibling_list(before));
  72729. +
  72730. + if (before != NULL) {
  72731. + assert("zam-333", znode_is_connected(before));
  72732. + new->right = before->right;
  72733. + new->left = before;
  72734. + ON_DEBUG(new->right_version =
  72735. + atomic_inc_return(&delim_key_version);
  72736. + new->left_version =
  72737. + atomic_inc_return(&delim_key_version););
  72738. + if (before->right != NULL) {
  72739. + before->right->left = new;
  72740. + ON_DEBUG(before->right->left_version =
  72741. + atomic_inc_return(&delim_key_version);
  72742. + );
  72743. + }
  72744. + before->right = new;
  72745. + ON_DEBUG(before->right_version =
  72746. + atomic_inc_return(&delim_key_version);
  72747. + );
  72748. + } else {
  72749. + new->right = NULL;
  72750. + new->left = NULL;
  72751. + ON_DEBUG(new->right_version =
  72752. + atomic_inc_return(&delim_key_version);
  72753. + new->left_version =
  72754. + atomic_inc_return(&delim_key_version););
  72755. + }
  72756. + ZF_SET(new, JNODE_LEFT_CONNECTED);
  72757. + ZF_SET(new, JNODE_RIGHT_CONNECTED);
  72758. + assert("nikita-3280", check_sibling_list(new));
  72759. + assert("nikita-3281", check_sibling_list(before));
  72760. +}
  72761. +
  72762. +/*
  72763. + Local variables:
  72764. + c-indentation-style: "K&R"
  72765. + mode-name: "LC"
  72766. + c-basic-offset: 8
  72767. + tab-width: 8
  72768. + fill-column: 80
  72769. + End:
  72770. +*/
  72771. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/tree_walk.h linux-5.16.14/fs/reiser4/tree_walk.h
  72772. --- linux-5.16.14.orig/fs/reiser4/tree_walk.h 1970-01-01 01:00:00.000000000 +0100
  72773. +++ linux-5.16.14/fs/reiser4/tree_walk.h 2022-03-12 13:26:19.690892821 +0100
  72774. @@ -0,0 +1,125 @@
  72775. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  72776. +
  72777. +/* definitions of reiser4 tree walk functions */
  72778. +
  72779. +#ifndef __FS_REISER4_TREE_WALK_H__
  72780. +#define __FS_REISER4_TREE_WALK_H__
  72781. +
  72782. +#include "debug.h"
  72783. +#include "forward.h"
  72784. +
  72785. +/* establishes horizontal links between cached znodes */
  72786. +int connect_znode(coord_t * coord, znode * node);
  72787. +
  72788. +/* tree traversal functions (reiser4_get_parent(), reiser4_get_neighbor())
  72789. + have the following common arguments:
  72790. +
  72791. + return codes:
  72792. +
  72793. + @return : 0 - OK,
  72794. +
  72795. +ZAM-FIXME-HANS: wrong return code name. Change them all.
  72796. + -ENOENT - neighbor is not in cache, what is detected by sibling
  72797. + link absence.
  72798. +
  72799. + -E_NO_NEIGHBOR - we are sure that neighbor (or parent) node cannot be
  72800. + found (because we are left-/right- most node of the
  72801. + tree, for example). Also, this return code is for
  72802. + reiser4_get_parent() when we see no parent link -- it
  72803. + means that our node is root node.
  72804. +
  72805. + -E_DEADLOCK - deadlock detected (request from high-priority process
  72806. + received), other error codes are conformed to
  72807. + /usr/include/asm/errno.h .
  72808. +*/
  72809. +
  72810. +int
  72811. +reiser4_get_parent_flags(lock_handle * result, znode * node,
  72812. + znode_lock_mode mode, int flags);
  72813. +
  72814. +/* bits definition for reiser4_get_neighbor function `flags' arg. */
  72815. +typedef enum {
  72816. + /* If sibling pointer is NULL, this flag allows get_neighbor() to try to
  72817. +  * find a not-allocated, not-connected neighbor by going through upper
  72818. + * levels */
  72819. + GN_CAN_USE_UPPER_LEVELS = 0x1,
  72820. + /* locking left neighbor instead of right one */
  72821. + GN_GO_LEFT = 0x2,
  72822. + /* automatically load neighbor node content */
  72823. + GN_LOAD_NEIGHBOR = 0x4,
  72824. + /* return -E_REPEAT if can't lock */
  72825. + GN_TRY_LOCK = 0x8,
  72826. + /* used internally in tree_walk.c, causes renew_sibling to not
  72827. + allocate neighbor znode, but only search for it in znode cache */
  72828. + GN_NO_ALLOC = 0x10,
  72829. + /* do not go across atom boundaries */
  72830. + GN_SAME_ATOM = 0x20,
  72831. + /* allow to lock not connected nodes */
  72832. + GN_ALLOW_NOT_CONNECTED = 0x40,
  72833. + /* Avoid synchronous jload, instead, call jstartio() and return -E_REPEAT. */
  72834. + GN_ASYNC = 0x80
  72835. +} znode_get_neigbor_flags;
  72836. +
  72837. +/* A commonly used wrapper for reiser4_get_parent_flags(). */
  72838. +static inline int reiser4_get_parent(lock_handle * result, znode * node,
  72839. + znode_lock_mode mode)
  72840. +{
  72841. + return reiser4_get_parent_flags(result, node, mode,
  72842. + GN_ALLOW_NOT_CONNECTED);
  72843. +}
  72844. +
  72845. +int reiser4_get_neighbor(lock_handle * neighbor, znode * node,
  72846. + znode_lock_mode lock_mode, int flags);
  72847. +
  72848. +/* there are wrappers for most common usages of reiser4_get_neighbor() */
  72849. +static inline int
  72850. +reiser4_get_left_neighbor(lock_handle * result, znode * node, int lock_mode,
  72851. + int flags)
  72852. +{
  72853. + return reiser4_get_neighbor(result, node, lock_mode,
  72854. + flags | GN_GO_LEFT);
  72855. +}
  72856. +
  72857. +static inline int
  72858. +reiser4_get_right_neighbor(lock_handle * result, znode * node, int lock_mode,
  72859. + int flags)
  72860. +{
  72861. + ON_DEBUG(check_lock_node_data(node));
  72862. + ON_DEBUG(check_lock_data());
  72863. + return reiser4_get_neighbor(result, node, lock_mode,
  72864. + flags & (~GN_GO_LEFT));
  72865. +}
  72866. +
  72867. +extern void sibling_list_remove(znode * node);
  72868. +extern void sibling_list_drop(znode * node);
  72869. +extern void sibling_list_insert_nolock(znode * new, znode * before);
  72870. +extern void link_left_and_right(znode * left, znode * right);
  72871. +
  72872. +/* Functions called by tree_walk() when tree_walk() ... */
  72873. +struct tree_walk_actor {
  72874. + /* ... meets a formatted node, */
  72875. + int (*process_znode) (tap_t *, void *);
  72876. + /* ... meets an extent, */
  72877. + int (*process_extent) (tap_t *, void *);
  72878. + /* ... begins tree traversal or repeats it after -E_REPEAT was returned by
  72879. + * node or extent processing functions. */
  72880. + int (*before) (void *);
  72881. +};
  72882. +
  72883. +#if REISER4_DEBUG
  72884. +int check_sibling_list(znode * node);
  72885. +#else
  72886. +#define check_sibling_list(n) (1)
  72887. +#endif
  72888. +
  72889. +#endif /* __FS_REISER4_TREE_WALK_H__ */
  72890. +
  72891. +/*
  72892. + Local variables:
  72893. + c-indentation-style: "K&R"
  72894. + mode-name: "LC"
  72895. + c-basic-offset: 8
  72896. + tab-width: 8
  72897. + fill-column: 120
  72898. + End:
  72899. +*/
  72900. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/txnmgr.c linux-5.16.14/fs/reiser4/txnmgr.c
  72901. --- linux-5.16.14.orig/fs/reiser4/txnmgr.c 1970-01-01 01:00:00.000000000 +0100
  72902. +++ linux-5.16.14/fs/reiser4/txnmgr.c 2022-03-12 13:26:19.691892823 +0100
  72903. @@ -0,0 +1,3163 @@
  72904. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  72905. + * reiser4/README */
  72906. +
  72907. +/* Joshua MacDonald wrote the first draft of this code. */
  72908. +
  72909. +/* ZAM-LONGTERM-FIXME-HANS: The locking in this file is badly designed, and a
  72910. +filesystem scales only as well as its worst locking design. You need to
  72911. +substantially restructure this code. Josh was not as experienced a programmer
  72912. +as you. Particularly review how the locking style differs from what you did
  72913. +for znodes using hi-lo priority locking, and present to me an opinion on
  72914. +whether the differences are well founded. */
  72915. +
  72916. +/* I cannot help but to disagree with the sentiment above. Locking of
  72917. + * transaction manager is _not_ badly designed, and, at the very least, is not
  72918. + * the scaling bottleneck. Scaling bottleneck is _exactly_ hi-lo priority
  72919. + * locking on znodes, especially on the root node of the tree. --nikita,
  72920. + * 2003.10.13 */
  72921. +
  72922. +/* The txnmgr is a set of interfaces that keep track of atoms and transcrash handles. The
  72923. + txnmgr processes capture_block requests and manages the relationship between jnodes and
  72924. + atoms through the various stages of a transcrash, and it also oversees the fusion and
  72925. + capture-on-copy processes. The main difficulty with this task is maintaining a
  72926. + deadlock-free lock ordering between atoms and jnodes/handles. The reason for the
  72927. + difficulty is that jnodes, handles, and atoms contain pointer circles, and the cycle
  72928. + must be broken. The main requirement is that atom-fusion be deadlock free, so once you
  72929. + hold the atom_lock you may then wait to acquire any jnode or handle lock. This implies
  72930. + that any time you check the atom-pointer of a jnode or handle and then try to lock that
  72931. + atom, you must use trylock() and possibly reverse the order.
  72932. +
  72933. + This code implements the design documented at:
  72934. +
  72935. + http://namesys.com/txn-doc.html
  72936. +
  72937. +ZAM-FIXME-HANS: update v4.html to contain all of the information present in the above (but updated), and then remove the
  72938. +above document and reference the new. Be sure to provide some credit to Josh. I already have some writings on this
  72939. +topic in v4.html, but they are lacking in details present in the above. Cure that. Remember to write for the bright 12
  72940. +year old --- define all technical terms used.
  72941. +
  72942. +*/
  72943. +
  72944. +/* Thoughts on the external transaction interface:
  72945. +
  72946. + In the current code, a TRANSCRASH handle is created implicitly by reiser4_init_context() (which
  72947. + creates state that lasts for the duration of a system call and is called at the start
  72948. + of ReiserFS methods implementing VFS operations), and closed by reiser4_exit_context(),
  72949. + occupying the scope of a single system call. We wish to give certain applications an
  72950. + interface to begin and close (commit) transactions. Since our implementation of
  72951. + transactions does not yet support isolation, allowing an application to open a
  72952. + transaction implies trusting it to later close the transaction. Part of the
  72953. + transaction interface will be aimed at enabling that trust, but the interface for
  72954. + actually using transactions is fairly narrow.
  72955. +
  72956. + BEGIN_TRANSCRASH: Returns a transcrash identifier. It should be possible to translate
  72957. + this identifier into a string that a shell-script could use, allowing you to start a
  72958. + transaction by issuing a command. Once open, the transcrash should be set in the task
  72959. + structure, and there should be options (I suppose) to allow it to be carried across
  72960. + fork/exec. A transcrash has several options:
  72961. +
  72962. + - READ_FUSING or WRITE_FUSING: The default policy is for txn-capture to capture only
  72963. + on writes (WRITE_FUSING) and allow "dirty reads". If the application wishes to
  72964. + capture on reads as well, it should set READ_FUSING.
  72965. +
  72966. + - TIMEOUT: Since a non-isolated transcrash cannot be undone, every transcrash must
  72967. + eventually close (or else the machine must crash). If the application dies an
  72968. + unexpected death with an open transcrash, for example, or if it hangs for a long
  72969. + duration, one solution (to avoid crashing the machine) is to simply close it anyway.
  72970. + This is a dangerous option, but it is one way to solve the problem until isolated
  72971. + transcrashes are available for untrusted applications.
  72972. +
  72973. + It seems to be what databases do, though it is unclear how one avoids a DoS attack
  72974. + creating a vulnerability based on resource starvation. Guaranteeing that some
  72975. + minimum amount of computational resources are made available would seem more correct
  72976. + than guaranteeing some amount of time. When we again have someone to code the work,
  72977. + this issue should be considered carefully. -Hans
  72978. +
  72979. + RESERVE_BLOCKS: A running transcrash should indicate to the transaction manager how
  72980. + many dirty blocks it expects. The reserve_blocks interface should be called at a point
  72981. + where it is safe for the application to fail, because the system may not be able to
  72982. + grant the allocation and the application must be able to back-out. For this reason,
  72983. + the number of reserve-blocks can also be passed as an argument to BEGIN_TRANSCRASH, but
  72984. + the application may also wish to extend the allocation after beginning its transcrash.
  72985. +
  72986. + CLOSE_TRANSCRASH: The application closes the transcrash when it is finished making
  72987. + modifications that require transaction protection. When isolated transactions are
  72988. + supported the CLOSE operation is replaced by either COMMIT or ABORT. For example, if a
  72989. + RESERVE_BLOCKS call fails for the application, it should "abort" by calling
  72990. + CLOSE_TRANSCRASH, even though it really commits any changes that were made (which is
  72991. + why, for safety, the application should call RESERVE_BLOCKS before making any changes).
  72992. +
  72993. + For actually implementing these out-of-system-call-scoped transcrashes, the
  72994. + reiser4_context has a "txn_handle *trans" pointer that may be set to an open
  72995. + transcrash. Currently there are no dynamically-allocated transcrashes, but there is a
  72996. + "struct kmem_cache *_txnh_slab" created for that purpose in this file.
  72997. +*/
  72998. +
  72999. +/* Extending the other system call interfaces for future transaction features:
  73000. +
  73001. + Specialized applications may benefit from passing flags to the ordinary system call
  73002. + interface such as read(), write(), or stat(). For example, the application specifies
  73003. + WRITE_FUSING by default but wishes to add that a certain read() command should be
  73004. + treated as READ_FUSING. But which read? Is it the directory-entry read, the stat-data
  73005. + read, or the file-data read? These issues are straightforward, but there are a lot of
  73006. + them and adding the necessary flags-passing code will be tedious.
  73007. +
  73008. + When supporting isolated transactions, there is a corresponding READ_MODIFY_WRITE (RMW)
  73009. + flag, which specifies that although it is a read operation being requested, a
  73010. + write-lock should be taken. The reason is that read-locks are shared while write-locks
  73011. + are exclusive, so taking a read-lock when a later-write is known in advance will often
  73012. + lead to deadlock. If a reader knows it will write later, it should issue read
  73013. + requests with the RMW flag set.
  73014. +*/
  73015. +
  73016. +/*
  73017. + The znode/atom deadlock avoidance.
  73018. +
  73019. + FIXME(Zam): writing of this comment is in progress.
  73020. +
  73021. + The atom's special stage ASTAGE_CAPTURE_WAIT introduces a kind of atom's
  73022. + long-term locking, which makes the reiser4 locking scheme more complex. It had
  73023. + deadlocks until we implemented deadlock avoidance algorithms. Those deadlocks
  73024. + looked like the following: one stopped thread waits for a long-term lock on a
  73025. + znode, while the thread that owns that lock waits until fusion with another atom
  73026. + is allowed.
  73027. +
  73028. + The source of the deadlocks is an optimization of not capturing index nodes
  73029. + for read. Let's prove it. Suppose we have dumb node capturing scheme which
  73030. + unconditionally captures each block before locking it.
  73031. +
  73032. + That scheme has no deadlocks. Let's begin with the thread whose stage is
  73033. + ASTAGE_CAPTURE_WAIT and which waits for a znode lock. The thread can't wait for
  73034. + a capture because its stage allows fusion with any atom except those that are
  73035. + being committed currently. A process of atom commit can't deadlock because
  73036. + atom commit procedure does not acquire locks and does not fuse with other
  73037. + atoms. Reiser4 does capturing right before going to sleep inside the
  73038. + longterm_lock_znode() function, it means the znode which we want to lock is
  73039. + already captured and its atom is in ASTAGE_CAPTURE_WAIT stage. If we
  73040. + continue the analysis we understand that no process in the sequence may
  73041. + wait for atom fusion. Thereby there are no deadlocks of the described kind.
  73042. +
  73043. + The capturing optimization makes the deadlocks possible. A thread can wait for a
  73044. + lock whose owner did not capture that node. The lock owner's current atom
  73045. + is not fused with the first atom and it does not get a ASTAGE_CAPTURE_WAIT
  73046. + state. A deadlock is possible when that atom meets another one which is in
  73047. + ASTAGE_CAPTURE_WAIT already.
  73048. +
  73049. + The deadlock avoidance scheme includes two algorithms:
  73050. +
  73051. + First algorithm is used when a thread captures a node which is locked but not
  73052. + captured by another thread. Those nodes are marked MISSED_IN_CAPTURE at the
  73053. + moment we skip their capturing. If such a node (marked MISSED_IN_CAPTURE) is
  73054. + being captured by a thread whose current atom is in ASTAGE_CAPTURE_WAIT, the
  73055. + routine which forces all lock owners to join with current atom is executed.
  73056. +
  73057. + Second algorithm does not allow to skip capturing of already captured nodes.
  73058. +
  73059. + Both algorithms together prevent waiting for a longterm lock without atom fusion
  73060. + with the atoms of all lock owners, which is a key thing for avoiding atom/znode
  73061. + locking deadlocks.
  73062. +*/
  73063. +
  73064. +/*
  73065. + * Transactions and mmap(2).
  73066. + *
  73067. + * 1. Transactions are not supported for accesses through mmap(2), because
  73068. + * this would effectively amount to user-level transactions whose duration
  73069. + * is beyond control of the kernel.
  73070. + *
  73071. + * 2. That said, we still want to preserve some decency with regard to
  73072. + * mmap(2). During normal write(2) call, following sequence of events
  73073. + * happens:
  73074. + *
  73075. + * 1. page is created;
  73076. + *
  73077. + * 2. jnode is created, dirtied and captured into current atom.
  73078. + *
  73079. + * 3. extent is inserted and modified.
  73080. + *
  73081. + * Steps (2) and (3) take place under long term lock on the twig node.
  73082. + *
  73083. + * When file is accessed through mmap(2) page is always created during
  73084. + * page fault.
  73085. + * After this (in reiser4_readpage_dispatch()->reiser4_readpage_extent()):
  73086. + *
  73087. + * 1. if access is made to non-hole page new jnode is created, (if
  73088. + * necessary)
  73089. + *
  73090. + * 2. if access is made to the hole page, jnode is not created (XXX
  73091. + * not clear why).
  73092. + *
  73093. + * Also, even if page is created by write page fault it is not marked
  73094. + * dirty immediately by handle_mm_fault(). Probably this is to avoid races
  73095. + * with page write-out.
  73096. + *
  73097. + * Dirty bit installed by hardware is only transferred to the struct page
  73098. + * later, when page is unmapped (in zap_pte_range(), or
  73099. + * try_to_unmap_one()).
  73100. + *
  73101. + * So, with mmap(2) we have to handle following irksome situations:
  73102. + *
  73103. + * 1. there exists modified page (clean or dirty) without jnode
  73104. + *
  73105. + * 2. there exists modified page (clean or dirty) with clean jnode
  73106. + *
  73107. + * 3. clean page which is a part of atom can be transparently modified
  73108. + * at any moment through mapping without becoming dirty.
  73109. + *
  73110. + * (1) and (2) can lead to the out-of-memory situation: ->writepage()
  73111. + * doesn't know what to do with such pages and ->sync_sb()/->writepages()
  73112. + * don't see them, because these methods operate on atoms.
  73113. + *
  73114. + * (3) can lead to the loss of data: suppose we have dirty page with dirty
  73115. + * captured jnode captured by some atom. As part of early flush (for
  73116. + * example) page was written out. Dirty bit was cleared on both page and
  73117. + * jnode. After this page is modified through mapping, but kernel doesn't
  73118. + * notice and just discards page and jnode as part of commit. (XXX
  73119. + * actually it doesn't, because to reclaim page ->releasepage() has to be
  73120. + * called and before this dirty bit will be transferred to the struct
  73121. + * page).
  73122. + *
  73123. + */
  73124. +
  73125. +#include "debug.h"
  73126. +#include "txnmgr.h"
  73127. +#include "jnode.h"
  73128. +#include "znode.h"
  73129. +#include "block_alloc.h"
  73130. +#include "tree.h"
  73131. +#include "wander.h"
  73132. +#include "ktxnmgrd.h"
  73133. +#include "super.h"
  73134. +#include "page_cache.h"
  73135. +#include "reiser4.h"
  73136. +#include "vfs_ops.h"
  73137. +#include "inode.h"
  73138. +#include "flush.h"
  73139. +#include "discard.h"
  73140. +
  73141. +#include <asm/atomic.h>
  73142. +#include <linux/types.h>
  73143. +#include <linux/fs.h>
  73144. +#include <linux/mm.h>
  73145. +#include <linux/slab.h>
  73146. +#include <linux/pagemap.h>
  73147. +#include <linux/writeback.h>
  73148. +#include <linux/swap.h> /* for totalram_pages */
  73149. +
  73150. +static void atom_free(txn_atom * atom);
  73151. +
  73152. +static int commit_txnh(txn_handle * txnh);
  73153. +
  73154. +static void wakeup_atom_waitfor_list(txn_atom * atom);
  73155. +static void wakeup_atom_waiting_list(txn_atom * atom);
  73156. +
  73157. +static void capture_assign_txnh_nolock(txn_atom * atom, txn_handle * txnh);
  73158. +
  73159. +static void capture_assign_block_nolock(txn_atom * atom, jnode * node);
  73160. +
  73161. +static void fuse_not_fused_lock_owners(txn_handle * txnh, znode * node);
  73162. +
  73163. +static int capture_init_fusion(jnode * node, txn_handle * txnh,
  73164. + txn_capture mode);
  73165. +
  73166. +static int capture_fuse_wait(txn_handle *, txn_atom *, txn_atom *, txn_capture);
  73167. +
  73168. +static void capture_fuse_into(txn_atom * small, txn_atom * large);
  73169. +
  73170. +void reiser4_invalidate_list(struct list_head *);
  73171. +
  73172. +/* GENERIC STRUCTURES */
  73173. +
  73174. +typedef struct _txn_wait_links txn_wait_links;
  73175. +
  73176. +struct _txn_wait_links {
  73177. + lock_stack *_lock_stack;
  73178. + struct list_head _fwaitfor_link;
  73179. + struct list_head _fwaiting_link;
  73180. + int (*waitfor_cb) (txn_atom * atom, struct _txn_wait_links * wlinks);
  73181. + int (*waiting_cb) (txn_atom * atom, struct _txn_wait_links * wlinks);
  73182. +};
  73183. +
  73184. +/* FIXME: In theory, we should be using the slab cache init & destructor
  73185. + methods instead of, e.g., jnode_init, etc. */
  73186. +static struct kmem_cache *_atom_slab = NULL;
  73187. +/* this is for user-visible, cross system-call transactions. */
  73188. +static struct kmem_cache *_txnh_slab = NULL;
  73189. +
  73190. +/**
  73191. + * init_txnmgr_static - create transaction manager slab caches
  73192. + *
  73193. + * Initializes caches of txn-atoms and txn_handle. It is part of reiser4 module
  73194. + * initialization.
  73195. + */
  73196. +int init_txnmgr_static(void)
  73197. +{
  73198. + assert("jmacd-600", _atom_slab == NULL);
  73199. + assert("jmacd-601", _txnh_slab == NULL);
  73200. +
  73201. + ON_DEBUG(atomic_set(&flush_cnt, 0));
  73202. +
  73203. + _atom_slab = kmem_cache_create("txn_atom", sizeof(txn_atom), 0,
  73204. + SLAB_HWCACHE_ALIGN |
  73205. + SLAB_RECLAIM_ACCOUNT, NULL);
  73206. + if (_atom_slab == NULL)
  73207. + return RETERR(-ENOMEM);
  73208. +
  73209. + _txnh_slab = kmem_cache_create("txn_handle", sizeof(txn_handle), 0,
  73210. + SLAB_HWCACHE_ALIGN, NULL);
  73211. + if (_txnh_slab == NULL) {
  73212. + kmem_cache_destroy(_atom_slab);
  73213. + _atom_slab = NULL;
  73214. + return RETERR(-ENOMEM);
  73215. + }
  73216. +
  73217. + return 0;
  73218. +}
  73219. +
  73220. +/**
  73221. + * done_txnmgr_static - delete txn_atom and txn_handle caches
  73222. + *
  73223. + * This is called on reiser4 module unloading or system shutdown.
  73224. + */
  73225. +void done_txnmgr_static(void)
  73226. +{
  73227. + destroy_reiser4_cache(&_atom_slab);
  73228. + destroy_reiser4_cache(&_txnh_slab);
  73229. +}
  73230. +
  73231. +/**
  73232. + * init_txnmgr - initialize a new transaction manager
  73233. + * @mgr: pointer to transaction manager embedded in reiser4 super block
  73234. + *
  73235. + * This is called on mount. Makes necessary initializations.
  73236. + */
  73237. +void reiser4_init_txnmgr(txn_mgr *mgr)
  73238. +{
  73239. + assert("umka-169", mgr != NULL);
  73240. +
  73241. + mgr->atom_count = 0;
  73242. + mgr->id_count = 1;
  73243. + INIT_LIST_HEAD(&mgr->atoms_list);
  73244. + spin_lock_init(&mgr->tmgr_lock);
  73245. + mutex_init(&mgr->commit_mutex);
  73246. +}
  73247. +
  73248. +/**
  73249. + * reiser4_done_txnmgr - stop transaction manager
  73250. + * @mgr: pointer to transaction manager embedded in reiser4 super block
  73251. + *
  73252. + * This is called on umount. Does sanity checks.
  73253. + */
  73254. +void reiser4_done_txnmgr(txn_mgr *mgr)
  73255. +{
  73256. + assert("umka-170", mgr != NULL);
  73257. + assert("umka-1701", list_empty_careful(&mgr->atoms_list));
  73258. + assert("umka-1702", mgr->atom_count == 0);
  73259. +}
  73260. +
  73261. +/* Initialize a transaction handle. */
  73262. +/* Audited by: umka (2002.06.13) */
  73263. +static void txnh_init(txn_handle * txnh, txn_mode mode)
  73264. +{
  73265. + assert("umka-171", txnh != NULL);
  73266. +
  73267. + txnh->mode = mode;
  73268. + txnh->atom = NULL;
  73269. + reiser4_ctx_gfp_mask_set();
  73270. + txnh->flags = 0;
  73271. + spin_lock_init(&txnh->hlock);
  73272. + INIT_LIST_HEAD(&txnh->txnh_link);
  73273. +}
  73274. +
  73275. +#if REISER4_DEBUG
  73276. +/* Check if a transaction handle is clean. */
  73277. +static int txnh_isclean(txn_handle * txnh)
  73278. +{
  73279. + assert("umka-172", txnh != NULL);
  73280. + return txnh->atom == NULL &&
  73281. + LOCK_CNT_NIL(spin_locked_txnh);
  73282. +}
  73283. +#endif
  73284. +
  73285. +/* Initialize an atom. */
  73286. +static void atom_init(txn_atom * atom)
  73287. +{
  73288. + int level;
  73289. +
  73290. + assert("umka-173", atom != NULL);
  73291. +
  73292. + memset(atom, 0, sizeof(txn_atom));
  73293. +
  73294. + atom->stage = ASTAGE_FREE;
  73295. + atom->start_time = jiffies;
  73296. +
  73297. + for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1)
  73298. + INIT_LIST_HEAD(ATOM_DIRTY_LIST(atom, level));
  73299. +
  73300. + INIT_LIST_HEAD(ATOM_CLEAN_LIST(atom));
  73301. + INIT_LIST_HEAD(ATOM_OVRWR_LIST(atom));
  73302. + INIT_LIST_HEAD(ATOM_WB_LIST(atom));
  73303. + INIT_LIST_HEAD(&atom->inodes);
  73304. + spin_lock_init(&(atom->alock));
  73305. + /* list of transaction handles */
  73306. + INIT_LIST_HEAD(&atom->txnh_list);
  73307. + /* link to transaction manager's list of atoms */
  73308. + INIT_LIST_HEAD(&atom->atom_link);
  73309. + INIT_LIST_HEAD(&atom->fwaitfor_list);
  73310. + INIT_LIST_HEAD(&atom->fwaiting_list);
  73311. + blocknr_set_init(&atom->wandered_map);
  73312. +
  73313. + atom_dset_init(atom);
  73314. +
  73315. + init_atom_fq_parts(atom);
  73316. +}
  73317. +
  73318. +#if REISER4_DEBUG
  73319. +/* Check if an atom is clean. */
  73320. +static int atom_isclean(txn_atom * atom)
  73321. +{
  73322. + int level;
  73323. +
  73324. + assert("umka-174", atom != NULL);
  73325. +
  73326. + for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) {
  73327. + if (!list_empty_careful(ATOM_DIRTY_LIST(atom, level))) {
  73328. + return 0;
  73329. + }
  73330. + }
  73331. +
  73332. + return atom->stage == ASTAGE_FREE &&
  73333. + atom->txnh_count == 0 &&
  73334. + atom->capture_count == 0 &&
  73335. + atomic_read(&atom->refcount) == 0 &&
  73336. + (&atom->atom_link == atom->atom_link.next &&
  73337. + &atom->atom_link == atom->atom_link.prev) &&
  73338. + list_empty_careful(&atom->txnh_list) &&
  73339. + list_empty_careful(ATOM_CLEAN_LIST(atom)) &&
  73340. + list_empty_careful(ATOM_OVRWR_LIST(atom)) &&
  73341. + list_empty_careful(ATOM_WB_LIST(atom)) &&
  73342. + list_empty_careful(&atom->fwaitfor_list) &&
  73343. + list_empty_careful(&atom->fwaiting_list) &&
  73344. + atom_fq_parts_are_clean(atom);
  73345. +}
  73346. +#endif
  73347. +
  73348. +/* Begin a transaction in this context. Currently this uses the reiser4_context's
  73349. + trans_in_ctx, which means that transaction handles are stack-allocated. Eventually
  73350. + this will be extended to allow transaction handles to span several contexts. */
  73351. +/* Audited by: umka (2002.06.13) */
  73352. +void reiser4_txn_begin(reiser4_context * context)
  73353. +{
  73354. + assert("jmacd-544", context->trans == NULL);
  73355. +
  73356. + context->trans = &context->trans_in_ctx;
  73357. +
  73358. + /* FIXME_LATER_JMACD Currently there's no way to begin a TXN_READ_FUSING
  73359. + transcrash. Default should be TXN_WRITE_FUSING. Also, the _trans variable is
  73360. + stack allocated right now, but we would like to allow for dynamically allocated
  73361. + transcrashes that span multiple system calls.
  73362. + */
  73363. + txnh_init(context->trans, TXN_WRITE_FUSING);
  73364. +}
  73365. +
  73366. +/* Finish a transaction handle context. */
  73367. +int reiser4_txn_end(reiser4_context * context)
  73368. +{
  73369. + long ret = 0;
  73370. + txn_handle *txnh;
  73371. +
  73372. + assert("umka-283", context != NULL);
  73373. + assert("nikita-3012", reiser4_schedulable());
  73374. + assert("vs-24", context == get_current_context());
  73375. + assert("nikita-2967", lock_stack_isclean(get_current_lock_stack()));
  73376. +
  73377. + txnh = context->trans;
  73378. + if (txnh != NULL) {
  73379. + if (txnh->atom != NULL)
  73380. + ret = commit_txnh(txnh);
  73381. + assert("jmacd-633", txnh_isclean(txnh));
  73382. + context->trans = NULL;
  73383. + }
  73384. + return ret;
  73385. +}
  73386. +
  73387. +void reiser4_txn_restart(reiser4_context * context)
  73388. +{
  73389. + reiser4_txn_end(context);
  73390. + reiser4_preempt_point();
  73391. + reiser4_txn_begin(context);
  73392. +}
  73393. +
  73394. +void reiser4_txn_restart_current(void)
  73395. +{
  73396. + reiser4_txn_restart(get_current_context());
  73397. +}
  73398. +
  73399. +/* TXN_ATOM */
  73400. +
  73401. +/* Get the atom belonging to a txnh, which is not locked. Return txnh locked. Locks atom, if atom
  73402. + is not NULL. This performs the necessary spin_trylock to break the lock-ordering cycle. May
  73403. + return NULL. */
  73404. +static txn_atom *txnh_get_atom(txn_handle * txnh)
  73405. +{
  73406. + txn_atom *atom;
  73407. +
  73408. + assert("umka-180", txnh != NULL);
  73409. + assert_spin_not_locked(&(txnh->hlock));
  73410. +
  73411. + while (1) {
  73412. + spin_lock_txnh(txnh);
  73413. + atom = txnh->atom;
  73414. +
  73415. + if (atom == NULL)
  73416. + break;
  73417. +
  73418. + if (spin_trylock_atom(atom))
  73419. + break;
  73420. +
  73421. + atomic_inc(&atom->refcount);
  73422. +
  73423. + spin_unlock_txnh(txnh);
  73424. + spin_lock_atom(atom);
  73425. + spin_lock_txnh(txnh);
  73426. +
  73427. + if (txnh->atom == atom) {
  73428. + atomic_dec(&atom->refcount);
  73429. + break;
  73430. + }
  73431. +
  73432. + spin_unlock_txnh(txnh);
  73433. + atom_dec_and_unlock(atom);
  73434. + }
  73435. +
  73436. + return atom;
  73437. +}
  73438. +
  73439. +/* Get the current atom and spinlock it if current atom present. May return NULL */
  73440. +txn_atom *get_current_atom_locked_nocheck(void)
  73441. +{
  73442. + reiser4_context *cx;
  73443. + txn_atom *atom;
  73444. + txn_handle *txnh;
  73445. +
  73446. + cx = get_current_context();
  73447. + assert("zam-437", cx != NULL);
  73448. +
  73449. + txnh = cx->trans;
  73450. + assert("zam-435", txnh != NULL);
  73451. +
  73452. + atom = txnh_get_atom(txnh);
  73453. +
  73454. + spin_unlock_txnh(txnh);
  73455. + return atom;
  73456. +}
  73457. +
  73458. +/* Get the atom belonging to a jnode, which is initially locked. Return with
  73459. + both jnode and atom locked. This performs the necessary spin_trylock to
  73460. + break the lock-ordering cycle. Assumes the jnode is already locked, and
  73461. + returns NULL if atom is not set. */
  73462. +txn_atom *jnode_get_atom(jnode * node)
  73463. +{
  73464. + txn_atom *atom;
  73465. +
  73466. + assert("umka-181", node != NULL);
  73467. +
  73468. + while (1) {
  73469. + assert_spin_locked(&(node->guard));
  73470. +
  73471. + atom = node->atom;
  73472. + /* node is not in any atom */
  73473. + if (atom == NULL)
  73474. + break;
  73475. +
  73476. + /* If atom is not locked, grab the lock and return */
  73477. + if (spin_trylock_atom(atom))
  73478. + break;
  73479. +
  73480. + /* At least one jnode belongs to this atom it guarantees that
  73481. + * atom->refcount > 0, we can safely increment refcount. */
  73482. + atomic_inc(&atom->refcount);
  73483. + spin_unlock_jnode(node);
  73484. +
  73485. + /* re-acquire spin locks in the right order */
  73486. + spin_lock_atom(atom);
  73487. + spin_lock_jnode(node);
  73488. +
  73489. + /* check if node still points to the same atom. */
  73490. + if (node->atom == atom) {
  73491. + atomic_dec(&atom->refcount);
  73492. + break;
  73493. + }
  73494. +
  73495. + /* releasing of atom lock and reference requires not holding
  73496. + * locks on jnodes. */
  73497. + spin_unlock_jnode(node);
  73498. +
  73499. + /* We do not sure that this atom has extra references except our
  73500. + * one, so we should call proper function which may free atom if
  73501. + * last reference is released. */
  73502. + atom_dec_and_unlock(atom);
  73503. +
  73504. + /* lock jnode again for getting valid node->atom pointer
  73505. + * value. */
  73506. + spin_lock_jnode(node);
  73507. + }
  73508. +
  73509. + return atom;
  73510. +}
  73511. +
  73512. +/* Returns true if @node is dirty and part of the same atom as one of its neighbors. Used
  73513. + by flush code to indicate whether the next node (in some direction) is suitable for
  73514. + flushing. */
  73515. +int
  73516. +same_slum_check(jnode * node, jnode * check, int alloc_check, int alloc_value)
  73517. +{
  73518. + int compat;
  73519. + txn_atom *atom;
  73520. +
  73521. + assert("umka-182", node != NULL);
  73522. + assert("umka-183", check != NULL);
  73523. +
  73524. + /* Not sure what this function is supposed to do if supplied with @check that is
  73525. + neither formatted nor unformatted (bitmap or so). */
  73526. + assert("nikita-2373", jnode_is_znode(check)
  73527. + || jnode_is_unformatted(check));
  73528. +
  73529. + /* Need a lock on CHECK to get its atom and to check various state bits.
  73530. + Don't need a lock on NODE once we get the atom lock. */
  73531. + /* It is not enough to lock two nodes and check (node->atom ==
  73532. + check->atom) because atom could be locked and being fused at that
  73533. + moment, jnodes of the atom of that state (being fused) can point to
  73534. + different objects, but the atom is the same. */
  73535. + spin_lock_jnode(check);
  73536. +
  73537. + atom = jnode_get_atom(check);
  73538. +
  73539. + if (atom == NULL) {
  73540. + compat = 0;
  73541. + } else {
  73542. + compat = (node->atom == atom && JF_ISSET(check, JNODE_DIRTY));
  73543. +
  73544. + if (compat && jnode_is_znode(check)) {
  73545. + compat &= znode_is_connected(JZNODE(check));
  73546. + }
  73547. +
  73548. + if (compat && alloc_check) {
  73549. + compat &= (alloc_value == jnode_is_flushprepped(check));
  73550. + }
  73551. +
  73552. + spin_unlock_atom(atom);
  73553. + }
  73554. +
  73555. + spin_unlock_jnode(check);
  73556. +
  73557. + return compat;
  73558. +}
  73559. +
  73560. +/* Decrement the atom's reference count and if it falls to zero, free it. */
  73561. +void atom_dec_and_unlock(txn_atom * atom)
  73562. +{
  73563. + txn_mgr *mgr = &get_super_private(reiser4_get_current_sb())->tmgr;
  73564. +
  73565. + assert("umka-186", atom != NULL);
  73566. + assert_spin_locked(&(atom->alock));
  73567. + assert("zam-1039", atomic_read(&atom->refcount) > 0);
  73568. +
  73569. + if (atomic_dec_and_test(&atom->refcount)) {
  73570. + /* take txnmgr lock and atom lock in proper order. */
  73571. + if (!spin_trylock_txnmgr(mgr)) {
  73572. + /* This atom should exist after we re-acquire its
  73573. + * spinlock, so we increment its reference counter. */
  73574. + atomic_inc(&atom->refcount);
  73575. + spin_unlock_atom(atom);
  73576. + spin_lock_txnmgr(mgr);
  73577. + spin_lock_atom(atom);
  73578. +
  73579. + if (!atomic_dec_and_test(&atom->refcount)) {
  73580. + spin_unlock_atom(atom);
  73581. + spin_unlock_txnmgr(mgr);
  73582. + return;
  73583. + }
  73584. + }
  73585. + assert_spin_locked(&(mgr->tmgr_lock));
  73586. + atom_free(atom);
  73587. + spin_unlock_txnmgr(mgr);
  73588. + } else
  73589. + spin_unlock_atom(atom);
  73590. +}
  73591. +
  73592. +/* Create new atom and connect it to given transaction handle. This adds the
  73593. + atom to the transaction manager's list and sets its reference count to 1, an
  73594. + artificial reference which is kept until it commits. We play strange games
  73595. + to avoid allocation under jnode & txnh spinlocks.*/
  73596. +
  73597. +static int atom_begin_and_assign_to_txnh(txn_atom ** atom_alloc, txn_handle * txnh)
  73598. +{
  73599. + txn_atom *atom;
  73600. + txn_mgr *mgr;
  73601. +
  73602. + if (REISER4_DEBUG && rofs_tree(current_tree)) {
  73603. + warning("nikita-3366", "Creating atom on rofs");
  73604. + dump_stack();
  73605. + }
  73606. +
  73607. + if (*atom_alloc == NULL) {
  73608. + (*atom_alloc) = kmem_cache_alloc(_atom_slab,
  73609. + reiser4_ctx_gfp_mask_get());
  73610. +
  73611. + if (*atom_alloc == NULL)
  73612. + return RETERR(-ENOMEM);
  73613. + }
  73614. +
  73615. + /* and, also, txnmgr spin lock should be taken before jnode and txnh
  73616. + locks. */
  73617. + mgr = &get_super_private(reiser4_get_current_sb())->tmgr;
  73618. + spin_lock_txnmgr(mgr);
  73619. + spin_lock_txnh(txnh);
  73620. +
  73621. + /* Check whether new atom still needed */
  73622. + if (txnh->atom != NULL) {
  73623. + /* NOTE-NIKITA probably it is rather better to free
  73624. + * atom_alloc here than thread it up to reiser4_try_capture() */
  73625. +
  73626. + spin_unlock_txnh(txnh);
  73627. + spin_unlock_txnmgr(mgr);
  73628. +
  73629. + return -E_REPEAT;
  73630. + }
  73631. +
  73632. + atom = *atom_alloc;
  73633. + *atom_alloc = NULL;
  73634. +
  73635. + atom_init(atom);
  73636. +
  73637. + assert("jmacd-17", atom_isclean(atom));
  73638. +
  73639. + /*
  73640. + * lock ordering is broken here. It is ok, as long as @atom is new
  73641. + * and inaccessible for others. We can't use spin_lock_atom or
  73642. + * spin_lock(&atom->alock) because they care about locking
  73643. + * dependencies. spin_trylock_lock doesn't.
  73644. + */
  73645. + check_me("", spin_trylock_atom(atom));
  73646. +
  73647. + /* add atom to the end of transaction manager's list of atoms */
  73648. + list_add_tail(&atom->atom_link, &mgr->atoms_list);
  73649. + atom->atom_id = mgr->id_count++;
  73650. + mgr->atom_count += 1;
  73651. +
  73652. + /* Release txnmgr lock */
  73653. + spin_unlock_txnmgr(mgr);
  73654. +
  73655. + /* One reference until it commits. */
  73656. + atomic_inc(&atom->refcount);
  73657. + atom->stage = ASTAGE_CAPTURE_FUSE;
  73658. + atom->super = reiser4_get_current_sb();
  73659. + capture_assign_txnh_nolock(atom, txnh);
  73660. +
  73661. + spin_unlock_atom(atom);
  73662. + spin_unlock_txnh(txnh);
  73663. +
  73664. + return -E_REPEAT;
  73665. +}
  73666. +
  73667. +/* Return true if an atom is currently "open". */
  73668. +static int atom_isopen(const txn_atom * atom)
  73669. +{
  73670. + assert("umka-185", atom != NULL);
  73671. +
  73672. + return atom->stage > 0 && atom->stage < ASTAGE_PRE_COMMIT;
  73673. +}
  73674. +
  73675. +/* Return the number of pointers to this atom that must be updated during fusion. This
  73676. + approximates the amount of work to be done. Fusion chooses the atom with fewer
  73677. + pointers to fuse into the atom with more pointers. */
  73678. +static int atom_pointer_count(const txn_atom * atom)
  73679. +{
  73680. + assert("umka-187", atom != NULL);
  73681. +
  73682. + /* This is a measure of the amount of work needed to fuse this atom
  73683. + * into another. */
  73684. + return atom->txnh_count + atom->capture_count;
  73685. +}
  73686. +
  73687. +/* Called holding the atom lock, this removes the atom from the transaction manager list
  73688. + and frees it. */
  73689. +static void atom_free(txn_atom * atom)
  73690. +{
  73691. + txn_mgr *mgr = &get_super_private(reiser4_get_current_sb())->tmgr;
  73692. +
  73693. + assert("umka-188", atom != NULL);
  73694. + assert_spin_locked(&(atom->alock));
  73695. +
  73696. + /* Remove from the txn_mgr's atom list */
  73697. + assert_spin_locked(&(mgr->tmgr_lock));
  73698. + mgr->atom_count -= 1;
  73699. + list_del_init(&atom->atom_link);
  73700. +
  73701. + /* Clean the atom */
  73702. + assert("jmacd-16",
  73703. + (atom->stage == ASTAGE_INVALID || atom->stage == ASTAGE_DONE));
  73704. + atom->stage = ASTAGE_FREE;
  73705. +
  73706. + blocknr_set_destroy(&atom->wandered_map);
  73707. +
  73708. + atom_dset_destroy(atom);
  73709. +
  73710. + assert("jmacd-16", atom_isclean(atom));
  73711. +
  73712. + spin_unlock_atom(atom);
  73713. +
  73714. + kmem_cache_free(_atom_slab, atom);
  73715. +}
  73716. +
  73717. +static int atom_is_dotard(const txn_atom * atom)
  73718. +{
  73719. + return time_after(jiffies, atom->start_time +
  73720. + get_current_super_private()->tmgr.atom_max_age);
  73721. +}
  73722. +
  73723. +static int atom_can_be_committed(txn_atom * atom)
  73724. +{
  73725. + assert_spin_locked(&(atom->alock));
  73726. + assert("zam-885", atom->txnh_count > atom->nr_waiters);
  73727. + return atom->txnh_count == atom->nr_waiters + 1;
  73728. +}
  73729. +
  73730. +/* Return true if an atom should commit now. This is determined by aging, atom
  73731. + size or atom flags. */
  73732. +static int atom_should_commit(const txn_atom * atom)
  73733. +{
  73734. + assert("umka-189", atom != NULL);
  73735. + return
  73736. + (atom->flags & ATOM_FORCE_COMMIT) ||
  73737. + ((unsigned)atom_pointer_count(atom) >
  73738. + get_current_super_private()->tmgr.atom_max_size)
  73739. + || atom_is_dotard(atom);
  73740. +}
  73741. +
  73742. +/* return 1 if current atom exists and requires commit. */
  73743. +int current_atom_should_commit(void)
  73744. +{
  73745. + txn_atom *atom;
  73746. + int result = 0;
  73747. +
  73748. + atom = get_current_atom_locked_nocheck();
  73749. + if (atom) {
  73750. + result = atom_should_commit(atom);
  73751. + spin_unlock_atom(atom);
  73752. + }
  73753. + return result;
  73754. +}
  73755. +
  73756. +static int atom_should_commit_asap(const txn_atom * atom)
  73757. +{
  73758. + unsigned int captured;
  73759. + unsigned int pinnedpages;
  73760. +
  73761. + assert("nikita-3309", atom != NULL);
  73762. +
  73763. + captured = (unsigned)atom->capture_count;
  73764. + pinnedpages = (captured >> PAGE_SHIFT) * sizeof(znode);
  73765. +
  73766. + return (pinnedpages > (totalram_pages() >> 3)) || (atom->flushed > 100);
  73767. +}
  73768. +
  73769. +static jnode *find_first_dirty_in_list(struct list_head *head, int flags)
  73770. +{
  73771. + jnode *first_dirty;
  73772. +
  73773. + list_for_each_entry(first_dirty, head, capture_link) {
  73774. + if (!(flags & JNODE_FLUSH_COMMIT)) {
  73775. + /*
  73776. + * skip jnodes which "heard banshee" or having active
  73777. + * I/O
  73778. + */
  73779. + if (JF_ISSET(first_dirty, JNODE_HEARD_BANSHEE) ||
  73780. + JF_ISSET(first_dirty, JNODE_WRITEBACK))
  73781. + continue;
  73782. + }
  73783. + return first_dirty;
  73784. + }
  73785. + return NULL;
  73786. +}
  73787. +
  73788. +/* Get first dirty node from the atom's dirty_nodes[n] lists; return NULL if atom has no dirty
  73789. + nodes on atom's lists */
  73790. +jnode *find_first_dirty_jnode(txn_atom * atom, int flags)
  73791. +{
  73792. + jnode *first_dirty;
  73793. + tree_level level;
  73794. +
  73795. + assert_spin_locked(&(atom->alock));
  73796. +
  73797. + /* The flush starts from LEAF_LEVEL (=1). */
  73798. + for (level = 1; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) {
  73799. + if (list_empty_careful(ATOM_DIRTY_LIST(atom, level)))
  73800. + continue;
  73801. +
  73802. + first_dirty =
  73803. + find_first_dirty_in_list(ATOM_DIRTY_LIST(atom, level),
  73804. + flags);
  73805. + if (first_dirty)
  73806. + return first_dirty;
  73807. + }
  73808. +
  73809. + /* znode-above-root is on the list #0. */
  73810. + return find_first_dirty_in_list(ATOM_DIRTY_LIST(atom, 0), flags);
  73811. +}
  73812. +
  73813. +static void dispatch_wb_list(txn_atom * atom, flush_queue_t * fq)
  73814. +{
  73815. + jnode *cur;
  73816. +
  73817. + assert("zam-905", atom_is_protected(atom));
  73818. +
  73819. + cur = list_entry(ATOM_WB_LIST(atom)->next, jnode, capture_link);
  73820. + while (ATOM_WB_LIST(atom) != &cur->capture_link) {
  73821. + jnode *next = list_entry(cur->capture_link.next, jnode, capture_link);
  73822. +
  73823. + spin_lock_jnode(cur);
  73824. + if (!JF_ISSET(cur, JNODE_WRITEBACK)) {
  73825. + if (JF_ISSET(cur, JNODE_DIRTY)) {
  73826. + queue_jnode(fq, cur);
  73827. + } else {
  73828. + /* move jnode to atom's clean list */
  73829. + list_move_tail(&cur->capture_link,
  73830. + ATOM_CLEAN_LIST(atom));
  73831. + }
  73832. + }
  73833. + spin_unlock_jnode(cur);
  73834. +
  73835. + cur = next;
  73836. + }
  73837. +}
  73838. +
  73839. +/* Scan current atom->writeback_nodes list, re-submit dirty and !writeback
  73840. + * jnodes to disk. */
  73841. +static int submit_wb_list(void)
  73842. +{
  73843. + int ret;
  73844. + flush_queue_t *fq;
  73845. +
  73846. + fq = get_fq_for_current_atom();
  73847. + if (IS_ERR(fq))
  73848. + return PTR_ERR(fq);
  73849. +
  73850. + dispatch_wb_list(fq->atom, fq);
  73851. + spin_unlock_atom(fq->atom);
  73852. +
  73853. + ret = reiser4_write_fq(fq, NULL, 1);
  73854. + reiser4_fq_put(fq);
  73855. +
  73856. + return ret;
  73857. +}
  73858. +
  73859. +/* Wait completion of all writes, re-submit atom writeback list if needed. */
  73860. +static int current_atom_complete_writes(void)
  73861. +{
  73862. + int ret;
  73863. +
  73864. + /* Each jnode from that list was modified and dirtied when it had i/o
  73865. + * request running already. After i/o completion we have to resubmit
  73866. + * them to disk again.*/
  73867. + ret = submit_wb_list();
  73868. + if (ret < 0)
  73869. + return ret;
  73870. +
  73871. + /* Wait all i/o completion */
  73872. + ret = current_atom_finish_all_fq();
  73873. + if (ret)
  73874. + return ret;
  73875. +
  73876. + /* Scan wb list again; all i/o should be completed, we re-submit dirty
  73877. + * nodes to disk */
  73878. + ret = submit_wb_list();
  73879. + if (ret < 0)
  73880. + return ret;
  73881. +
  73882. + /* Wait all nodes we just submitted */
  73883. + return current_atom_finish_all_fq();
  73884. +}
  73885. +
  73886. +#if REISER4_DEBUG
  73887. +
  73888. +static void reiser4_info_atom(const char *prefix, const txn_atom * atom)
  73889. +{
  73890. + if (atom == NULL) {
  73891. + printk("%s: no atom\n", prefix);
  73892. + return;
  73893. + }
  73894. +
  73895. + printk("%s: refcount: %i id: %i flags: %x txnh_count: %i"
  73896. + " capture_count: %i stage: %x start: %lu, flushed: %i\n", prefix,
  73897. + atomic_read(&atom->refcount), atom->atom_id, atom->flags,
  73898. + atom->txnh_count, atom->capture_count, atom->stage,
  73899. + atom->start_time, atom->flushed);
  73900. +}
  73901. +
  73902. +#else /* REISER4_DEBUG */
  73903. +
  73904. +static inline void reiser4_info_atom(const char *prefix, const txn_atom * atom) {}
  73905. +
  73906. +#endif /* REISER4_DEBUG */
  73907. +
  73908. +#define TOOMANYFLUSHES (1 << 13)
  73909. +
  73910. +/* Called with the atom locked and no open "active" transaction handlers except
  73911. + ours, this function calls flush_current_atom() until all dirty nodes are
  73912. + processed. Then it initiates commit processing.
  73913. +
  73914. + Called by the single remaining open "active" txnh, which is closing. Other
  73915. + open txnhs belong to processes which wait atom commit in commit_txnh()
  73916. + routine. They are counted as "waiters" in atom->nr_waiters. Therefore as
  73917. + long as we hold the atom lock none of the jnodes can be captured and/or
  73918. + locked.
  73919. +
  73920. + Return value is an error code if commit fails.
  73921. +*/
  73922. +static int commit_current_atom(long *nr_submitted, txn_atom ** atom)
  73923. +{
  73924. + reiser4_super_info_data *sbinfo = get_current_super_private();
  73925. + long ret = 0;
  73926. + /* how many times jnode_flush() was called as a part of attempt to
  73927. + * commit this atom. */
  73928. + int flushiters;
  73929. +
  73930. + assert("zam-888", atom != NULL && *atom != NULL);
  73931. + assert_spin_locked(&((*atom)->alock));
  73932. + assert("zam-887", get_current_context()->trans->atom == *atom);
  73933. + assert("jmacd-151", atom_isopen(*atom));
  73934. +
  73935. + assert("nikita-3184",
  73936. + get_current_super_private()->delete_mutex_owner != current);
  73937. +
  73938. + for (flushiters = 0;; ++flushiters) {
  73939. + ret =
  73940. + flush_current_atom(JNODE_FLUSH_WRITE_BLOCKS |
  73941. + JNODE_FLUSH_COMMIT,
  73942. + LONG_MAX /* nr_to_write */ ,
  73943. + nr_submitted, atom, NULL);
  73944. + if (ret != -E_REPEAT)
  73945. + break;
  73946. +
  73947. + /* if atom's dirty list contains one znode which is
  73948. + HEARD_BANSHEE and is locked we have to allow lock owner to
  73949. + continue and uncapture that znode */
  73950. + reiser4_preempt_point();
  73951. +
  73952. + *atom = get_current_atom_locked();
  73953. + if (flushiters > TOOMANYFLUSHES && IS_POW(flushiters)) {
  73954. + warning("nikita-3176",
  73955. + "Flushing like mad: %i", flushiters);
  73956. + reiser4_info_atom("atom", *atom);
  73957. + DEBUGON(flushiters > (1 << 20));
  73958. + }
  73959. + }
  73960. +
  73961. + if (ret)
  73962. + return ret;
  73963. +
  73964. + assert_spin_locked(&((*atom)->alock));
  73965. +
  73966. + if (!atom_can_be_committed(*atom)) {
  73967. + spin_unlock_atom(*atom);
  73968. + return RETERR(-E_REPEAT);
  73969. + }
  73970. +
  73971. + if ((*atom)->capture_count == 0)
  73972. + goto done;
  73973. +
  73974. + /* Up to this point we have been flushing and after flush is called we
  73975. + return -E_REPEAT. Now we can commit. We cannot return -E_REPEAT
  73976. + at this point, commit should be successful. */
  73977. + reiser4_atom_set_stage(*atom, ASTAGE_PRE_COMMIT);
  73978. + ON_DEBUG(((*atom)->committer = current));
  73979. + spin_unlock_atom(*atom);
  73980. +
  73981. + ret = current_atom_complete_writes();
  73982. + if (ret)
  73983. + return ret;
  73984. +
  73985. + assert("zam-906", list_empty(ATOM_WB_LIST(*atom)));
  73986. +
  73987. + /* isolate critical code path which should be executed by only one
  73988. + * thread using tmgr mutex */
  73989. + mutex_lock(&sbinfo->tmgr.commit_mutex);
  73990. +
  73991. + ret = reiser4_write_logs(nr_submitted);
  73992. + if (ret < 0)
  73993. + reiser4_panic("zam-597", "write log failed (%ld)\n", ret);
  73994. +
  73995. + /* The atom->ovrwr_nodes list is processed under commit mutex held
  73996. + because of bitmap nodes which are captured by special way in
  73997. + reiser4_pre_commit_hook_bitmap(), that way does not include
  73998. + capture_fuse_wait() as a capturing of other nodes does -- the commit
  73999. + mutex is used for transaction isolation instead. */
  74000. + reiser4_invalidate_list(ATOM_OVRWR_LIST(*atom));
  74001. + mutex_unlock(&sbinfo->tmgr.commit_mutex);
  74002. +
  74003. + reiser4_invalidate_list(ATOM_CLEAN_LIST(*atom));
  74004. + reiser4_invalidate_list(ATOM_WB_LIST(*atom));
  74005. + assert("zam-927", list_empty(&(*atom)->inodes));
  74006. +
  74007. + spin_lock_atom(*atom);
  74008. + done:
  74009. + reiser4_atom_set_stage(*atom, ASTAGE_DONE);
  74010. + ON_DEBUG((*atom)->committer = NULL);
  74011. +
  74012. + /* Atom's state changes, so wake up everybody waiting for this
  74013. + event. */
  74014. + wakeup_atom_waiting_list(*atom);
  74015. +
  74016. + /* Decrement the "until commit" reference, at least one txnh (the caller) is
  74017. + still open. */
  74018. + atomic_dec(&(*atom)->refcount);
  74019. +
  74020. + assert("jmacd-1070", atomic_read(&(*atom)->refcount) > 0);
  74021. + assert("jmacd-1062", (*atom)->capture_count == 0);
  74022. + BUG_ON((*atom)->capture_count != 0);
  74023. + assert_spin_locked(&((*atom)->alock));
  74024. +
  74025. + return ret;
  74026. +}
  74027. +
  74028. +/* TXN_TXNH */
  74029. +
  74030. +/**
  74031. + * force_commit_atom - commit current atom and wait commit completion
  74032. + * @txnh:
  74033. + *
  74034. + * Commits current atom and wait commit completion; current atom and @txnh have
  74035. + * to be spinlocked before call, this function unlocks them on exit.
  74036. + */
  74037. +int force_commit_atom(txn_handle *txnh)
  74038. +{
  74039. + txn_atom *atom;
  74040. +
  74041. + assert("zam-837", txnh != NULL);
  74042. + assert_spin_locked(&(txnh->hlock));
  74043. + assert("nikita-2966", lock_stack_isclean(get_current_lock_stack()));
  74044. +
  74045. + atom = txnh->atom;
  74046. +
  74047. + assert("zam-834", atom != NULL);
  74048. + assert_spin_locked(&(atom->alock));
  74049. +
  74050. + /*
  74051. + * Set flags for atom and txnh: forcing atom commit and waiting for
  74052. + * commit completion
  74053. + */
  74054. + txnh->flags |= TXNH_WAIT_COMMIT;
  74055. + atom->flags |= ATOM_FORCE_COMMIT;
  74056. +
  74057. + spin_unlock_txnh(txnh);
  74058. + spin_unlock_atom(atom);
  74059. +
  74060. + /* commit is here */
  74061. + reiser4_txn_restart_current();
  74062. + return 0;
  74063. +}
  74064. +
  74065. +/* Called to force commit of any outstanding atoms. @commit_all_atoms controls
  74066. + * should we commit all atoms including new ones which are created after this
  74067. + * function is called. */
  74068. +int txnmgr_force_commit_all(struct super_block *super, int commit_all_atoms)
  74069. +{
  74070. + int ret;
  74071. + txn_atom *atom;
  74072. + txn_mgr *mgr;
  74073. + txn_handle *txnh;
  74074. + unsigned long start_time = jiffies;
  74075. + reiser4_context *ctx = get_current_context();
  74076. +
  74077. + assert("nikita-2965", lock_stack_isclean(get_current_lock_stack()));
  74078. + assert("nikita-3058", reiser4_commit_check_locks());
  74079. +
  74080. + reiser4_txn_restart_current();
  74081. +
  74082. + mgr = &get_super_private(super)->tmgr;
  74083. +
  74084. + txnh = ctx->trans;
  74085. +
  74086. + again:
  74087. +
  74088. + spin_lock_txnmgr(mgr);
  74089. +
  74090. + list_for_each_entry(atom, &mgr->atoms_list, atom_link) {
  74091. + spin_lock_atom(atom);
  74092. +
  74093. + /* Commit any atom which can be committed. If @commit_all_atoms
  74094. + * is not set we commit only atoms which were created before
  74095. + * this call is started. */
  74096. + if (commit_all_atoms
  74097. + || time_before_eq(atom->start_time, start_time)) {
  74098. + if (atom->stage <= ASTAGE_POST_COMMIT) {
  74099. + spin_unlock_txnmgr(mgr);
  74100. +
  74101. + if (atom->stage < ASTAGE_PRE_COMMIT) {
  74102. + spin_lock_txnh(txnh);
  74103. + /* Add force-context txnh */
  74104. + capture_assign_txnh_nolock(atom, txnh);
  74105. + ret = force_commit_atom(txnh);
  74106. + if (ret)
  74107. + return ret;
  74108. + } else
  74109. + /* wait atom commit */
  74110. + reiser4_atom_wait_event(atom);
  74111. +
  74112. + goto again;
  74113. + }
  74114. + }
  74115. +
  74116. + spin_unlock_atom(atom);
  74117. + }
  74118. +
  74119. +#if REISER4_DEBUG
  74120. + if (commit_all_atoms) {
  74121. + reiser4_super_info_data *sbinfo = get_super_private(super);
  74122. + spin_lock_reiser4_super(sbinfo);
  74123. + assert("zam-813",
  74124. + sbinfo->blocks_fake_allocated_unformatted == 0);
  74125. + assert("zam-812", sbinfo->blocks_fake_allocated == 0);
  74126. + spin_unlock_reiser4_super(sbinfo);
  74127. + }
  74128. +#endif
  74129. +
  74130. + spin_unlock_txnmgr(mgr);
  74131. +
  74132. + return 0;
  74133. +}
  74134. +
  74135. +/* check whether commit_some_atoms() can commit @atom. Locking is up to the
  74136. + * caller */
  74137. +static int atom_is_committable(txn_atom * atom)
  74138. +{
  74139. + return
  74140. + atom->stage < ASTAGE_PRE_COMMIT &&
  74141. + atom->txnh_count == atom->nr_waiters && atom_should_commit(atom);
  74142. +}
  74143. +
  74144. +/* called periodically from ktxnmgrd to commit old atoms. Releases ktxnmgrd spin
  74145. + * lock at exit */
  74146. +int commit_some_atoms(txn_mgr * mgr)
  74147. +{
  74148. + int ret = 0;
  74149. + txn_atom *atom;
  74150. + txn_handle *txnh;
  74151. + reiser4_context *ctx;
  74152. + struct list_head *pos, *tmp;
  74153. +
  74154. + ctx = get_current_context();
  74155. + assert("nikita-2444", ctx != NULL);
  74156. +
  74157. + txnh = ctx->trans;
  74158. + spin_lock_txnmgr(mgr);
  74159. +
  74160. + /*
  74161. + * this is to avoid gcc complain that atom might be used
  74162. + * uninitialized
  74163. + */
  74164. + atom = NULL;
  74165. +
  74166. + /* look for atom to commit */
  74167. + list_for_each_safe(pos, tmp, &mgr->atoms_list) {
  74168. + atom = list_entry(pos, txn_atom, atom_link);
  74169. + /*
  74170. + * first test without taking atom spin lock, whether it is
  74171. + * eligible for committing at all
  74172. + */
  74173. + if (atom_is_committable(atom)) {
  74174. + /* now, take spin lock and re-check */
  74175. + spin_lock_atom(atom);
  74176. + if (atom_is_committable(atom))
  74177. + break;
  74178. + spin_unlock_atom(atom);
  74179. + }
  74180. + }
  74181. +
  74182. + ret = (&mgr->atoms_list == pos);
  74183. + spin_unlock_txnmgr(mgr);
  74184. +
  74185. + if (ret) {
  74186. + /* nothing found */
  74187. + spin_unlock(&mgr->daemon->guard);
  74188. + return 0;
  74189. + }
  74190. +
  74191. + spin_lock_txnh(txnh);
  74192. +
  74193. + BUG_ON(atom == NULL);
  74194. + /* Set the atom to force committing */
  74195. + atom->flags |= ATOM_FORCE_COMMIT;
  74196. +
  74197. + /* Add force-context txnh */
  74198. + capture_assign_txnh_nolock(atom, txnh);
  74199. +
  74200. + spin_unlock_txnh(txnh);
  74201. + spin_unlock_atom(atom);
  74202. +
  74203. + /* we are about to release daemon spin lock, notify daemon it
  74204. + has to rescan atoms */
  74205. + mgr->daemon->rescan = 1;
  74206. + spin_unlock(&mgr->daemon->guard);
  74207. + reiser4_txn_restart_current();
  74208. + return 0;
  74209. +}
  74210. +
  74211. +static int txn_try_to_fuse_small_atom(txn_mgr * tmgr, txn_atom * atom)
  74212. +{
  74213. + int atom_stage;
  74214. + txn_atom *atom_2;
  74215. + int repeat;
  74216. +
  74217. + assert("zam-1051", atom->stage < ASTAGE_PRE_COMMIT);
  74218. +
  74219. + atom_stage = atom->stage;
  74220. + repeat = 0;
  74221. +
  74222. + if (!spin_trylock_txnmgr(tmgr)) {
  74223. + atomic_inc(&atom->refcount);
  74224. + spin_unlock_atom(atom);
  74225. + spin_lock_txnmgr(tmgr);
  74226. + spin_lock_atom(atom);
  74227. + repeat = 1;
  74228. + if (atom->stage != atom_stage) {
  74229. + spin_unlock_txnmgr(tmgr);
  74230. + atom_dec_and_unlock(atom);
  74231. + return -E_REPEAT;
  74232. + }
  74233. + atomic_dec(&atom->refcount);
  74234. + }
  74235. +
  74236. + list_for_each_entry(atom_2, &tmgr->atoms_list, atom_link) {
  74237. + if (atom == atom_2)
  74238. + continue;
  74239. + /*
  74240. + * if trylock does not succeed we just do not fuse with that
  74241. + * atom.
  74242. + */
  74243. + if (spin_trylock_atom(atom_2)) {
  74244. + if (atom_2->stage < ASTAGE_PRE_COMMIT) {
  74245. + spin_unlock_txnmgr(tmgr);
  74246. + capture_fuse_into(atom_2, atom);
  74247. + /* all locks are lost we can only repeat here */
  74248. + return -E_REPEAT;
  74249. + }
  74250. + spin_unlock_atom(atom_2);
  74251. + }
  74252. + }
  74253. + atom->flags |= ATOM_CANCEL_FUSION;
  74254. + spin_unlock_txnmgr(tmgr);
  74255. + if (repeat) {
  74256. + spin_unlock_atom(atom);
  74257. + return -E_REPEAT;
  74258. + }
  74259. + return 0;
  74260. +}
  74261. +
  74262. +/* Calls jnode_flush for current atom if it exists; if not, just take another
  74263. + atom and call jnode_flush() for him. If current transaction handle has
  74264. + already assigned atom (current atom) we have to close current transaction
  74265. + prior to switch to another atom or do something with current atom. This
  74266. + code tries to flush current atom.
  74267. +
  74268. + flush_some_atom() is called as part of memory clearing process. It is
  74269. + invoked from balance_dirty_pages(), pdflushd, and entd.
  74270. +
  74271. + If we can flush no nodes, atom is committed, because this frees memory.
  74272. +
  74273. + If atom is too large or too old it is committed also.
  74274. +*/
  74275. +int
  74276. +flush_some_atom(jnode * start, long *nr_submitted, const struct writeback_control *wbc,
  74277. + int flags)
  74278. +{
  74279. + reiser4_context *ctx = get_current_context();
  74280. + txn_mgr *tmgr = &get_super_private(ctx->super)->tmgr;
  74281. + txn_handle *txnh = ctx->trans;
  74282. + txn_atom *atom;
  74283. + int ret;
  74284. +
  74285. + BUG_ON(wbc->nr_to_write == 0);
  74286. + BUG_ON(*nr_submitted != 0);
  74287. + assert("zam-1042", txnh != NULL);
  74288. +repeat:
  74289. + if (txnh->atom == NULL) {
  74290. + /* current atom is not available, take first from txnmgr */
  74291. + spin_lock_txnmgr(tmgr);
  74292. +
  74293. + /* traverse the list of all atoms */
  74294. + list_for_each_entry(atom, &tmgr->atoms_list, atom_link) {
  74295. + /* lock atom before checking its state */
  74296. + spin_lock_atom(atom);
  74297. +
  74298. + /*
  74299. + * we need an atom which is not being committed and
  74300. + * which has no flushers (jnode_flush() add one flusher
  74301. + * at the beginning and subtract one at the end).
  74302. + */
  74303. + if (atom->stage < ASTAGE_PRE_COMMIT &&
  74304. + atom->nr_flushers == 0) {
  74305. + spin_lock_txnh(txnh);
  74306. + capture_assign_txnh_nolock(atom, txnh);
  74307. + spin_unlock_txnh(txnh);
  74308. +
  74309. + goto found;
  74310. + }
  74311. +
  74312. + spin_unlock_atom(atom);
  74313. + }
  74314. +
  74315. + /*
  74316. + * Write throttling is the case when no atom can be
  74317. + * flushed/committed.
  74318. + */
  74319. + if (!ctx->flush_bd_task) {
  74320. + list_for_each_entry(atom, &tmgr->atoms_list, atom_link) {
  74321. + spin_lock_atom(atom);
  74322. + /* Repeat the check from the above. */
  74323. + if (atom->stage < ASTAGE_PRE_COMMIT
  74324. + && atom->nr_flushers == 0) {
  74325. + spin_lock_txnh(txnh);
  74326. + capture_assign_txnh_nolock(atom, txnh);
  74327. + spin_unlock_txnh(txnh);
  74328. +
  74329. + goto found;
  74330. + }
  74331. + if (atom->stage <= ASTAGE_POST_COMMIT) {
  74332. + spin_unlock_txnmgr(tmgr);
  74333. + /*
  74334. + * we just wait until atom's flusher
  74335. + * makes a progress in flushing or
  74336. + * committing the atom
  74337. + */
  74338. + reiser4_atom_wait_event(atom);
  74339. + goto repeat;
  74340. + }
  74341. + spin_unlock_atom(atom);
  74342. + }
  74343. + }
  74344. + spin_unlock_txnmgr(tmgr);
  74345. + return 0;
  74346. + found:
  74347. + spin_unlock_txnmgr(tmgr);
  74348. + } else
  74349. + atom = get_current_atom_locked();
  74350. +
  74351. + BUG_ON(atom->super != ctx->super);
  74352. + assert("vs-35", atom->super == ctx->super);
  74353. + if (start) {
  74354. + spin_lock_jnode(start);
  74355. + ret = (atom == start->atom) ? 1 : 0;
  74356. + spin_unlock_jnode(start);
  74357. + if (ret == 0)
  74358. + start = NULL;
  74359. + }
  74360. + ret = flush_current_atom(flags, wbc->nr_to_write, nr_submitted, &atom, start);
  74361. + if (ret == 0) {
  74362. + /* flush_current_atom returns 0 only if it submitted for write
  74363. + nothing */
  74364. + BUG_ON(*nr_submitted != 0);
  74365. + if (*nr_submitted == 0 || atom_should_commit_asap(atom)) {
  74366. + if (atom->capture_count < tmgr->atom_min_size &&
  74367. + !(atom->flags & ATOM_CANCEL_FUSION)) {
  74368. + ret = txn_try_to_fuse_small_atom(tmgr, atom);
  74369. + if (ret == -E_REPEAT) {
  74370. + reiser4_preempt_point();
  74371. + goto repeat;
  74372. + }
  74373. + }
  74374. + /* if early flushing could not make more nodes clean,
  74375. + * or atom is too old/large,
  74376. + * we force current atom to commit */
  74377. + /* wait for commit completion but only if this
  74378. + * wouldn't stall pdflushd and ent thread. */
  74379. + if (!ctx->entd)
  74380. + txnh->flags |= TXNH_WAIT_COMMIT;
  74381. + atom->flags |= ATOM_FORCE_COMMIT;
  74382. + }
  74383. + spin_unlock_atom(atom);
  74384. + } else if (ret == -E_REPEAT) {
  74385. + if (*nr_submitted == 0) {
  74386. + /* let others who hamper flushing (hold longterm locks,
  74387. + for instance) to free the way for flush */
  74388. + reiser4_preempt_point();
  74389. + goto repeat;
  74390. + }
  74391. + ret = 0;
  74392. + }
  74393. +/*
  74394. + if (*nr_submitted > wbc->nr_to_write)
  74395. + warning("", "asked for %ld, written %ld\n", wbc->nr_to_write, *nr_submitted);
  74396. +*/
  74397. + reiser4_txn_restart(ctx);
  74398. +
  74399. + return ret;
  74400. +}
  74401. +
  74402. +/* Remove processed nodes from atom's clean list (thereby removing them from the transaction). */
  74403. +void reiser4_invalidate_list(struct list_head *head)
  74404. +{
  74405. + while (!list_empty(head)) {
  74406. + jnode *node;
  74407. +
  74408. + node = list_entry(head->next, jnode, capture_link);
  74409. + spin_lock_jnode(node);
  74410. + reiser4_uncapture_block(node);
  74411. + jput(node);
  74412. + }
  74413. +}
  74414. +
  74415. +static void init_wlinks(txn_wait_links * wlinks)
  74416. +{
  74417. + wlinks->_lock_stack = get_current_lock_stack();
  74418. + INIT_LIST_HEAD(&wlinks->_fwaitfor_link);
  74419. + INIT_LIST_HEAD(&wlinks->_fwaiting_link);
  74420. + wlinks->waitfor_cb = NULL;
  74421. + wlinks->waiting_cb = NULL;
  74422. +}
  74423. +
  74424. +/* Add atom to the atom's waitfor list and wait for somebody to wake us up; */
  74425. +void reiser4_atom_wait_event(txn_atom * atom)
  74426. +{
  74427. + txn_wait_links _wlinks;
  74428. +
  74429. + assert_spin_locked(&(atom->alock));
  74430. + assert("nikita-3156",
  74431. + lock_stack_isclean(get_current_lock_stack()) ||
  74432. + atom->nr_running_queues > 0);
  74433. +
  74434. + init_wlinks(&_wlinks);
  74435. + list_add_tail(&_wlinks._fwaitfor_link, &atom->fwaitfor_list);
  74436. + atomic_inc(&atom->refcount);
  74437. + spin_unlock_atom(atom);
  74438. +
  74439. + reiser4_prepare_to_sleep(_wlinks._lock_stack);
  74440. + reiser4_go_to_sleep(_wlinks._lock_stack);
  74441. +
  74442. + spin_lock_atom(atom);
  74443. + list_del(&_wlinks._fwaitfor_link);
  74444. + atom_dec_and_unlock(atom);
  74445. +}
  74446. +
  74447. +void reiser4_atom_set_stage(txn_atom * atom, txn_stage stage)
  74448. +{
  74449. + assert("nikita-3535", atom != NULL);
  74450. + assert_spin_locked(&(atom->alock));
  74451. + assert("nikita-3536", stage <= ASTAGE_INVALID);
  74452. + /* Excelsior! */
  74453. + assert("nikita-3537", stage >= atom->stage);
  74454. + if (atom->stage != stage) {
  74455. + atom->stage = stage;
  74456. + reiser4_atom_send_event(atom);
  74457. + }
  74458. +}
  74459. +
  74460. +/* wake all threads which wait for an event */
  74461. +void reiser4_atom_send_event(txn_atom * atom)
  74462. +{
  74463. + assert_spin_locked(&(atom->alock));
  74464. + wakeup_atom_waitfor_list(atom);
  74465. +}
  74466. +
  74467. +/* Informs txn manager code that owner of this txn_handle should wait atom commit completion (for
  74468. + example, because it does fsync(2)) */
  74469. +static int should_wait_commit(txn_handle * h)
  74470. +{
  74471. + return h->flags & TXNH_WAIT_COMMIT;
  74472. +}
  74473. +
  74474. +typedef struct commit_data {
  74475. + txn_atom *atom;
  74476. + txn_handle *txnh;
  74477. + long nr_written;
  74478. + /* as an optimization we start committing atom by first trying to
  74479. + * flush it few times without switching into ASTAGE_CAPTURE_WAIT. This
  74480. + * allows to reduce stalls due to other threads waiting for atom in
  74481. + * ASTAGE_CAPTURE_WAIT stage. ->preflush is counter of these
  74482. + * preliminary flushes. */
  74483. + int preflush;
  74484. + /* have we waited on atom. */
  74485. + int wait;
  74486. + int failed;
  74487. + int wake_ktxnmgrd_up;
  74488. +} commit_data;
  74489. +
  74490. +/*
  74491. + * Called from commit_txnh() repeatedly, until either error happens, or atom
  74492. + * commits successfully.
  74493. + */
  74494. +static int try_commit_txnh(commit_data * cd)
  74495. +{
  74496. + int result;
  74497. +
  74498. + assert("nikita-2968", lock_stack_isclean(get_current_lock_stack()));
  74499. +
  74500. + /* Get the atom and txnh locked. */
  74501. + cd->atom = txnh_get_atom(cd->txnh);
  74502. + assert("jmacd-309", cd->atom != NULL);
  74503. + spin_unlock_txnh(cd->txnh);
  74504. +
  74505. + if (cd->wait) {
  74506. + cd->atom->nr_waiters--;
  74507. + cd->wait = 0;
  74508. + }
  74509. +
  74510. + if (cd->atom->stage == ASTAGE_DONE)
  74511. + return 0;
  74512. +
  74513. + if (cd->failed)
  74514. + return 0;
  74515. +
  74516. + if (atom_should_commit(cd->atom)) {
  74517. + /* if atom is _very_ large schedule it for commit as soon as
  74518. + * possible. */
  74519. + if (atom_should_commit_asap(cd->atom)) {
  74520. + /*
  74521. + * When atom is in PRE_COMMIT or later stage following
  74522. + * invariant (encoded in atom_can_be_committed())
  74523. + * holds: there is exactly one non-waiter transaction
  74524. + * handle opened on this atom. When thread wants to
  74525. + * wait until atom commits (for example sync()) it
  74526. + * waits on atom event after increasing
  74527. + * atom->nr_waiters (see below in this function). It
  74528. + * cannot be guaranteed that atom is already committed
  74529. + * after receiving event, so loop has to be
  74530. + * re-started. But if atom switched into PRE_COMMIT
  74531. + * stage and became too large, we cannot change its
  74532. + * state back to CAPTURE_WAIT (atom stage can only
  74533. + * increase monotonically), hence this check.
  74534. + */
  74535. + if (cd->atom->stage < ASTAGE_CAPTURE_WAIT)
  74536. + reiser4_atom_set_stage(cd->atom,
  74537. + ASTAGE_CAPTURE_WAIT);
  74538. + cd->atom->flags |= ATOM_FORCE_COMMIT;
  74539. + }
  74540. + if (cd->txnh->flags & TXNH_DONT_COMMIT) {
  74541. + /*
  74542. + * this thread (transaction handle that is) doesn't
  74543. + * want to commit atom. Notify waiters that handle is
  74544. + * closed. This can happen, for example, when we are
  74545. + * under VFS directory lock and don't want to commit
  74546. + * atom right now to avoid stalling other threads
  74547. + * working in the same directory.
  74548. + */
  74549. +
  74550. + /* Wake the ktxnmgrd up if the ktxnmgrd is needed to
  74551. + * commit this atom: no atom waiters and only one
  74552. + * (our) open transaction handle. */
  74553. + cd->wake_ktxnmgrd_up =
  74554. + cd->atom->txnh_count == 1 &&
  74555. + cd->atom->nr_waiters == 0;
  74556. + reiser4_atom_send_event(cd->atom);
  74557. + result = 0;
  74558. + } else if (!atom_can_be_committed(cd->atom)) {
  74559. + if (should_wait_commit(cd->txnh)) {
  74560. + /* sync(): wait for commit */
  74561. + cd->atom->nr_waiters++;
  74562. + cd->wait = 1;
  74563. + reiser4_atom_wait_event(cd->atom);
  74564. + result = RETERR(-E_REPEAT);
  74565. + } else {
  74566. + result = 0;
  74567. + }
  74568. + } else if (cd->preflush > 0 && !is_current_ktxnmgrd()) {
  74569. + /*
  74570. + * optimization: flush atom without switching it into
  74571. + * ASTAGE_CAPTURE_WAIT.
  74572. + *
  74573. + * But don't do this for ktxnmgrd, because ktxnmgrd
  74574. + * should never block on atom fusion.
  74575. + */
  74576. + result = flush_current_atom(JNODE_FLUSH_WRITE_BLOCKS,
  74577. + LONG_MAX, &cd->nr_written,
  74578. + &cd->atom, NULL);
  74579. + if (result == 0) {
  74580. + spin_unlock_atom(cd->atom);
  74581. + cd->preflush = 0;
  74582. + result = RETERR(-E_REPEAT);
  74583. + } else /* Atom wasn't flushed
  74584. + * completely. Rinse. Repeat. */
  74585. + --cd->preflush;
  74586. + } else {
  74587. + /* We change atom state to ASTAGE_CAPTURE_WAIT to
  74588. + prevent atom fusion and count ourself as an active
  74589. + flusher */
  74590. + reiser4_atom_set_stage(cd->atom, ASTAGE_CAPTURE_WAIT);
  74591. + cd->atom->flags |= ATOM_FORCE_COMMIT;
  74592. +
  74593. + result =
  74594. + commit_current_atom(&cd->nr_written, &cd->atom);
  74595. + if (result != 0 && result != -E_REPEAT)
  74596. + cd->failed = 1;
  74597. + }
  74598. + } else
  74599. + result = 0;
  74600. +
  74601. +#if REISER4_DEBUG
  74602. + if (result == 0)
  74603. + assert_spin_locked(&(cd->atom->alock));
  74604. +#endif
  74605. +
  74606. + /* perfectly valid assertion, except that when atom/txnh is not locked
  74607. + * fusion can take place, and cd->atom points nowhere. */
  74608. + /*
  74609. + assert("jmacd-1028", ergo(result != 0, spin_atom_is_not_locked(cd->atom)));
  74610. + */
  74611. + return result;
  74612. +}
  74613. +
  74614. +/* Called to commit a transaction handle. This decrements the atom's number of open
  74615. + handles and if it is the last handle to commit and the atom should commit, initiates
  74616. + atom commit. if commit does not fail, return number of written blocks */
  74617. +static int commit_txnh(txn_handle * txnh)
  74618. +{
  74619. + commit_data cd;
  74620. + assert("umka-192", txnh != NULL);
  74621. +
  74622. + memset(&cd, 0, sizeof cd);
  74623. + cd.txnh = txnh;
  74624. + cd.preflush = 10;
  74625. +
  74626. + /* calls try_commit_txnh() until either atom commits, or error
  74627. + * happens */
  74628. + while (try_commit_txnh(&cd) != 0)
  74629. + reiser4_preempt_point();
  74630. +
  74631. + spin_lock_txnh(txnh);
  74632. +
  74633. + cd.atom->txnh_count -= 1;
  74634. + txnh->atom = NULL;
  74635. + /* remove transaction handle from atom's list of transaction handles */
  74636. + list_del_init(&txnh->txnh_link);
  74637. +
  74638. + spin_unlock_txnh(txnh);
  74639. + atom_dec_and_unlock(cd.atom);
  74640. + /* if we don't want to do a commit (TXNH_DONT_COMMIT is set, probably
  74641. + * because it takes time) by current thread, we do that work
  74642. + * asynchronously by ktxnmgrd daemon. */
  74643. + if (cd.wake_ktxnmgrd_up)
  74644. + ktxnmgrd_kick(&get_current_super_private()->tmgr);
  74645. +
  74646. + return 0;
  74647. +}
  74648. +
  74649. +/* TRY_CAPTURE */
  74650. +
  74651. +/* This routine attempts a single block-capture request. It may return -E_REPEAT if some
  74652. + condition indicates that the request should be retried, and it may block if the
  74653. + txn_capture mode does not include the TXN_CAPTURE_NONBLOCKING request flag.
  74654. +
  74655. + This routine encodes the basic logic of block capturing described by:
  74656. +
  74657. + http://namesys.com/v4/v4.html
  74658. +
  74659. + Our goal here is to ensure that any two blocks that contain dependent modifications
  74660. + should commit at the same time. This function enforces this discipline by initiating
  74661. + fusion whenever a transaction handle belonging to one atom requests to read or write a
  74662. + block belonging to another atom (TXN_CAPTURE_WRITE or TXN_CAPTURE_READ_ATOMIC).
  74663. +
  74664. + In addition, this routine handles the initial assignment of atoms to blocks and
  74665. + transaction handles. These are possible outcomes of this function:
  74666. +
  74667. + 1. The block and handle are already part of the same atom: return immediate success
  74668. +
  74669. + 2. The block is assigned but the handle is not: call capture_assign_txnh to assign
  74670. + the handle to the block's atom.
  74671. +
  74672. + 3. The handle is assigned but the block is not: call capture_assign_block to assign
  74673. + the block to the handle's atom.
  74674. +
  74675. + 4. Both handle and block are assigned, but to different atoms: call capture_init_fusion
  74676. + to fuse atoms.
  74677. +
  74678. + 5. Neither block nor handle are assigned: create a new atom and assign them both.
  74679. +
  74680. + 6. A read request for a non-captured block: return immediate success.
  74681. +
  74682. + This function acquires and releases the handle's spinlock. This function is called
  74683. + under the jnode lock and if the return value is 0, it returns with the jnode lock still
  74684. + held. If the return is -E_REPEAT or some other error condition, the jnode lock is
  74685. + released. The external interface (reiser4_try_capture) manages re-aquiring the jnode
  74686. + lock in the failure case.
  74687. +*/
  74688. +static int try_capture_block(
  74689. + txn_handle * txnh, jnode * node, txn_capture mode,
  74690. + txn_atom ** atom_alloc)
  74691. +{
  74692. + txn_atom *block_atom;
  74693. + txn_atom *txnh_atom;
  74694. +
  74695. + /* Should not call capture for READ_NONCOM requests, handled in reiser4_try_capture. */
  74696. + assert("jmacd-567", CAPTURE_TYPE(mode) != TXN_CAPTURE_READ_NONCOM);
  74697. +
  74698. + /* FIXME-ZAM-HANS: FIXME_LATER_JMACD Should assert that atom->tree ==
  74699. + * node->tree somewhere. */
  74700. + assert("umka-194", txnh != NULL);
  74701. + assert("umka-195", node != NULL);
  74702. +
  74703. + /* The jnode is already locked! Being called from reiser4_try_capture(). */
  74704. + assert_spin_locked(&(node->guard));
  74705. + block_atom = node->atom;
  74706. +
  74707. + /* Get txnh spinlock, this allows us to compare txn_atom pointers but it doesn't
  74708. + let us touch the atoms themselves. */
  74709. + spin_lock_txnh(txnh);
  74710. + txnh_atom = txnh->atom;
  74711. + /* Process of capturing continues into one of four branches depends on
  74712. + which atoms from (block atom (node->atom), current atom (txnh->atom))
  74713. + exist. */
  74714. + if (txnh_atom == NULL) {
  74715. + if (block_atom == NULL) {
  74716. + spin_unlock_txnh(txnh);
  74717. + spin_unlock_jnode(node);
  74718. + /* assign empty atom to the txnh and repeat */
  74719. + return atom_begin_and_assign_to_txnh(atom_alloc, txnh);
  74720. + } else {
  74721. + atomic_inc(&block_atom->refcount);
  74722. + /* node spin-lock isn't needed anymore */
  74723. + spin_unlock_jnode(node);
  74724. + if (!spin_trylock_atom(block_atom)) {
  74725. + spin_unlock_txnh(txnh);
  74726. + spin_lock_atom(block_atom);
  74727. + spin_lock_txnh(txnh);
  74728. + }
  74729. + /* re-check state after getting txnh and the node
  74730. + * atom spin-locked */
  74731. + if (node->atom != block_atom || txnh->atom != NULL) {
  74732. + spin_unlock_txnh(txnh);
  74733. + atom_dec_and_unlock(block_atom);
  74734. + return RETERR(-E_REPEAT);
  74735. + }
  74736. + atomic_dec(&block_atom->refcount);
  74737. + if (block_atom->stage > ASTAGE_CAPTURE_WAIT ||
  74738. + (block_atom->stage == ASTAGE_CAPTURE_WAIT &&
  74739. + block_atom->txnh_count != 0))
  74740. + return capture_fuse_wait(txnh, block_atom, NULL, mode);
  74741. + capture_assign_txnh_nolock(block_atom, txnh);
  74742. + spin_unlock_txnh(txnh);
  74743. + spin_unlock_atom(block_atom);
  74744. + return RETERR(-E_REPEAT);
  74745. + }
  74746. + } else {
  74747. + /* It is time to perform deadlock prevention check over the
  74748. + node we want to capture. It is possible this node was locked
  74749. + for read without capturing it. The optimization which allows
  74750. + to do it helps us in keeping atoms independent as long as
  74751. + possible but it may cause lock/fuse deadlock problems.
  74752. +
  74753. + A number of similar deadlock situations with locked but not
  74754. + captured nodes were found. In each situation there are two
  74755. + or more threads: one of them does flushing while another one
  74756. + does routine balancing or tree lookup. The flushing thread
  74757. + (F) sleeps in long term locking request for node (N), another
  74758. + thread (A) sleeps in trying to capture some node already
  74759. + belonging the atom F, F has a state which prevents
  74760. + immediately fusion .
  74761. +
  74762. + Deadlocks of this kind cannot happen if node N was properly
  74763. + captured by thread A. The F thread fuse atoms before locking
  74764. + therefore current atom of thread F and current atom of thread
  74765. + A became the same atom and thread A may proceed. This does
  74766. + not work if node N was not captured because the fusion of
  74767. + atom does not happens.
  74768. +
  74769. + The following scheme solves the deadlock: If
  74770. + longterm_lock_znode locks and does not capture a znode, that
  74771. + znode is marked as MISSED_IN_CAPTURE. A node marked this way
  74772. + is processed by the code below which restores the missed
  74773. + capture and fuses current atoms of all the node lock owners
  74774. + by calling the fuse_not_fused_lock_owners() function. */
  74775. + if (JF_ISSET(node, JNODE_MISSED_IN_CAPTURE)) {
  74776. + JF_CLR(node, JNODE_MISSED_IN_CAPTURE);
  74777. + if (jnode_is_znode(node) && znode_is_locked(JZNODE(node))) {
  74778. + spin_unlock_txnh(txnh);
  74779. + spin_unlock_jnode(node);
  74780. + fuse_not_fused_lock_owners(txnh, JZNODE(node));
  74781. + return RETERR(-E_REPEAT);
  74782. + }
  74783. + }
  74784. + if (block_atom == NULL) {
  74785. + atomic_inc(&txnh_atom->refcount);
  74786. + spin_unlock_txnh(txnh);
  74787. + if (!spin_trylock_atom(txnh_atom)) {
  74788. + spin_unlock_jnode(node);
  74789. + spin_lock_atom(txnh_atom);
  74790. + spin_lock_jnode(node);
  74791. + }
  74792. + if (txnh->atom != txnh_atom || node->atom != NULL
  74793. + || JF_ISSET(node, JNODE_IS_DYING)) {
  74794. + spin_unlock_jnode(node);
  74795. + atom_dec_and_unlock(txnh_atom);
  74796. + return RETERR(-E_REPEAT);
  74797. + }
  74798. + atomic_dec(&txnh_atom->refcount);
  74799. + capture_assign_block_nolock(txnh_atom, node);
  74800. + spin_unlock_atom(txnh_atom);
  74801. + } else {
  74802. + if (txnh_atom != block_atom) {
  74803. + if (mode & TXN_CAPTURE_DONT_FUSE) {
  74804. + spin_unlock_txnh(txnh);
  74805. + spin_unlock_jnode(node);
  74806. + /* we are in a "no-fusion" mode and @node is
  74807. + * already part of transaction. */
  74808. + return RETERR(-E_NO_NEIGHBOR);
  74809. + }
  74810. + return capture_init_fusion(node, txnh, mode);
  74811. + }
  74812. + spin_unlock_txnh(txnh);
  74813. + }
  74814. + }
  74815. + return 0;
  74816. +}
  74817. +
  74818. +static txn_capture
  74819. +build_capture_mode(jnode * node, znode_lock_mode lock_mode, txn_capture flags)
  74820. +{
  74821. + txn_capture cap_mode;
  74822. +
  74823. + assert_spin_locked(&(node->guard));
  74824. +
  74825. + /* FIXME_JMACD No way to set TXN_CAPTURE_READ_MODIFY yet. */
  74826. +
  74827. + if (lock_mode == ZNODE_WRITE_LOCK) {
  74828. + cap_mode = TXN_CAPTURE_WRITE;
  74829. + } else if (node->atom != NULL) {
  74830. + cap_mode = TXN_CAPTURE_WRITE;
  74831. + } else if (0 && /* txnh->mode == TXN_READ_FUSING && */
  74832. + jnode_get_level(node) == LEAF_LEVEL) {
  74833. + /* NOTE-NIKITA TXN_READ_FUSING is not currently used */
  74834. + /* We only need a READ_FUSING capture at the leaf level. This
  74835. + is because the internal levels of the tree (twigs included)
  74836. + are redundant from the point of the user that asked for a
  74837. + read-fusing transcrash. The user only wants to read-fuse
  74838. + atoms due to reading uncommitted data that another user has
  74839. + written. It is the file system that reads/writes the
  74840. + internal tree levels, the user only reads/writes leaves. */
  74841. + cap_mode = TXN_CAPTURE_READ_ATOMIC;
  74842. + } else {
  74843. + /* In this case (read lock at a non-leaf) there's no reason to
  74844. + * capture. */
  74845. + /* cap_mode = TXN_CAPTURE_READ_NONCOM; */
  74846. + return 0;
  74847. + }
  74848. +
  74849. + cap_mode |= (flags & (TXN_CAPTURE_NONBLOCKING | TXN_CAPTURE_DONT_FUSE));
  74850. + assert("nikita-3186", cap_mode != 0);
  74851. + return cap_mode;
  74852. +}
  74853. +
  74854. +/* This is an external interface to try_capture_block(), it calls
  74855. + try_capture_block() repeatedly as long as -E_REPEAT is returned.
  74856. +
  74857. + @node: node to capture,
  74858. + @lock_mode: read or write lock is used in capture mode calculation,
  74859. + @flags: see txn_capture flags enumeration,
  74860. + @can_coc : can copy-on-capture
  74861. +
  74862. + @return: 0 - node was successfully captured, -E_REPEAT - capture request
  74863. + cannot be processed immediately as it was requested in flags,
  74864. + < 0 - other errors.
  74865. +*/
  74866. +int reiser4_try_capture(jnode *node, znode_lock_mode lock_mode,
  74867. + txn_capture flags)
  74868. +{
  74869. + txn_atom *atom_alloc = NULL;
  74870. + txn_capture cap_mode;
  74871. + txn_handle *txnh = get_current_context()->trans;
  74872. + int ret;
  74873. +
  74874. + assert_spin_locked(&(node->guard));
  74875. +
  74876. + repeat:
  74877. + if (JF_ISSET(node, JNODE_IS_DYING))
  74878. + return RETERR(-EINVAL);
  74879. + if (node->atom != NULL && txnh->atom == node->atom)
  74880. + return 0;
  74881. + cap_mode = build_capture_mode(node, lock_mode, flags);
  74882. + if (cap_mode == 0 ||
  74883. + (!(cap_mode & TXN_CAPTURE_WTYPES) && node->atom == NULL)) {
  74884. + /* Mark this node as "MISSED". It helps in further deadlock
  74885. + * analysis */
  74886. + if (jnode_is_znode(node))
  74887. + JF_SET(node, JNODE_MISSED_IN_CAPTURE);
  74888. + return 0;
  74889. + }
  74890. + /* Repeat try_capture as long as -E_REPEAT is returned. */
  74891. + ret = try_capture_block(txnh, node, cap_mode, &atom_alloc);
  74892. + /* Regardless of non_blocking:
  74893. +
  74894. + If ret == 0 then jnode is still locked.
  74895. + If ret != 0 then jnode is unlocked.
  74896. + */
  74897. +#if REISER4_DEBUG
  74898. + if (ret == 0)
  74899. + assert_spin_locked(&(node->guard));
  74900. + else
  74901. + assert_spin_not_locked(&(node->guard));
  74902. +#endif
  74903. + assert_spin_not_locked(&(txnh->guard));
  74904. +
  74905. + if (ret == -E_REPEAT) {
  74906. + /* E_REPEAT implies all locks were released, therefore we need
  74907. + to take the jnode's lock again. */
  74908. + spin_lock_jnode(node);
  74909. +
  74910. + /* Although this may appear to be a busy loop, it is not.
  74911. + There are several conditions that cause E_REPEAT to be
  74912. + returned by the call to try_capture_block, all cases
  74913. + indicating some kind of state change that means you should
  74914. + retry the request and will get a different result. In some
  74915. + cases this could be avoided with some extra code, but
  74916. + generally it is done because the necessary locks were
  74917. + released as a result of the operation and repeating is the
  74918. + simplest thing to do (less bug potential). The cases are:
  74919. + atom fusion returns E_REPEAT after it completes (jnode and
  74920. + txnh were unlocked); race conditions in assign_block,
  74921. + assign_txnh, and init_fusion return E_REPEAT (trylock
  74922. + failure); after going to sleep in capture_fuse_wait
  74923. + (request was blocked but may now succeed). I'm not quite
  74924. + sure how capture_copy works yet, but it may also return
  74925. + E_REPEAT. When the request is legitimately blocked, the
  74926. + requestor goes to sleep in fuse_wait, so this is not a busy
  74927. + loop. */
  74928. + /* NOTE-NIKITA: still don't understand:
  74929. +
  74930. + try_capture_block->capture_assign_txnh->spin_trylock_atom->E_REPEAT
  74931. +
  74932. + looks like busy loop?
  74933. + */
  74934. + goto repeat;
  74935. + }
  74936. +
  74937. + /* free extra atom object that was possibly allocated by
  74938. + try_capture_block().
  74939. +
  74940. + Do this before acquiring jnode spin lock to
  74941. + minimize time spent under lock. --nikita */
  74942. + if (atom_alloc != NULL) {
  74943. + kmem_cache_free(_atom_slab, atom_alloc);
  74944. + }
  74945. +
  74946. + if (ret != 0) {
  74947. + if (ret == -E_BLOCK) {
  74948. + assert("nikita-3360",
  74949. + cap_mode & TXN_CAPTURE_NONBLOCKING);
  74950. + ret = -E_REPEAT;
  74951. + }
  74952. +
  74953. + /* Failure means jnode is not locked. FIXME_LATER_JMACD May
  74954. + want to fix the above code to avoid releasing the lock and
  74955. + re-acquiring it, but there are cases were failure occurs
  74956. + when the lock is not held, and those cases would need to be
  74957. + modified to re-take the lock. */
  74958. + spin_lock_jnode(node);
  74959. + }
  74960. +
  74961. + /* Jnode is still locked. */
  74962. + assert_spin_locked(&(node->guard));
  74963. + return ret;
  74964. +}
  74965. +
  74966. +static void release_two_atoms(txn_atom *one, txn_atom *two)
  74967. +{
  74968. + spin_unlock_atom(one);
  74969. + atom_dec_and_unlock(two);
  74970. + spin_lock_atom(one);
  74971. + atom_dec_and_unlock(one);
  74972. +}
  74973. +
  74974. +/* This function sets up a call to try_capture_block and repeats as long as -E_REPEAT is
  74975. + returned by that routine. The txn_capture request mode is computed here depending on
  74976. + the transaction handle's type and the lock request. This is called from the depths of
  74977. + the lock manager with the jnode lock held and it always returns with the jnode lock
  74978. + held.
  74979. +*/
  74980. +
  74981. +/* fuse all 'active' atoms of lock owners of given node. */
  74982. +static void fuse_not_fused_lock_owners(txn_handle * txnh, znode * node)
  74983. +{
  74984. + lock_handle *lh;
  74985. + int repeat;
  74986. + txn_atom *atomh, *atomf;
  74987. + reiser4_context *me = get_current_context();
  74988. + reiser4_context *ctx = NULL;
  74989. +
  74990. + assert_spin_not_locked(&(ZJNODE(node)->guard));
  74991. + assert_spin_not_locked(&(txnh->hlock));
  74992. +
  74993. + repeat:
  74994. + repeat = 0;
  74995. + atomh = txnh_get_atom(txnh);
  74996. + spin_unlock_txnh(txnh);
  74997. + assert("zam-692", atomh != NULL);
  74998. +
  74999. + spin_lock_zlock(&node->lock);
  75000. + /* inspect list of lock owners */
  75001. + list_for_each_entry(lh, &node->lock.owners, owners_link) {
  75002. + ctx = get_context_by_lock_stack(lh->owner);
  75003. + if (ctx == me)
  75004. + continue;
  75005. + /* below we use two assumptions to avoid addition spin-locks
  75006. + for checking the condition :
  75007. +
  75008. + 1) if the lock stack has lock, the transaction should be
  75009. + opened, i.e. ctx->trans != NULL;
  75010. +
  75011. + 2) reading of well-aligned ctx->trans->atom is atomic, if it
  75012. + equals to the address of spin-locked atomh, we take that
  75013. + the atoms are the same, nothing has to be captured. */
  75014. + if (atomh != ctx->trans->atom) {
  75015. + reiser4_wake_up(lh->owner);
  75016. + repeat = 1;
  75017. + break;
  75018. + }
  75019. + }
  75020. + if (repeat) {
  75021. + if (!spin_trylock_txnh(ctx->trans)) {
  75022. + spin_unlock_zlock(&node->lock);
  75023. + spin_unlock_atom(atomh);
  75024. + goto repeat;
  75025. + }
  75026. + atomf = ctx->trans->atom;
  75027. + if (atomf == NULL) {
  75028. + capture_assign_txnh_nolock(atomh, ctx->trans);
  75029. + /* release zlock lock _after_ assigning the atom to the
  75030. + * transaction handle, otherwise the lock owner thread
  75031. + * may unlock all znodes, exit kernel context and here
  75032. + * we would access an invalid transaction handle. */
  75033. + spin_unlock_zlock(&node->lock);
  75034. + spin_unlock_atom(atomh);
  75035. + spin_unlock_txnh(ctx->trans);
  75036. + goto repeat;
  75037. + }
  75038. + assert("zam-1059", atomf != atomh);
  75039. + spin_unlock_zlock(&node->lock);
  75040. + atomic_inc(&atomh->refcount);
  75041. + atomic_inc(&atomf->refcount);
  75042. + spin_unlock_txnh(ctx->trans);
  75043. + if (atomf > atomh) {
  75044. + spin_lock_atom_nested(atomf);
  75045. + } else {
  75046. + spin_unlock_atom(atomh);
  75047. + spin_lock_atom(atomf);
  75048. + spin_lock_atom_nested(atomh);
  75049. + }
  75050. + if (atomh == atomf || !atom_isopen(atomh) || !atom_isopen(atomf)) {
  75051. + release_two_atoms(atomf, atomh);
  75052. + goto repeat;
  75053. + }
  75054. + atomic_dec(&atomh->refcount);
  75055. + atomic_dec(&atomf->refcount);
  75056. + capture_fuse_into(atomf, atomh);
  75057. + goto repeat;
  75058. + }
  75059. + spin_unlock_zlock(&node->lock);
  75060. + spin_unlock_atom(atomh);
  75061. +}
  75062. +
  75063. +/* This is the interface to capture unformatted nodes via their struct page
  75064. + reference. Currently it is only used in reiser4_invalidatepage */
  75065. +int try_capture_page_to_invalidate(struct page *pg)
  75066. +{
  75067. + int ret;
  75068. + jnode *node;
  75069. +
  75070. + assert("umka-292", pg != NULL);
  75071. + assert("nikita-2597", PageLocked(pg));
  75072. +
  75073. + if (IS_ERR(node = jnode_of_page(pg))) {
  75074. + return PTR_ERR(node);
  75075. + }
  75076. +
  75077. + spin_lock_jnode(node);
  75078. + unlock_page(pg);
  75079. +
  75080. + ret = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
  75081. + spin_unlock_jnode(node);
  75082. + jput(node);
  75083. + lock_page(pg);
  75084. + return ret;
  75085. +}
  75086. +
  75087. +/* This informs the transaction manager when a node is deleted. Add the block to the
  75088. + atom's delete set and uncapture the block.
  75089. +
  75090. +VS-FIXME-HANS: this E_REPEAT paradigm clutters the code and creates a need for
  75091. +explanations. find all the functions that use it, and unless there is some very
  75092. +good reason to use it (I have not noticed one so far and I doubt it exists, but maybe somewhere somehow....),
  75093. +move the loop to inside the function.
  75094. +
  75095. +VS-FIXME-HANS: can this code be at all streamlined? In particular, can you lock and unlock the jnode fewer times?
  75096. + */
  75097. +void reiser4_uncapture_page(struct page *pg)
  75098. +{
  75099. + jnode *node;
  75100. + txn_atom *atom;
  75101. +
  75102. + assert("umka-199", pg != NULL);
  75103. + assert("nikita-3155", PageLocked(pg));
  75104. +
  75105. + clear_page_dirty_for_io(pg);
  75106. +
  75107. + reiser4_wait_page_writeback(pg);
  75108. +
  75109. + node = jprivate(pg);
  75110. + BUG_ON(node == NULL);
  75111. +
  75112. + spin_lock_jnode(node);
  75113. +
  75114. + atom = jnode_get_atom(node);
  75115. + if (atom == NULL) {
  75116. + assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY));
  75117. + spin_unlock_jnode(node);
  75118. + return;
  75119. + }
  75120. +
  75121. + /* We can remove jnode from transaction even if it is on flush queue
  75122. + * prepped list, we only need to be sure that flush queue is not being
  75123. + * written by reiser4_write_fq(). reiser4_write_fq() does not use atom
  75124. + * spin lock for protection of the prepped nodes list, instead
  75125. + * write_fq() increments atom's nr_running_queues counters for the time
  75126. + * when prepped list is not protected by spin lock. Here we check this
  75127. + * counter if we want to remove jnode from flush queue and, if the
  75128. + * counter is not zero, wait all reiser4_write_fq() for this atom to
  75129. + * complete. This is not significant overhead. */
  75130. + while (JF_ISSET(node, JNODE_FLUSH_QUEUED) && atom->nr_running_queues) {
  75131. + spin_unlock_jnode(node);
  75132. + /*
  75133. + * at this moment we want to wait for "atom event", viz. wait
  75134. + * until @node can be removed from flush queue. But
  75135. + * reiser4_atom_wait_event() cannot be called with page locked,
  75136. + * because it deadlocks with jnode_extent_write(). Unlock page,
  75137. + * after making sure (through get_page()) that it cannot
  75138. + * be released from memory.
  75139. + */
  75140. + get_page(pg);
  75141. + unlock_page(pg);
  75142. + reiser4_atom_wait_event(atom);
  75143. + lock_page(pg);
  75144. + /*
  75145. + * page may has been detached by ->writepage()->releasepage().
  75146. + */
  75147. + reiser4_wait_page_writeback(pg);
  75148. + spin_lock_jnode(node);
  75149. + put_page(pg);
  75150. + atom = jnode_get_atom(node);
  75151. +/* VS-FIXME-HANS: improve the commenting in this function */
  75152. + if (atom == NULL) {
  75153. + spin_unlock_jnode(node);
  75154. + return;
  75155. + }
  75156. + }
  75157. + reiser4_uncapture_block(node);
  75158. + spin_unlock_atom(atom);
  75159. + jput(node);
  75160. +}
  75161. +
  75162. +/* this is used in extent's kill hook to uncapture and unhash jnodes attached to
  75163. + * inode's tree of jnodes */
  75164. +void reiser4_uncapture_jnode(jnode * node)
  75165. +{
  75166. + txn_atom *atom;
  75167. +
  75168. + assert_spin_locked(&(node->guard));
  75169. + assert("", node->pg == 0);
  75170. +
  75171. + atom = jnode_get_atom(node);
  75172. + if (atom == NULL) {
  75173. + assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY));
  75174. + spin_unlock_jnode(node);
  75175. + return;
  75176. + }
  75177. +
  75178. + reiser4_uncapture_block(node);
  75179. + spin_unlock_atom(atom);
  75180. + jput(node);
  75181. +}
  75182. +
  75183. +/* No-locking version of assign_txnh. Sets the transaction handle's atom pointer,
  75184. + increases atom refcount and txnh_count, adds to txnh_list. */
  75185. +static void capture_assign_txnh_nolock(txn_atom *atom, txn_handle *txnh)
  75186. +{
  75187. + assert("umka-200", atom != NULL);
  75188. + assert("umka-201", txnh != NULL);
  75189. +
  75190. + assert_spin_locked(&(txnh->hlock));
  75191. + assert_spin_locked(&(atom->alock));
  75192. + assert("jmacd-824", txnh->atom == NULL);
  75193. + assert("nikita-3540", atom_isopen(atom));
  75194. + BUG_ON(txnh->atom != NULL);
  75195. +
  75196. + atomic_inc(&atom->refcount);
  75197. + txnh->atom = atom;
  75198. + reiser4_ctx_gfp_mask_set();
  75199. + list_add_tail(&txnh->txnh_link, &atom->txnh_list);
  75200. + atom->txnh_count += 1;
  75201. +}
  75202. +
  75203. +/* No-locking version of assign_block. Sets the block's atom pointer, references the
  75204. + block, adds it to the clean or dirty capture_jnode list, increments capture_count. */
  75205. +static void capture_assign_block_nolock(txn_atom *atom, jnode *node)
  75206. +{
  75207. + assert("umka-202", atom != NULL);
  75208. + assert("umka-203", node != NULL);
  75209. + assert_spin_locked(&(node->guard));
  75210. + assert_spin_locked(&(atom->alock));
  75211. + assert("jmacd-323", node->atom == NULL);
  75212. + BUG_ON(!list_empty_careful(&node->capture_link));
  75213. + assert("nikita-3470", !JF_ISSET(node, JNODE_DIRTY));
  75214. +
  75215. + /* Pointer from jnode to atom is not counted in atom->refcount. */
  75216. + node->atom = atom;
  75217. +
  75218. + list_add_tail(&node->capture_link, ATOM_CLEAN_LIST(atom));
  75219. + atom->capture_count += 1;
  75220. + /* reference to jnode is acquired by atom. */
  75221. + jref(node);
  75222. +
  75223. + ON_DEBUG(count_jnode(atom, node, NOT_CAPTURED, CLEAN_LIST, 1));
  75224. +
  75225. + LOCK_CNT_INC(t_refs);
  75226. +}
  75227. +
  75228. +/* common code for dirtying both unformatted jnodes and formatted znodes. */
  75229. +static void do_jnode_make_dirty(jnode * node, txn_atom * atom)
  75230. +{
  75231. + assert_spin_locked(&(node->guard));
  75232. + assert_spin_locked(&(atom->alock));
  75233. + assert("jmacd-3981", !JF_ISSET(node, JNODE_DIRTY));
  75234. +
  75235. + JF_SET(node, JNODE_DIRTY);
  75236. +
  75237. + if (!JF_ISSET(node, JNODE_CLUSTER_PAGE))
  75238. + get_current_context()->nr_marked_dirty++;
  75239. +
  75240. + /* We grab2flush_reserve one additional block only if node was
  75241. + not CREATED and jnode_flush did not sort it into neither
  75242. + relocate set nor overwrite one. If node is in overwrite or
  75243. + relocate set we assume that atom's flush reserved counter was
  75244. + already adjusted. */
  75245. + if (!JF_ISSET(node, JNODE_CREATED) && !JF_ISSET(node, JNODE_RELOC)
  75246. + && !JF_ISSET(node, JNODE_OVRWR) && jnode_is_leaf(node)
  75247. + && !jnode_is_cluster_page(node)) {
  75248. + assert("vs-1093", !reiser4_blocknr_is_fake(&node->blocknr));
  75249. + assert("vs-1506", *jnode_get_block(node) != 0);
  75250. + grabbed2flush_reserved_nolock(atom, (__u64) 1);
  75251. + JF_SET(node, JNODE_FLUSH_RESERVED);
  75252. + }
  75253. +
  75254. + if (!JF_ISSET(node, JNODE_FLUSH_QUEUED)) {
  75255. + /* If the atom is not set yet, it will be added to the appropriate list in
  75256. + capture_assign_block_nolock. */
  75257. + /* Sometimes a node is set dirty before being captured -- the case for new
  75258. + jnodes. In that case the jnode will be added to the appropriate list
  75259. + in capture_assign_block_nolock. Another reason not to re-link jnode is
  75260. + that jnode is on a flush queue (see flush.c for details) */
  75261. +
  75262. + int level = jnode_get_level(node);
  75263. +
  75264. + assert("nikita-3152", !JF_ISSET(node, JNODE_OVRWR));
  75265. + assert("zam-654", atom->stage < ASTAGE_PRE_COMMIT);
  75266. + assert("nikita-2607", 0 <= level);
  75267. + assert("nikita-2606", level <= REAL_MAX_ZTREE_HEIGHT);
  75268. +
  75269. + /* move node to atom's dirty list */
  75270. + list_move_tail(&node->capture_link, ATOM_DIRTY_LIST(atom, level));
  75271. + ON_DEBUG(count_jnode
  75272. + (atom, node, NODE_LIST(node), DIRTY_LIST, 1));
  75273. + }
  75274. +}
  75275. +
  75276. +/* Set the dirty status for this (spin locked) jnode. */
  75277. +void jnode_make_dirty_locked(jnode * node)
  75278. +{
  75279. + assert("umka-204", node != NULL);
  75280. + assert_spin_locked(&(node->guard));
  75281. +
  75282. + if (REISER4_DEBUG && rofs_jnode(node)) {
  75283. + warning("nikita-3365", "Dirtying jnode on rofs");
  75284. + dump_stack();
  75285. + }
  75286. +
  75287. + /* Fast check for already dirty node */
  75288. + if (!JF_ISSET(node, JNODE_DIRTY)) {
  75289. + txn_atom *atom;
  75290. +
  75291. + atom = jnode_get_atom(node);
  75292. + assert("vs-1094", atom);
  75293. + /* Check jnode dirty status again because node spin lock might
  75294. + * be released inside jnode_get_atom(). */
  75295. + if (likely(!JF_ISSET(node, JNODE_DIRTY)))
  75296. + do_jnode_make_dirty(node, atom);
  75297. + spin_unlock_atom(atom);
  75298. + }
  75299. +}
  75300. +
  75301. +/* Set the dirty status for this znode. */
  75302. +void znode_make_dirty(znode * z)
  75303. +{
  75304. + jnode *node;
  75305. + struct page *page;
  75306. +
  75307. + assert("umka-204", z != NULL);
  75308. + assert("nikita-3290", znode_above_root(z) || znode_is_loaded(z));
  75309. + assert("nikita-3560", znode_is_write_locked(z));
  75310. +
  75311. + node = ZJNODE(z);
  75312. + /* znode is longterm locked, we can check dirty bit without spinlock */
  75313. + if (JF_ISSET(node, JNODE_DIRTY)) {
  75314. + /* znode is dirty already. All we have to do is to change znode version */
  75315. + z->version = znode_build_version(jnode_get_tree(node));
  75316. + return;
  75317. + }
  75318. +
  75319. + spin_lock_jnode(node);
  75320. + jnode_make_dirty_locked(node);
  75321. + page = jnode_page(node);
  75322. + if (page != NULL) {
  75323. + /* this is useful assertion (allows one to check that no
  75324. + * modifications are lost due to update of in-flight page),
  75325. + * but it requires locking on page to check PG_writeback
  75326. + * bit. */
  75327. + /* assert("nikita-3292",
  75328. + !PageWriteback(page) || ZF_ISSET(z, JNODE_WRITEBACK)); */
  75329. + get_page(page);
  75330. +
  75331. + /* jnode lock is not needed for the rest of
  75332. + * znode_set_dirty(). */
  75333. + spin_unlock_jnode(node);
  75334. + /* reiser4 file write code calls set_page_dirty for
  75335. + * unformatted nodes, for formatted nodes we do it here. */
  75336. + set_page_dirty_notag(page);
  75337. + put_page(page);
  75338. + /* bump version counter in znode */
  75339. + z->version = znode_build_version(jnode_get_tree(node));
  75340. + } else {
  75341. + assert("zam-596", znode_above_root(JZNODE(node)));
  75342. + spin_unlock_jnode(node);
  75343. + }
  75344. +
  75345. + assert("nikita-1900", znode_is_write_locked(z));
  75346. + assert("jmacd-9777", node->atom != NULL);
  75347. +}
  75348. +
  75349. +int reiser4_sync_atom(txn_atom * atom)
  75350. +{
  75351. + int result;
  75352. + txn_handle *txnh;
  75353. +
  75354. + txnh = get_current_context()->trans;
  75355. +
  75356. + result = 0;
  75357. + if (atom != NULL) {
  75358. + if (atom->stage < ASTAGE_PRE_COMMIT) {
  75359. + spin_lock_txnh(txnh);
  75360. + capture_assign_txnh_nolock(atom, txnh);
  75361. + result = force_commit_atom(txnh);
  75362. + } else if (atom->stage < ASTAGE_POST_COMMIT) {
  75363. + /* wait atom commit */
  75364. + reiser4_atom_wait_event(atom);
  75365. + /* try once more */
  75366. + result = RETERR(-E_REPEAT);
  75367. + } else
  75368. + spin_unlock_atom(atom);
  75369. + }
  75370. + return result;
  75371. +}
  75372. +
  75373. +#if REISER4_DEBUG
  75374. +
  75375. + /* move jnode from one list to another
  75376. + call this after atom->capture_count is updated */
  75377. +void
  75378. +count_jnode(txn_atom * atom, jnode * node, atom_list old_list,
  75379. + atom_list new_list, int check_lists)
  75380. +{
  75381. + struct list_head *pos;
  75382. +
  75383. + assert("zam-1018", atom_is_protected(atom));
  75384. + assert_spin_locked(&(node->guard));
  75385. + assert("", NODE_LIST(node) == old_list);
  75386. +
  75387. + switch (NODE_LIST(node)) {
  75388. + case NOT_CAPTURED:
  75389. + break;
  75390. + case DIRTY_LIST:
  75391. + assert("", atom->dirty > 0);
  75392. + atom->dirty--;
  75393. + break;
  75394. + case CLEAN_LIST:
  75395. + assert("", atom->clean > 0);
  75396. + atom->clean--;
  75397. + break;
  75398. + case FQ_LIST:
  75399. + assert("", atom->fq > 0);
  75400. + atom->fq--;
  75401. + break;
  75402. + case WB_LIST:
  75403. + assert("", atom->wb > 0);
  75404. + atom->wb--;
  75405. + break;
  75406. + case OVRWR_LIST:
  75407. + assert("", atom->ovrwr > 0);
  75408. + atom->ovrwr--;
  75409. + break;
  75410. + default:
  75411. + impossible("", "");
  75412. + }
  75413. +
  75414. + switch (new_list) {
  75415. + case NOT_CAPTURED:
  75416. + break;
  75417. + case DIRTY_LIST:
  75418. + atom->dirty++;
  75419. + break;
  75420. + case CLEAN_LIST:
  75421. + atom->clean++;
  75422. + break;
  75423. + case FQ_LIST:
  75424. + atom->fq++;
  75425. + break;
  75426. + case WB_LIST:
  75427. + atom->wb++;
  75428. + break;
  75429. + case OVRWR_LIST:
  75430. + atom->ovrwr++;
  75431. + break;
  75432. + default:
  75433. + impossible("", "");
  75434. + }
  75435. + ASSIGN_NODE_LIST(node, new_list);
  75436. + if (0 && check_lists) {
  75437. + int count;
  75438. + tree_level level;
  75439. +
  75440. + count = 0;
  75441. +
  75442. + /* flush queue list */
  75443. + /* reiser4_check_fq(atom); */
  75444. +
  75445. + /* dirty list */
  75446. + count = 0;
  75447. + for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) {
  75448. + list_for_each(pos, ATOM_DIRTY_LIST(atom, level))
  75449. + count++;
  75450. + }
  75451. + if (count != atom->dirty)
  75452. + warning("", "dirty counter %d, real %d\n", atom->dirty,
  75453. + count);
  75454. +
  75455. + /* clean list */
  75456. + count = 0;
  75457. + list_for_each(pos, ATOM_CLEAN_LIST(atom))
  75458. + count++;
  75459. + if (count != atom->clean)
  75460. + warning("", "clean counter %d, real %d\n", atom->clean,
  75461. + count);
  75462. +
  75463. + /* wb list */
  75464. + count = 0;
  75465. + list_for_each(pos, ATOM_WB_LIST(atom))
  75466. + count++;
  75467. + if (count != atom->wb)
  75468. + warning("", "wb counter %d, real %d\n", atom->wb,
  75469. + count);
  75470. +
  75471. + /* overwrite list */
  75472. + count = 0;
  75473. + list_for_each(pos, ATOM_OVRWR_LIST(atom))
  75474. + count++;
  75475. +
  75476. + if (count != atom->ovrwr)
  75477. + warning("", "ovrwr counter %d, real %d\n", atom->ovrwr,
  75478. + count);
  75479. + }
  75480. + assert("vs-1624", atom->num_queued == atom->fq);
  75481. + if (atom->capture_count !=
  75482. + atom->dirty + atom->clean + atom->ovrwr + atom->wb + atom->fq) {
  75483. + printk
  75484. + ("count %d, dirty %d clean %d ovrwr %d wb %d fq %d\n",
  75485. + atom->capture_count, atom->dirty, atom->clean, atom->ovrwr,
  75486. + atom->wb, atom->fq);
  75487. + assert("vs-1622",
  75488. + atom->capture_count ==
  75489. + atom->dirty + atom->clean + atom->ovrwr + atom->wb +
  75490. + atom->fq);
  75491. + }
  75492. +}
  75493. +
  75494. +#endif
  75495. +
  75496. +int reiser4_capture_super_block(struct super_block *s)
  75497. +{
  75498. + int result;
  75499. + znode *uber;
  75500. + lock_handle lh;
  75501. +
  75502. + init_lh(&lh);
  75503. + result = get_uber_znode(reiser4_get_tree(s),
  75504. + ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI, &lh);
  75505. + if (result)
  75506. + return result;
  75507. +
  75508. + uber = lh.node;
  75509. + /* Grabbing one block for superblock */
  75510. + result = reiser4_grab_space_force((__u64) 1, BA_RESERVED);
  75511. + /* release the uber-znode long-term lock on the failure path too;
  75512. +  * returning here without done_lh() would leak the lock handle */
  75513. + if (result == 0)
  75514. + znode_make_dirty(uber);
  75515. +
  75516. + done_lh(&lh);
  75517. + return result;
  75518. +}
  75519. +
  75520. +/* Wakeup every handle on the atom's WAITFOR list */
  75521. +static void wakeup_atom_waitfor_list(txn_atom * atom)
  75522. +{
  75523. + txn_wait_links *wlinks;
  75524. +
  75525. + assert("umka-210", atom != NULL);
  75526. +
  75527. + /* atom is locked */
  75528. + list_for_each_entry(wlinks, &atom->fwaitfor_list, _fwaitfor_link) {
  75529. + if (wlinks->waitfor_cb == NULL ||
  75530. + wlinks->waitfor_cb(atom, wlinks))
  75531. + /* Wake up. */
  75532. + reiser4_wake_up(wlinks->_lock_stack);
  75533. + }
  75534. +}
  75535. +
  75536. +/* Wakeup every handle on the atom's WAITING list */
  75537. +static void wakeup_atom_waiting_list(txn_atom * atom)
  75538. +{
  75539. + txn_wait_links *wlinks;
  75540. +
  75541. + assert("umka-211", atom != NULL);
  75542. +
  75543. + /* atom is locked */
  75544. + list_for_each_entry(wlinks, &atom->fwaiting_list, _fwaiting_link) {
  75545. + if (wlinks->waiting_cb == NULL ||
  75546. + wlinks->waiting_cb(atom, wlinks))
  75547. + /* Wake up. */
  75548. + reiser4_wake_up(wlinks->_lock_stack);
  75549. + }
  75550. +}
  75551. +
  75552. +/* helper function used by capture_fuse_wait() to avoid "spurious wake-ups" */
  75553. +static int wait_for_fusion(txn_atom * atom, txn_wait_links * wlinks)
  75554. +{
  75555. + assert("nikita-3330", atom != NULL);
  75556. + assert_spin_locked(&(atom->alock));
  75557. +
  75558. + /* atom->txnh_count == 1 is for waking waiters up if we are releasing
  75559. + * last transaction handle. */
  75560. + return atom->stage != ASTAGE_CAPTURE_WAIT || atom->txnh_count == 1;
  75561. +}
  75562. +
  75563. +/* The general purpose of this function is to wait on the first of two possible events.
  75564. + The situation is that a handle (and its atom atomh) is blocked trying to capture a
  75565. + block (i.e., node) but the node's atom (atomf) is in the CAPTURE_WAIT state. The
  75566. + handle's atom (atomh) is not in the CAPTURE_WAIT state. However, atomh could fuse with
  75567. + another atom or, due to age, enter the CAPTURE_WAIT state itself, at which point it
  75568. + needs to unblock the handle to avoid deadlock. When the txnh is unblocked it will
  75569. + proceed and fuse the two atoms in the CAPTURE_WAIT state.
  75570. +
  75571. + In other words, if either atomh or atomf change state, the handle will be awakened,
  75572. + thus there are two lists per atom: WAITING and WAITFOR.
  75573. +
  75574. + This is also called by capture_assign_txnh with (atomh == NULL) to wait for atomf to
  75575. + close but it is not assigned to an atom of its own.
  75576. +
  75577. + Lock ordering in this method: all four locks are held: JNODE_LOCK, TXNH_LOCK,
  75578. + BOTH_ATOM_LOCKS. Result: all four locks are released.
  75579. +*/
  75580. +static int capture_fuse_wait(txn_handle * txnh, txn_atom * atomf,
  75581. + txn_atom * atomh, txn_capture mode)
  75582. +{
  75583. + int ret;
  75584. + txn_wait_links wlinks;
  75585. +
  75586. + assert("umka-213", txnh != NULL);
  75587. + assert("umka-214", atomf != NULL);
  75588. +
  75589. + if ((mode & TXN_CAPTURE_NONBLOCKING) != 0) {
  75590. + spin_unlock_txnh(txnh);
  75591. + spin_unlock_atom(atomf);
  75592. +
  75593. + if (atomh) {
  75594. + spin_unlock_atom(atomh);
  75595. + }
  75596. +
  75597. + return RETERR(-E_BLOCK);
  75598. + }
  75599. +
  75600. + /* Initialize the waiting list links. */
  75601. + init_wlinks(&wlinks);
  75602. +
  75603. + /* Add txnh to atomf's waitfor list, unlock atomf. */
  75604. + list_add_tail(&wlinks._fwaitfor_link, &atomf->fwaitfor_list);
  75605. + wlinks.waitfor_cb = wait_for_fusion;
  75606. + atomic_inc(&atomf->refcount);
  75607. + spin_unlock_atom(atomf);
  75608. +
  75609. + if (atomh) {
  75610. + /* Add txnh to atomh's waiting list, unlock atomh. */
  75611. + list_add_tail(&wlinks._fwaiting_link, &atomh->fwaiting_list);
  75612. + atomic_inc(&atomh->refcount);
  75613. + spin_unlock_atom(atomh);
  75614. + }
  75615. +
  75616. + /* Go to sleep. */
  75617. + spin_unlock_txnh(txnh);
  75618. +
  75619. + ret = reiser4_prepare_to_sleep(wlinks._lock_stack);
  75620. + if (ret == 0) {
  75621. + reiser4_go_to_sleep(wlinks._lock_stack);
  75622. + ret = RETERR(-E_REPEAT);
  75623. + }
  75624. +
  75625. + /* Remove from the waitfor list. */
  75626. + spin_lock_atom(atomf);
  75627. +
  75628. + list_del(&wlinks._fwaitfor_link);
  75629. + atom_dec_and_unlock(atomf);
  75630. +
  75631. + if (atomh) {
  75632. + /* Remove from the waiting list. */
  75633. + spin_lock_atom(atomh);
  75634. + list_del(&wlinks._fwaiting_link);
  75635. + atom_dec_and_unlock(atomh);
  75636. + }
  75637. + return ret;
  75638. +}
  75639. +
  75640. +static void lock_two_atoms(txn_atom * one, txn_atom * two)
  75641. +{
  75642. + assert("zam-1067", one != two);
  75643. +
  75644. + /* lock the atom with lesser address first */
  75645. + if (one < two) {
  75646. + spin_lock_atom(one);
  75647. + spin_lock_atom_nested(two);
  75648. + } else {
  75649. + spin_lock_atom(two);
  75650. + spin_lock_atom_nested(one);
  75651. + }
  75652. +}
  75653. +
  75654. +/* Perform the necessary work to prepare for fusing two atoms, which involves
  75655. + * acquiring two atom locks in the proper order. If one of the node's atom is
  75656. + * blocking fusion (i.e., it is in the CAPTURE_WAIT stage) and the handle's
  75657. + * atom is not then the handle's request is put to sleep. If the node's atom
  75658. + * is committing, then the node can be copy-on-captured. Otherwise, pick the
  75659. + * atom with fewer pointers to be fused into the atom with more pointers and
  75660. + * call capture_fuse_into.
  75661. + */
  75662. +static int capture_init_fusion(jnode *node, txn_handle *txnh, txn_capture mode)
  75663. +{
  75664. + txn_atom * txnh_atom = txnh->atom;
  75665. + txn_atom * block_atom = node->atom;
  75666. +
  75667. + atomic_inc(&txnh_atom->refcount);
  75668. + atomic_inc(&block_atom->refcount);
  75669. +
  75670. + spin_unlock_txnh(txnh);
  75671. + spin_unlock_jnode(node);
  75672. +
  75673. + lock_two_atoms(txnh_atom, block_atom);
  75674. +
  75675. + if (txnh->atom != txnh_atom || node->atom != block_atom ) {
  75676. + release_two_atoms(txnh_atom, block_atom);
  75677. + return RETERR(-E_REPEAT);
  75678. + }
  75679. +
  75680. + atomic_dec(&txnh_atom->refcount);
  75681. + atomic_dec(&block_atom->refcount);
  75682. +
  75683. + assert ("zam-1066", atom_isopen(txnh_atom));
  75684. +
  75685. + if (txnh_atom->stage >= block_atom->stage ||
  75686. + (block_atom->stage == ASTAGE_CAPTURE_WAIT && block_atom->txnh_count == 0)) {
  75687. + capture_fuse_into(txnh_atom, block_atom);
  75688. + return RETERR(-E_REPEAT);
  75689. + }
  75690. + spin_lock_txnh(txnh);
  75691. + return capture_fuse_wait(txnh, block_atom, txnh_atom, mode);
  75692. +}
  75693. +
  75694. +/* This function splices together two jnode lists (small and large) and sets all jnodes in
  75695. + the small list to point to the large atom. Returns the length of the list. */
  75696. +static int
  75697. +capture_fuse_jnode_lists(txn_atom *large, struct list_head *large_head,
  75698. + struct list_head *small_head)
  75699. +{
  75700. + int count = 0;
  75701. + jnode *node;
  75702. +
  75703. + assert("umka-218", large != NULL);
  75704. + assert("umka-219", large_head != NULL);
  75705. + assert("umka-220", small_head != NULL);
  75706. + /* small atom should be locked also. */
  75707. + assert_spin_locked(&(large->alock));
  75708. +
  75709. + /* For every jnode on small's capture list... */
  75710. + list_for_each_entry(node, small_head, capture_link) {
  75711. + count += 1;
  75712. +
  75713. + /* With the jnode lock held, update atom pointer. */
  75714. + spin_lock_jnode(node);
  75715. + node->atom = large;
  75716. + spin_unlock_jnode(node);
  75717. + }
  75718. +
  75719. + /* Splice the lists. */
  75720. + list_splice_init(small_head, large_head->prev);
  75721. +
  75722. + return count;
  75723. +}
  75724. +
  75725. +/* This function splices together two txnh lists (small and large) and sets all txn handles in
  75726. + the small list to point to the large atom. Returns the length of the list. */
  75727. +static int
  75728. +capture_fuse_txnh_lists(txn_atom *large, struct list_head *large_head,
  75729. + struct list_head *small_head)
  75730. +{
  75731. + int count = 0;
  75732. + txn_handle *txnh;
  75733. +
  75734. + assert("umka-221", large != NULL);
  75735. + assert("umka-222", large_head != NULL);
  75736. + assert("umka-223", small_head != NULL);
  75737. +
  75738. + /* Adjust every txnh to the new atom. */
  75739. + list_for_each_entry(txnh, small_head, txnh_link) {
  75740. + count += 1;
  75741. +
  75742. + /* With the txnh lock held, update atom pointer. */
  75743. + spin_lock_txnh(txnh);
  75744. + txnh->atom = large;
  75745. + spin_unlock_txnh(txnh);
  75746. + }
  75747. +
  75748. + /* Splice the txn_handle list. */
  75749. + list_splice_init(small_head, large_head->prev);
  75750. +
  75751. + return count;
  75752. +}
  75753. +
  75754. +/* This function fuses two atoms. The captured nodes and handles belonging to SMALL are
  75755. + added to LARGE and their ->atom pointers are all updated. The associated counts are
  75756. + updated as well, and any waiting handles belonging to either are awakened. Finally the
  75757. + smaller atom's refcount is decremented.
  75758. +*/
  75759. +static void capture_fuse_into(txn_atom * small, txn_atom * large)
  75760. +{
  75761. + int level;
  75762. + unsigned zcount = 0;
  75763. + unsigned tcount = 0;
  75764. +
  75765. + assert("umka-224", small != NULL);
  75766. + assert("umka-225", large != NULL);
  75767. +
  75768. + assert_spin_locked(&(large->alock));
  75769. + assert_spin_locked(&(small->alock));
  75770. +
  75771. + assert("jmacd-201", atom_isopen(small));
  75772. + assert("jmacd-202", atom_isopen(large));
  75773. +
  75774. + /* Splice and update the per-level dirty jnode lists */
  75775. + for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) {
  75776. + zcount +=
  75777. + capture_fuse_jnode_lists(large,
  75778. + ATOM_DIRTY_LIST(large, level),
  75779. + ATOM_DIRTY_LIST(small, level));
  75780. + }
  75781. +
  75782. + /* Splice and update the [clean,dirty] jnode and txnh lists */
  75783. + zcount +=
  75784. + capture_fuse_jnode_lists(large, ATOM_CLEAN_LIST(large),
  75785. + ATOM_CLEAN_LIST(small));
  75786. + zcount +=
  75787. + capture_fuse_jnode_lists(large, ATOM_OVRWR_LIST(large),
  75788. + ATOM_OVRWR_LIST(small));
  75789. + zcount +=
  75790. + capture_fuse_jnode_lists(large, ATOM_WB_LIST(large),
  75791. + ATOM_WB_LIST(small));
  75792. + zcount +=
  75793. + capture_fuse_jnode_lists(large, &large->inodes, &small->inodes);
  75794. + tcount +=
  75795. + capture_fuse_txnh_lists(large, &large->txnh_list,
  75796. + &small->txnh_list);
  75797. +
  75798. + /* Check our accounting. */
  75799. + assert("jmacd-1063",
  75800. + zcount + small->num_queued == small->capture_count);
  75801. + assert("jmacd-1065", tcount == small->txnh_count);
  75802. +
  75803. + /* sum numbers of waiter threads */
  75804. + large->nr_waiters += small->nr_waiters;
  75805. + small->nr_waiters = 0;
  75806. +
  75807. + /* splice flush queues */
  75808. + reiser4_fuse_fq(large, small);
  75809. +
  75810. + /* update counters of jnodes on every atom list */
  75811. + ON_DEBUG(large->dirty += small->dirty;
  75812. + small->dirty = 0;
  75813. + large->clean += small->clean;
  75814. + small->clean = 0;
  75815. + large->ovrwr += small->ovrwr;
  75816. + small->ovrwr = 0;
  75817. + large->wb += small->wb;
  75818. + small->wb = 0;
  75819. + large->fq += small->fq;
  75820. + small->fq = 0;);
  75821. +
  75822. + /* count flushers in result atom */
  75823. + large->nr_flushers += small->nr_flushers;
  75824. + small->nr_flushers = 0;
  75825. +
  75826. + /* update counts of flushed nodes */
  75827. + large->flushed += small->flushed;
  75828. + small->flushed = 0;
  75829. +
  75830. + /* Transfer list counts to large. */
  75831. + large->txnh_count += small->txnh_count;
  75832. + large->capture_count += small->capture_count;
  75833. +
  75834. + /* Add all txnh references to large. */
  75835. + atomic_add(small->txnh_count, &large->refcount);
  75836. + atomic_sub(small->txnh_count, &small->refcount);
  75837. +
  75838. + /* Reset small counts */
  75839. + small->txnh_count = 0;
  75840. + small->capture_count = 0;
  75841. +
  75842. + /* Assign the oldest start_time, merge flags. */
  75843. + large->start_time = min(large->start_time, small->start_time);
  75844. + large->flags |= small->flags;
  75845. +
  75846. + /* Merge blocknr sets. */
  75847. + blocknr_set_merge(&small->wandered_map, &large->wandered_map);
  75848. +
  75849. + /* Merge delete sets. */
  75850. + atom_dset_merge(small, large);
  75851. +
  75852. + /* Merge allocated/deleted file counts */
  75853. + large->nr_objects_deleted += small->nr_objects_deleted;
  75854. + large->nr_objects_created += small->nr_objects_created;
  75855. +
  75856. + small->nr_objects_deleted = 0;
  75857. + small->nr_objects_created = 0;
  75858. +
  75859. + /* Merge allocated blocks counts */
  75860. + large->nr_blocks_allocated += small->nr_blocks_allocated;
  75861. +
  75862. + large->nr_running_queues += small->nr_running_queues;
  75863. + small->nr_running_queues = 0;
  75864. +
  75865. + /* Merge blocks reserved for overwrite set. */
  75866. + large->flush_reserved += small->flush_reserved;
  75867. + small->flush_reserved = 0;
  75868. +
  75869. + if (large->stage < small->stage) {
  75870. + /* Large only needs to notify if it has changed state. */
  75871. + reiser4_atom_set_stage(large, small->stage);
  75872. + wakeup_atom_waiting_list(large);
  75873. + }
  75874. +
  75875. + reiser4_atom_set_stage(small, ASTAGE_INVALID);
  75876. +
  75877. + /* Notify any waiters--small needs to unload its wait lists. Waiters
  75878. + actually remove themselves from the list before returning from the
  75879. + fuse_wait function. */
  75880. + wakeup_atom_waiting_list(small);
  75881. +
  75882. + /* Unlock atoms */
  75883. + spin_unlock_atom(large);
  75884. + atom_dec_and_unlock(small);
  75885. +}
  75886. +
  75887. +/* TXNMGR STUFF */
  75888. +
  75889. +/* Release a block from the atom, reversing the effects of being captured,
  75890. + do not release atom's reference to jnode due to holding spin-locks.
  75891. + Currently this is only called when the atom commits.
  75892. +
  75893. + NOTE: this function does not release a (journal) reference to jnode
  75894. + due to locking optimizations, you should call jput() somewhere after
  75895. + calling reiser4_uncapture_block(). */
  75896. +void reiser4_uncapture_block(jnode * node)
  75897. +{
  75898. + txn_atom *atom;
  75899. +
  75900. + assert("umka-226", node != NULL);
  75901. + atom = node->atom;
  75902. + assert("umka-228", atom != NULL);
  75903. +
  75904. + assert("jmacd-1021", node->atom == atom);
  75905. + assert_spin_locked(&(node->guard));
  75906. + assert("jmacd-1023", atom_is_protected(atom));
  75907. +
  75908. + JF_CLR(node, JNODE_DIRTY);
  75909. + JF_CLR(node, JNODE_RELOC);
  75910. + JF_CLR(node, JNODE_OVRWR);
  75911. + JF_CLR(node, JNODE_CREATED);
  75912. + JF_CLR(node, JNODE_WRITEBACK);
  75913. + JF_CLR(node, JNODE_REPACK);
  75914. +
  75915. + list_del_init(&node->capture_link);
  75916. + if (JF_ISSET(node, JNODE_FLUSH_QUEUED)) {
  75917. + assert("zam-925", atom_isopen(atom));
  75918. + assert("vs-1623", NODE_LIST(node) == FQ_LIST);
  75919. + ON_DEBUG(atom->num_queued--);
  75920. + JF_CLR(node, JNODE_FLUSH_QUEUED);
  75921. + }
  75922. + atom->capture_count -= 1;
  75923. + ON_DEBUG(count_jnode(atom, node, NODE_LIST(node), NOT_CAPTURED, 1));
  75924. + node->atom = NULL;
  75925. +
  75926. + spin_unlock_jnode(node);
  75927. + LOCK_CNT_DEC(t_refs);
  75928. +}
  75929. +
  75930. +/* Unconditional insert of jnode into atom's overwrite list. Currently used in
  75931. + bitmap-based allocator code for adding modified bitmap blocks to the
  75932. + transaction. @atom and @node are spin locked */
  75933. +void insert_into_atom_ovrwr_list(txn_atom * atom, jnode * node)
  75934. +{
  75935. + assert("zam-538", atom_is_protected(atom));
  75936. + assert_spin_locked(&(node->guard));
  75937. + assert("zam-899", JF_ISSET(node, JNODE_OVRWR));
  75938. + assert("zam-543", node->atom == NULL);
  75939. + assert("vs-1433", !jnode_is_unformatted(node) && !jnode_is_znode(node));
  75940. +
  75941. + list_add(&node->capture_link, ATOM_OVRWR_LIST(atom));
  75942. + jref(node);
  75943. + node->atom = atom;
  75944. + atom->capture_count++;
  75945. + ON_DEBUG(count_jnode(atom, node, NODE_LIST(node), OVRWR_LIST, 1));
  75946. +}
  75947. +
  75948. +static int count_deleted_blocks_actor(txn_atom * atom,
  75949. + const reiser4_block_nr * a,
  75950. + const reiser4_block_nr * b, void *data)
  75951. +{
  75952. + reiser4_block_nr *counter = data;
  75953. +
  75954. + assert("zam-995", data != NULL);
  75955. + assert("zam-996", a != NULL);
  75956. + if (b == NULL)
  75957. + *counter += 1;
  75958. + else
  75959. + *counter += *b;
  75960. + return 0;
  75961. +}
  75962. +
  75963. +reiser4_block_nr txnmgr_count_deleted_blocks(void)
  75964. +{
  75965. + reiser4_block_nr result;
  75966. + txn_mgr *tmgr = &get_super_private(reiser4_get_current_sb())->tmgr;
  75967. + txn_atom *atom;
  75968. +
  75969. + result = 0;
  75970. +
  75971. + spin_lock_txnmgr(tmgr);
  75972. + list_for_each_entry(atom, &tmgr->atoms_list, atom_link) {
  75973. + spin_lock_atom(atom);
  75974. + if (atom_isopen(atom))
  75975. + atom_dset_deferred_apply(atom, count_deleted_blocks_actor, &result, 0);
  75976. + spin_unlock_atom(atom);
  75977. + }
  75978. + spin_unlock_txnmgr(tmgr);
  75979. +
  75980. + return result;
  75981. +}
  75982. +
  75983. +void atom_dset_init(txn_atom *atom)
  75984. +{
  75985. + if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
  75986. + blocknr_list_init(&atom->discard.delete_set);
  75987. + } else {
  75988. + blocknr_set_init(&atom->nodiscard.delete_set);
  75989. + }
  75990. +}
  75991. +
  75992. +void atom_dset_destroy(txn_atom *atom)
  75993. +{
  75994. + if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
  75995. + blocknr_list_destroy(&atom->discard.delete_set);
  75996. + } else {
  75997. + blocknr_set_destroy(&atom->nodiscard.delete_set);
  75998. + }
  75999. +}
  76000. +
  76001. +void atom_dset_merge(txn_atom *from, txn_atom *to)
  76002. +{
  76003. + if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
  76004. + blocknr_list_merge(&from->discard.delete_set, &to->discard.delete_set);
  76005. + } else {
  76006. + blocknr_set_merge(&from->nodiscard.delete_set, &to->nodiscard.delete_set);
  76007. + }
  76008. +}
  76009. +
  76010. +int atom_dset_deferred_apply(txn_atom* atom,
  76011. + blocknr_set_actor_f actor,
  76012. + void *data,
  76013. + int delete)
  76014. +{
  76015. + int ret;
  76016. +
  76017. + if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
  76018. + ret = blocknr_list_iterator(atom,
  76019. + &atom->discard.delete_set,
  76020. + actor,
  76021. + data,
  76022. + delete);
  76023. + } else {
  76024. + ret = blocknr_set_iterator(atom,
  76025. + &atom->nodiscard.delete_set,
  76026. + actor,
  76027. + data,
  76028. + delete);
  76029. + }
  76030. +
  76031. + return ret;
  76032. +}
  76033. +
  76034. +extern int atom_dset_deferred_add_extent(txn_atom *atom,
  76035. + void **new_entry,
  76036. + const reiser4_block_nr *start,
  76037. + const reiser4_block_nr *len)
  76038. +{
  76039. + int ret;
  76040. +
  76041. + if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
  76042. + ret = blocknr_list_add_extent(atom,
  76043. + &atom->discard.delete_set,
  76044. + (blocknr_list_entry**)new_entry,
  76045. + start,
  76046. + len);
  76047. + } else {
  76048. + ret = blocknr_set_add_extent(atom,
  76049. + &atom->nodiscard.delete_set,
  76050. + (blocknr_set_entry**)new_entry,
  76051. + start,
  76052. + len);
  76053. + }
  76054. +
  76055. + return ret;
  76056. +}
  76057. +
  76058. +/*
  76059. + * Local variables:
  76060. + * c-indentation-style: "K&R"
  76061. + * mode-name: "LC"
  76062. + * c-basic-offset: 8
  76063. + * tab-width: 8
  76064. + * fill-column: 79
  76065. + * End:
  76066. + */
  76067. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/txnmgr.h linux-5.16.14/fs/reiser4/txnmgr.h
  76068. --- linux-5.16.14.orig/fs/reiser4/txnmgr.h 1970-01-01 01:00:00.000000000 +0100
  76069. +++ linux-5.16.14/fs/reiser4/txnmgr.h 2022-03-12 13:26:19.691892823 +0100
  76070. @@ -0,0 +1,755 @@
  76071. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  76072. + * reiser4/README */
  76073. +
  76074. +/* data-types and function declarations for transaction manager. See txnmgr.c
  76075. + * for details. */
  76076. +
  76077. +#ifndef __REISER4_TXNMGR_H__
  76078. +#define __REISER4_TXNMGR_H__
  76079. +
  76080. +#include "forward.h"
  76081. +#include "dformat.h"
  76082. +
  76083. +#include <linux/fs.h>
  76084. +#include <linux/mm.h>
  76085. +#include <linux/types.h>
  76086. +#include <linux/spinlock.h>
  76087. +#include <asm/atomic.h>
  76088. +#include <linux/wait.h>
  76089. +
  76090. +/* TYPE DECLARATIONS */
  76091. +
  76092. +/* This enumeration describes the possible types of a capture request (reiser4_try_capture).
  76093. + A capture request dynamically assigns a block to the calling thread's transaction
  76094. + handle. */
  76095. +typedef enum {
  76096. + /* A READ_ATOMIC request indicates that a block will be read and that the caller's
  76097. + atom should fuse in order to ensure that the block commits atomically with the
  76098. + caller. */
  76099. + TXN_CAPTURE_READ_ATOMIC = (1 << 0),
  76100. +
  76101. + /* A READ_NONCOM request indicates that a block will be read and that the caller is
  76102. + willing to read a non-committed block without causing atoms to fuse. */
  76103. + TXN_CAPTURE_READ_NONCOM = (1 << 1),
  76104. +
  76105. + /* A READ_MODIFY request indicates that a block will be read but that the caller
  76106. + wishes for the block to be captured as it will be written. This capture request
  76107. + mode is not currently used, but eventually it will be useful for preventing
  76108. + deadlock in read-modify-write cycles. */
  76109. + TXN_CAPTURE_READ_MODIFY = (1 << 2),
  76110. +
  76111. + /* A WRITE capture request indicates that a block will be modified and that atoms
  76112. + should fuse to make the commit atomic. */
  76113. + TXN_CAPTURE_WRITE = (1 << 3),
  76114. +
  76115. + /* CAPTURE_TYPES is a mask of the four above capture types, used to separate the
  76116. + exclusive type designation from extra bits that may be supplied -- see
  76117. + below. */
  76118. + TXN_CAPTURE_TYPES = (TXN_CAPTURE_READ_ATOMIC |
  76119. + TXN_CAPTURE_READ_NONCOM | TXN_CAPTURE_READ_MODIFY |
  76120. + TXN_CAPTURE_WRITE),
  76121. +
  76122. + /* A subset of CAPTURE_TYPES, CAPTURE_WTYPES is a mask of request types that
  76123. + indicate modification will occur. */
  76124. + TXN_CAPTURE_WTYPES = (TXN_CAPTURE_READ_MODIFY | TXN_CAPTURE_WRITE),
  76125. +
  76126. + /* An option to reiser4_try_capture, NONBLOCKING indicates that the caller would
  76127. + prefer not to sleep waiting for an aging atom to commit. */
  76128. + TXN_CAPTURE_NONBLOCKING = (1 << 4),
  76129. +
  76130. + /* An option to reiser4_try_capture to prevent atom fusion, just simple
  76131. + capturing is allowed */
  76132. + TXN_CAPTURE_DONT_FUSE = (1 << 5)
  76133. +
  76134. + /* This macro selects only the exclusive capture request types, stripping out any
  76135. + options that were supplied (i.e., NONBLOCKING). */
  76136. +#define CAPTURE_TYPE(x) ((x) & TXN_CAPTURE_TYPES)
  76137. +} txn_capture;
  76138. +
  76139. +/* There are two kinds of transaction handle: WRITE_FUSING and READ_FUSING, the only
  76140. + difference is in the handling of read requests. A WRITE_FUSING transaction handle
  76141. + defaults read capture requests to TXN_CAPTURE_READ_NONCOM whereas a READ_FUSING
  76142. + transaction handle defaults to TXN_CAPTURE_READ_ATOMIC. */
  76143. +typedef enum {
  76144. + TXN_WRITE_FUSING = (1 << 0),
  76145. + TXN_READ_FUSING = (1 << 1) | TXN_WRITE_FUSING, /* READ implies WRITE */
  76146. +} txn_mode;
  76147. +
  76148. +/* Every atom has a stage, which is one of these exclusive values: */
  76149. +typedef enum {
  76150. + /* Initially an atom is free. */
  76151. + ASTAGE_FREE = 0,
  76152. +
  76153. + /* An atom begins by entering the CAPTURE_FUSE stage, where it proceeds to capture
  76154. + blocks and fuse with other atoms. */
  76155. + ASTAGE_CAPTURE_FUSE = 1,
  76156. +
  76157. + /* We need to have a ASTAGE_CAPTURE_SLOW in which an atom fuses with one node for every X nodes it flushes to disk where X > 1. */
  76158. +
  76159. + /* When an atom reaches a certain age it must do all it can to commit. An atom in
  76160. + the CAPTURE_WAIT stage refuses new transaction handles and prevents fusion from
  76161. + atoms in the CAPTURE_FUSE stage. */
  76162. + ASTAGE_CAPTURE_WAIT = 2,
  76163. +
  76164. + /* Waiting for I/O before commit. Copy-on-capture (see
  76165. + http://namesys.com/v4/v4.html). */
  76166. + ASTAGE_PRE_COMMIT = 3,
  76167. +
  76168. + /* Post-commit overwrite I/O. Steal-on-capture. */
  76169. + ASTAGE_POST_COMMIT = 4,
  76170. +
  76171. + /* Atom which waits for the removal of the last reference to it
  76172. + * before being deleted from memory */
  76173. + ASTAGE_DONE = 5,
  76174. +
  76175. + /* invalid atom. */
  76176. + ASTAGE_INVALID = 6,
  76177. +
  76178. +} txn_stage;
  76179. +
  76180. +/* Certain flags may be set in the txn_atom->flags field. */
  76181. +typedef enum {
  76182. + /* Indicates that the atom should commit as soon as possible. */
  76183. + ATOM_FORCE_COMMIT = (1 << 0),
  76184. + /* to avoid endless loop, mark the atom (which was considered as too
  76185. + * small) after failed attempt to fuse it. */
  76186. + ATOM_CANCEL_FUSION = (1 << 1)
  76187. +} txn_flags;
  76188. +
  76189. +/* Flags for controlling commit_txnh */
  76190. +typedef enum {
  76191. + /* Wait commit atom completion in commit_txnh */
  76192. + TXNH_WAIT_COMMIT = 0x2,
  76193. + /* Don't commit atom when this handle is closed */
  76194. + TXNH_DONT_COMMIT = 0x4
  76195. +} txn_handle_flags_t;
  76196. +
  76197. +/* TYPE DEFINITIONS */
  76198. +
  76199. +/* A note on lock ordering: the handle & jnode spinlock protects reading of their ->atom
  76200. + fields, so typically an operation on the atom through either of these objects must (1)
  76201. + lock the object, (2) read the atom pointer, (3) lock the atom.
  76202. +
  76203. + During atom fusion, the process holds locks on both atoms at once. Then, it iterates
  76204. + through the list of handles and pages held by the smaller of the two atoms. For each
  76205. + handle and page referencing the smaller atom, the fusing process must: (1) lock the
  76206. + object, and (2) update the atom pointer.
  76207. +
  76208. + You can see that there is a conflict of lock ordering here, so the more-complex
  76209. + procedure should have priority, i.e., the fusing process has priority so that it is
  76210. + guaranteed to make progress and to avoid restarts.
  76211. +
  76212. + This decision, however, means additional complexity for acquiring the atom lock in the
  76213. + first place.
  76214. +
  76215. + The general original procedure followed in the code was:
  76216. +
  76217. + TXN_OBJECT *obj = ...;
  76218. + TXN_ATOM *atom;
  76219. +
  76220. + spin_lock (& obj->_lock);
  76221. +
  76222. + atom = obj->_atom;
  76223. +
  76224. + if (! spin_trylock_atom (atom))
  76225. + {
  76226. + spin_unlock (& obj->_lock);
  76227. + RESTART OPERATION, THERE WAS A RACE;
  76228. + }
  76229. +
  76230. + ELSE YOU HAVE BOTH ATOM AND OBJ LOCKED
  76231. +
  76232. + It has however been found that this wastes CPU a lot in a manner that is
  76233. + hard to profile. So, proper refcounting was added to atoms, and new
  76234. + standard locking sequence is like following:
  76235. +
  76236. + TXN_OBJECT *obj = ...;
  76237. + TXN_ATOM *atom;
  76238. +
  76239. + spin_lock (& obj->_lock);
  76240. +
  76241. + atom = obj->_atom;
  76242. +
  76243. + if (! spin_trylock_atom (atom))
  76244. + {
  76245. + atomic_inc (& atom->refcount);
  76246. + spin_unlock (& obj->_lock);
  76247. + spin_lock (&atom->_lock);
  76248. + atomic_dec (& atom->refcount);
  76249. + // HERE atom is locked
  76250. + spin_unlock (&atom->_lock);
  76251. + RESTART OPERATION, THERE WAS A RACE;
  76252. + }
  76253. +
  76254. + ELSE YOU HAVE BOTH ATOM AND OBJ LOCKED
  76255. +
  76256. + (core of this is implemented in trylock_throttle() function)
  76257. +
  76258. + See the jnode_get_atom() function for a common case.
  76259. +
  76260. + As an additional (and important) optimization allowing to avoid restarts,
  76261. + it is possible to re-check required pre-conditions at the HERE point in
  76262. + code above and proceed without restarting if they are still satisfied.
  76263. +*/
  76264. +
  76265. +/* An atomic transaction: this is the underlying system representation
  76266. + of a transaction, not the one seen by clients.
  76267. +
  76268. + Invariants involving this data-type:
  76269. +
  76270. + [sb-fake-allocated]
  76271. +*/
  76272. +struct txn_atom {
  76273. + /* The spinlock protecting the atom, held during fusion and various other state
  76274. + changes. */
  76275. + spinlock_t alock;
  76276. +
  76277. + /* The atom's reference counter, increasing (in case of a duplication
  76278. + of an existing reference or when we are sure that some other
  76279. + reference exists) may be done without taking spinlock, decrementing
  76280. + of the ref. counter requires a spinlock to be held.
  76281. +
  76282. + Each transaction handle counts in ->refcount. All jnodes count as
  76283. + one reference acquired in atom_begin_andlock(), released in
  76284. + commit_current_atom().
  76285. + */
  76286. + atomic_t refcount;
  76287. +
  76288. + /* The atom_id identifies the atom in persistent records such as the log. */
  76289. + __u32 atom_id;
  76290. +
  76291. + /* Flags holding any of the txn_flags enumerated values (e.g.,
  76292. + ATOM_FORCE_COMMIT). */
  76293. + __u32 flags;
  76294. +
  76295. + /* Number of open handles. */
  76296. + __u32 txnh_count;
  76297. +
  76298. + /* The number of znodes captured by this atom. Equal to the sum of lengths of the
  76299. + dirty_nodes[level] and clean_nodes lists. */
  76300. + __u32 capture_count;
  76301. +
  76302. +#if REISER4_DEBUG
  76303. + int clean;
  76304. + int dirty;
  76305. + int ovrwr;
  76306. + int wb;
  76307. + int fq;
  76308. +#endif
  76309. +
  76310. + __u32 flushed;
  76311. +
  76312. + /* Current transaction stage. */
  76313. + txn_stage stage;
  76314. +
  76315. + /* Start time. */
  76316. + unsigned long start_time;
  76317. +
  76318. + /* The atom's delete sets.
  76319. + "simple" are blocknr_set instances and are used when discard is disabled.
  76320. + "discard" are blocknr_list instances and are used when discard is enabled. */
  76321. + union {
  76322. + struct {
  76323. + /* The atom's delete set. It collects block numbers of the nodes
  76324. + which were deleted during the transaction. */
  76325. + struct list_head delete_set;
  76326. + } nodiscard;
  76327. +
  76328. + struct {
  76329. + /* The atom's delete set. It collects all blocks that have been
  76330. + deallocated (both immediate and deferred) during the transaction.
  76331. + These blocks are considered for discarding at commit time.
  76332. + For details see discard.c */
  76333. + struct list_head delete_set;
  76334. + } discard;
  76335. + };
  76336. +
  76337. + /* The atom's wandered_block mapping. */
  76338. + struct list_head wandered_map;
  76339. +
  76340. + /* The transaction's list of dirty captured nodes--per level. Index
  76341. + by (level). dirty_nodes[0] is for znode-above-root */
  76342. + struct list_head dirty_nodes[REAL_MAX_ZTREE_HEIGHT + 1];
  76343. +
  76344. + /* The transaction's list of clean captured nodes. */
  76345. + struct list_head clean_nodes;
  76346. +
  76347. + /* The atom's overwrite set */
  76348. + struct list_head ovrwr_nodes;
  76349. +
  76350. + /* nodes which are being written to disk */
  76351. + struct list_head writeback_nodes;
  76352. +
  76353. + /* list of inodes */
  76354. + struct list_head inodes;
  76355. +
  76356. + /* List of handles associated with this atom. */
  76357. + struct list_head txnh_list;
  76358. +
  76359. + /* Transaction list link: list of atoms in the transaction manager. */
  76360. + struct list_head atom_link;
  76361. +
  76362. + /* List of handles waiting FOR this atom: see 'capture_fuse_wait' comment. */
  76363. + struct list_head fwaitfor_list;
  76364. +
  76365. + /* List of this atom's handles that are waiting: see 'capture_fuse_wait' comment. */
  76366. + struct list_head fwaiting_list;
  76367. +
  76368. + /* Numbers of objects which were deleted/created in this transaction
  76369. + thereby numbers of objects IDs which were released/deallocated. */
  76370. + int nr_objects_deleted;
  76371. + int nr_objects_created;
  76372. + /* number of blocks allocated during the transaction */
  76373. + __u64 nr_blocks_allocated;
  76374. + /* All atom's flush queue objects are on this list */
  76375. + struct list_head flush_queues;
  76376. +#if REISER4_DEBUG
  76377. + /* number of flush queues for this atom. */
  76378. + int nr_flush_queues;
  76379. + /* Number of jnodes which were removed from atom's lists and put
  76380. + on flush_queue */
  76381. + int num_queued;
  76382. +#endif
  76383. + /* number of threads who wait for this atom to complete commit */
  76384. + int nr_waiters;
  76385. + /* number of threads which do jnode_flush() over this atom */
  76386. + int nr_flushers;
  76387. + /* number of flush queues which are IN_USE and jnodes from fq->prepped
  76388. + are submitted to disk by the reiser4_write_fq() routine. */
  76389. + int nr_running_queues;
  76390. + /* A counter of grabbed unformatted nodes, see a description of the
  76391. + * reiser4 space reservation scheme at block_alloc.c */
  76392. + reiser4_block_nr flush_reserved;
  76393. +#if REISER4_DEBUG
  76394. + void *committer;
  76395. +#endif
  76396. + struct super_block *super;
  76397. +};
  76398. +
  76399. +#define ATOM_DIRTY_LIST(atom, level) (&(atom)->dirty_nodes[level])
  76400. +#define ATOM_CLEAN_LIST(atom) (&(atom)->clean_nodes)
  76401. +#define ATOM_OVRWR_LIST(atom) (&(atom)->ovrwr_nodes)
  76402. +#define ATOM_WB_LIST(atom) (&(atom)->writeback_nodes)
  76403. +#define ATOM_FQ_LIST(fq) (&(fq)->prepped)
  76404. +
  76405. +#define NODE_LIST(node) (node)->list
  76406. +#define ASSIGN_NODE_LIST(node, list) ON_DEBUG(NODE_LIST(node) = list)
  76407. +ON_DEBUG(void
  76408. + count_jnode(txn_atom *, jnode *, atom_list old_list,
  76409. + atom_list new_list, int check_lists));
  76410. +
  76411. +/* A transaction handle: the client obtains and commits this handle which is assigned by
  76412. + the system to a txn_atom. */
  76413. +struct txn_handle {
  76414. + /* Spinlock protecting ->atom pointer */
  76415. + spinlock_t hlock;
  76416. +
  76417. + /* Flags for controlling commit_txnh() behavior */
  76418. + /* from txn_handle_flags_t */
  76419. + txn_handle_flags_t flags;
  76420. +
  76421. + /* Whether it is READ_FUSING or WRITE_FUSING. */
  76422. + txn_mode mode;
  76423. +
  76424. + /* If assigned, the atom it is part of. */
  76425. + txn_atom *atom;
  76426. +
  76427. + /* Transaction list link. Head is in txn_atom. */
  76428. + struct list_head txnh_link;
  76429. +};
  76430. +
  76431. +/* The transaction manager: one is contained in the reiser4_super_info_data */
  76432. +struct txn_mgr {
  76433. + /* A spinlock protecting the atom list, id_count, flush_control */
  76434. + spinlock_t tmgr_lock;
  76435. +
  76436. + /* List of atoms. */
  76437. + struct list_head atoms_list;
  76438. +
  76439. + /* Number of atoms. */
  76440. + int atom_count;
  76441. +
  76442. + /* A counter used to assign atom->atom_id values. */
  76443. + __u32 id_count;
  76444. +
  76445. + /* a mutex object for commit serialization */
  76446. + struct mutex commit_mutex;
  76447. +
  76448. + /* a list of all txnmgrs served by a particular daemon. */
  76449. + struct list_head linkage;
  76450. +
  76451. + /* description of daemon for this txnmgr */
  76452. + ktxnmgrd_context *daemon;
  76453. +
  76454. + /* parameters. Adjustable through mount options. */
  76455. + unsigned int atom_max_size;
  76456. + unsigned int atom_max_age;
  76457. + unsigned int atom_min_size;
  76458. + /* max number of concurrent flushers for one atom, 0 - unlimited. */
  76459. + unsigned int atom_max_flushers;
  76460. + struct dentry *debugfs_atom_count;
  76461. + struct dentry *debugfs_id_count;
  76462. +};
  76463. +
  76464. +/* FUNCTION DECLARATIONS */
  76465. +
  76466. +/* These are the externally (within Reiser4) visible transaction functions, therefore they
  76467. + are prefixed with "txn_". For comments, see txnmgr.c. */
  76468. +
  76469. +extern int init_txnmgr_static(void);
  76470. +extern void done_txnmgr_static(void);
  76471. +
  76472. +extern void reiser4_init_txnmgr(txn_mgr *);
  76473. +extern void reiser4_done_txnmgr(txn_mgr *);
  76474. +
  76475. +extern int reiser4_txn_reserve(int reserved);
  76476. +
  76477. +extern void reiser4_txn_begin(reiser4_context * context);
  76478. +extern int reiser4_txn_end(reiser4_context * context);
  76479. +
  76480. +extern void reiser4_txn_restart(reiser4_context * context);
  76481. +extern void reiser4_txn_restart_current(void);
  76482. +
  76483. +extern int txnmgr_force_commit_all(struct super_block *, int);
  76484. +extern int current_atom_should_commit(void);
  76485. +
  76486. +extern jnode *find_first_dirty_jnode(txn_atom *, int);
  76487. +
  76488. +extern int commit_some_atoms(txn_mgr *);
  76489. +extern int force_commit_atom(txn_handle *);
  76490. +extern int flush_current_atom(int, long, long *, txn_atom **, jnode *);
  76491. +
  76492. +extern int flush_some_atom(jnode *, long *, const struct writeback_control *, int);
  76493. +
  76494. +extern void reiser4_atom_set_stage(txn_atom * atom, txn_stage stage);
  76495. +
  76496. +extern int same_slum_check(jnode * base, jnode * check, int alloc_check,
  76497. + int alloc_value);
  76498. +extern void atom_dec_and_unlock(txn_atom * atom);
  76499. +
  76500. +extern int reiser4_try_capture(jnode * node, znode_lock_mode mode, txn_capture flags);
  76501. +extern int try_capture_page_to_invalidate(struct page *pg);
  76502. +
  76503. +extern void reiser4_uncapture_page(struct page *pg);
  76504. +extern void reiser4_uncapture_block(jnode *);
  76505. +extern void reiser4_uncapture_jnode(jnode *);
  76506. +
  76507. +extern int reiser4_capture_inode(struct inode *);
  76508. +extern int reiser4_uncapture_inode(struct inode *);
  76509. +
  76510. +extern txn_atom *get_current_atom_locked_nocheck(void);
  76511. +
  76512. +#if REISER4_DEBUG
  76513. +
  76514. +/**
  76515. + * atom_is_protected - make sure that nobody but us can do anything with atom
  76516. + * @atom: atom to be checked
  76517. + *
  76518. + * This is used to assert that atom either entered commit stages or is spin
  76519. + * locked.
  76520. + */
  76521. +static inline int atom_is_protected(txn_atom *atom)
  76522. +{
  76523. + if (atom->stage >= ASTAGE_PRE_COMMIT)
  76524. + return 1;
  76525. + assert_spin_locked(&(atom->alock));
  76526. + return 1;
  76527. +}
  76528. +
  76529. +#endif
  76530. +
  76531. +/* Get the current atom and spinlock it. Asserts that an atom is present; never returns NULL */
  76532. +static inline txn_atom *get_current_atom_locked(void)
  76533. +{
  76534. + txn_atom *atom;
  76535. +
  76536. + atom = get_current_atom_locked_nocheck();
  76537. + assert("zam-761", atom != NULL);
  76538. +
  76539. + return atom;
  76540. +}
  76541. +
  76542. +extern txn_atom *jnode_get_atom(jnode *);
  76543. +
  76544. +extern void reiser4_atom_wait_event(txn_atom *);
  76545. +extern void reiser4_atom_send_event(txn_atom *);
  76546. +
  76547. +extern void insert_into_atom_ovrwr_list(txn_atom * atom, jnode * node);
  76548. +extern int reiser4_capture_super_block(struct super_block *s);
  76549. +int capture_bulk(jnode **, int count);
  76550. +
  76551. +/* See the comment on the function blocknrset.c:blocknr_set_add for the
  76552. + calling convention of these three routines. */
  76553. +extern int blocknr_set_init_static(void);
  76554. +extern void blocknr_set_done_static(void);
  76555. +extern void blocknr_set_init(struct list_head * bset);
  76556. +extern void blocknr_set_destroy(struct list_head * bset);
  76557. +extern void blocknr_set_merge(struct list_head * from, struct list_head * into);
  76558. +extern int blocknr_set_add_extent(txn_atom * atom,
  76559. + struct list_head * bset,
  76560. + blocknr_set_entry ** new_bsep,
  76561. + const reiser4_block_nr * start,
  76562. + const reiser4_block_nr * len);
  76563. +extern int blocknr_set_add_pair(txn_atom * atom, struct list_head * bset,
  76564. + blocknr_set_entry ** new_bsep,
  76565. + const reiser4_block_nr * a,
  76566. + const reiser4_block_nr * b);
  76567. +
  76568. +typedef int (*blocknr_set_actor_f) (txn_atom *, const reiser4_block_nr *,
  76569. + const reiser4_block_nr *, void *);
  76570. +
  76571. +extern int blocknr_set_iterator(txn_atom * atom, struct list_head * bset,
  76572. + blocknr_set_actor_f actor, void *data,
  76573. + int delete);
  76574. +
  76575. +/* This is the block list interface (see blocknrlist.c) */
  76576. +extern int blocknr_list_init_static(void);
  76577. +extern void blocknr_list_done_static(void);
  76578. +extern void blocknr_list_init(struct list_head *blist);
  76579. +extern void blocknr_list_destroy(struct list_head *blist);
  76580. +extern void blocknr_list_merge(struct list_head *from, struct list_head *to);
  76581. +extern void blocknr_list_sort_and_join(struct list_head *blist);
  76582. +/**
  76583. + * The @atom should be locked.
  76584. + */
  76585. +extern int blocknr_list_add_extent(txn_atom *atom,
  76586. + struct list_head *blist,
  76587. + blocknr_list_entry **new_entry,
  76588. + const reiser4_block_nr *start,
  76589. + const reiser4_block_nr *len);
  76590. +extern int blocknr_list_iterator(txn_atom *atom,
  76591. + struct list_head *blist,
  76592. + blocknr_set_actor_f actor,
  76593. + void *data,
  76594. + int delete);
  76595. +
  76596. +/* These are wrappers for accessing and modifying atom's delete lists,
  76597. + depending on whether discard is enabled or not.
  76598. + If it is enabled, (less memory efficient) blocknr_list is used for delete
  76599. + list storage. Otherwise, blocknr_set is used for this purpose. */
  76600. +extern void atom_dset_init(txn_atom *atom);
  76601. +extern void atom_dset_destroy(txn_atom *atom);
  76602. +extern void atom_dset_merge(txn_atom *from, txn_atom *to);
  76603. +extern int atom_dset_deferred_apply(txn_atom* atom,
  76604. + blocknr_set_actor_f actor,
  76605. + void *data,
  76606. + int delete);
  76607. +extern int atom_dset_deferred_add_extent(txn_atom *atom,
  76608. + void **new_entry,
  76609. + const reiser4_block_nr *start,
  76610. + const reiser4_block_nr *len);
  76611. +
  76612. +/* flush code takes care about how to fuse flush queues */
  76613. +extern void flush_init_atom(txn_atom * atom);
  76614. +extern void flush_fuse_queues(txn_atom * large, txn_atom * small);
  76615. +
  76616. +static inline void spin_lock_atom(txn_atom *atom)
  76617. +{
  76618. + /* check that spinlocks of lower priorities are not held */
  76619. + assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
  76620. + LOCK_CNT_NIL(spin_locked_atom) &&
  76621. + LOCK_CNT_NIL(spin_locked_jnode) &&
  76622. + LOCK_CNT_NIL(spin_locked_zlock) &&
  76623. + LOCK_CNT_NIL(rw_locked_dk) &&
  76624. + LOCK_CNT_NIL(rw_locked_tree)));
  76625. +
  76626. + spin_lock(&(atom->alock));
  76627. +
  76628. + LOCK_CNT_INC(spin_locked_atom);
  76629. + LOCK_CNT_INC(spin_locked);
  76630. +}
  76631. +
  76632. +static inline void spin_lock_atom_nested(txn_atom *atom)
  76633. +{
  76634. + assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
  76635. + LOCK_CNT_NIL(spin_locked_jnode) &&
  76636. + LOCK_CNT_NIL(spin_locked_zlock) &&
  76637. + LOCK_CNT_NIL(rw_locked_dk) &&
  76638. + LOCK_CNT_NIL(rw_locked_tree)));
  76639. +
  76640. + spin_lock_nested(&(atom->alock), SINGLE_DEPTH_NESTING);
  76641. +
  76642. + LOCK_CNT_INC(spin_locked_atom);
  76643. + LOCK_CNT_INC(spin_locked);
  76644. +}
  76645. +
  76646. +static inline int spin_trylock_atom(txn_atom *atom)
  76647. +{
  76648. + if (spin_trylock(&(atom->alock))) {
  76649. + LOCK_CNT_INC(spin_locked_atom);
  76650. + LOCK_CNT_INC(spin_locked);
  76651. + return 1;
  76652. + }
  76653. + return 0;
  76654. +}
  76655. +
  76656. +static inline void spin_unlock_atom(txn_atom *atom)
  76657. +{
  76658. + assert_spin_locked(&(atom->alock));
  76659. + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_atom));
  76660. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  76661. +
  76662. + LOCK_CNT_DEC(spin_locked_atom);
  76663. + LOCK_CNT_DEC(spin_locked);
  76664. +
  76665. + spin_unlock(&(atom->alock));
  76666. +}
  76667. +
  76668. +static inline void spin_lock_txnh(txn_handle *txnh)
  76669. +{
  76670. + /* check that spinlocks of lower priorities are not held */
  76671. + assert("", (LOCK_CNT_NIL(rw_locked_dk) &&
  76672. + LOCK_CNT_NIL(spin_locked_zlock) &&
  76673. + LOCK_CNT_NIL(rw_locked_tree)));
  76674. +
  76675. + spin_lock(&(txnh->hlock));
  76676. +
  76677. + LOCK_CNT_INC(spin_locked_txnh);
  76678. + LOCK_CNT_INC(spin_locked);
  76679. +}
  76680. +
  76681. +static inline int spin_trylock_txnh(txn_handle *txnh)
  76682. +{
  76683. + if (spin_trylock(&(txnh->hlock))) {
  76684. + LOCK_CNT_INC(spin_locked_txnh);
  76685. + LOCK_CNT_INC(spin_locked);
  76686. + return 1;
  76687. + }
  76688. + return 0;
  76689. +}
  76690. +
  76691. +static inline void spin_unlock_txnh(txn_handle *txnh)
  76692. +{
  76693. + assert_spin_locked(&(txnh->hlock));
  76694. + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_txnh));
  76695. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  76696. +
  76697. + LOCK_CNT_DEC(spin_locked_txnh);
  76698. + LOCK_CNT_DEC(spin_locked);
  76699. +
  76700. + spin_unlock(&(txnh->hlock));
  76701. +}
  76702. +
  76703. +#define spin_ordering_pred_txnmgr(tmgr) \
  76704. + ( LOCK_CNT_NIL(spin_locked_atom) && \
  76705. + LOCK_CNT_NIL(spin_locked_txnh) && \
  76706. + LOCK_CNT_NIL(spin_locked_jnode) && \
  76707. + LOCK_CNT_NIL(rw_locked_zlock) && \
  76708. + LOCK_CNT_NIL(rw_locked_dk) && \
  76709. + LOCK_CNT_NIL(rw_locked_tree) )
  76710. +
  76711. +static inline void spin_lock_txnmgr(txn_mgr *mgr)
  76712. +{
  76713. + /* check that spinlocks of lower priorities are not held */
  76714. + assert("", (LOCK_CNT_NIL(spin_locked_atom) &&
  76715. + LOCK_CNT_NIL(spin_locked_txnh) &&
  76716. + LOCK_CNT_NIL(spin_locked_jnode) &&
  76717. + LOCK_CNT_NIL(spin_locked_zlock) &&
  76718. + LOCK_CNT_NIL(rw_locked_dk) &&
  76719. + LOCK_CNT_NIL(rw_locked_tree)));
  76720. +
  76721. + spin_lock(&(mgr->tmgr_lock));
  76722. +
  76723. + LOCK_CNT_INC(spin_locked_txnmgr);
  76724. + LOCK_CNT_INC(spin_locked);
  76725. +}
  76726. +
  76727. +static inline int spin_trylock_txnmgr(txn_mgr *mgr)
  76728. +{
  76729. + if (spin_trylock(&(mgr->tmgr_lock))) {
  76730. + LOCK_CNT_INC(spin_locked_txnmgr);
  76731. + LOCK_CNT_INC(spin_locked);
  76732. + return 1;
  76733. + }
  76734. + return 0;
  76735. +}
  76736. +
  76737. +static inline void spin_unlock_txnmgr(txn_mgr *mgr)
  76738. +{
  76739. + assert_spin_locked(&(mgr->tmgr_lock));
  76740. + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_txnmgr));
  76741. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  76742. +
  76743. + LOCK_CNT_DEC(spin_locked_txnmgr);
  76744. + LOCK_CNT_DEC(spin_locked);
  76745. +
  76746. + spin_unlock(&(mgr->tmgr_lock));
  76747. +}
  76748. +
  76749. +typedef enum {
  76750. + FQ_IN_USE = 0x1
  76751. +} flush_queue_state_t;
  76752. +
  76753. +typedef struct flush_queue flush_queue_t;
  76754. +
  76755. +/* This is an accumulator for jnodes prepared for writing to disk. A flush queue
  76756. + is filled by the jnode_flush() routine, and written to disk under memory
  76757. + pressure or at atom commit time. */
  76758. +/* LOCKING: fq state and fq->atom are protected by guard spinlock, fq->nr_queued
  76759. + field and fq->prepped list can be modified if atom is spin-locked and fq
  76760. + object is "in-use" state. For read-only traversal of the fq->prepped list
  76761. + and reading of the fq->nr_queued field it is enough to keep fq "in-use" or
  76762. + only have atom spin-locked. */
  76763. +struct flush_queue {
  76764. + /* linkage element is the first in this structure to make debugging
  76765. + easier. See field in atom struct for description of list. */
  76766. + struct list_head alink;
  76767. + /* A spinlock to protect changes of fq state and fq->atom pointer */
  76768. + spinlock_t guard;
  76769. + /* flush_queue state: [in_use | ready] */
  76770. + flush_queue_state_t state;
  76771. + /* A list which contains queued nodes, queued nodes are removed from any
  76772. + * atom's list and put on this ->prepped one. */
  76773. + struct list_head prepped;
  76774. + /* number of submitted i/o requests */
  76775. + atomic_t nr_submitted;
  76776. + /* number of i/o errors */
  76777. + atomic_t nr_errors;
  76778. + /* An atom this flush queue is attached to */
  76779. + txn_atom *atom;
  76780. + /* A wait queue head to wait on i/o completion */
  76781. + wait_queue_head_t wait;
  76782. +#if REISER4_DEBUG
  76783. + /* A thread which took this fq in exclusive use, NULL if fq is free,
  76784. + * used for debugging. */
  76785. + struct task_struct *owner;
  76786. +#endif
  76787. +};
  76788. +
  76789. +extern int reiser4_fq_by_atom(txn_atom *, flush_queue_t **);
  76790. +extern void reiser4_fq_put_nolock(flush_queue_t *);
  76791. +extern void reiser4_fq_put(flush_queue_t *);
  76792. +extern void reiser4_fuse_fq(txn_atom * to, txn_atom * from);
  76793. +extern void queue_jnode(flush_queue_t *, jnode *);
  76794. +
  76795. +extern int reiser4_write_fq(flush_queue_t *, long *, int);
  76796. +extern int current_atom_finish_all_fq(void);
  76797. +extern void init_atom_fq_parts(txn_atom *);
  76798. +
  76799. +extern reiser4_block_nr txnmgr_count_deleted_blocks(void);
  76800. +
  76801. +extern void znode_make_dirty(znode * node);
  76802. +extern void jnode_make_dirty_locked(jnode * node);
  76803. +
  76804. +extern int reiser4_sync_atom(txn_atom * atom);
  76805. +
  76806. +#if REISER4_DEBUG
  76807. +extern int atom_fq_parts_are_clean(txn_atom *);
  76808. +#endif
  76809. +
  76810. +extern void add_fq_to_bio(flush_queue_t *, struct bio *);
  76811. +extern flush_queue_t *get_fq_for_current_atom(void);
  76812. +
  76813. +void reiser4_invalidate_list(struct list_head * head);
  76814. +
  76815. +# endif /* __REISER4_TXNMGR_H__ */
  76816. +
  76817. +/* Make Linus happy.
  76818. + Local variables:
  76819. + c-indentation-style: "K&R"
  76820. + mode-name: "LC"
  76821. + c-basic-offset: 8
  76822. + tab-width: 8
  76823. + fill-column: 120
  76824. + End:
  76825. +*/
  76826. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/type_safe_hash.h linux-5.16.14/fs/reiser4/type_safe_hash.h
  76827. --- linux-5.16.14.orig/fs/reiser4/type_safe_hash.h 1970-01-01 01:00:00.000000000 +0100
  76828. +++ linux-5.16.14/fs/reiser4/type_safe_hash.h 2022-03-12 13:26:19.691892823 +0100
  76829. @@ -0,0 +1,320 @@
  76830. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  76831. + * reiser4/README */
  76832. +
  76833. +/* A hash table class that uses hash chains (singly-linked) and is
  76834. + parametrized to provide type safety. */
  76835. +
  76836. +#ifndef __REISER4_TYPE_SAFE_HASH_H__
  76837. +#define __REISER4_TYPE_SAFE_HASH_H__
  76838. +
  76839. +#include "debug.h"
  76840. +
  76841. +#include <asm/errno.h>
  76842. +/* Step 1: Use TYPE_SAFE_HASH_DECLARE() to define the TABLE and LINK objects
  76843. + based on the object type. You need to declare the item type before
  76844. + this definition, define it after this definition. */
  76845. +#define TYPE_SAFE_HASH_DECLARE(PREFIX,ITEM_TYPE) \
  76846. + \
  76847. +typedef struct PREFIX##_hash_table_ PREFIX##_hash_table; \
  76848. +typedef struct PREFIX##_hash_link_ PREFIX##_hash_link; \
  76849. + \
  76850. +struct PREFIX##_hash_table_ \
  76851. +{ \
  76852. + ITEM_TYPE **_table; \
  76853. + __u32 _buckets; \
  76854. +}; \
  76855. + \
  76856. +struct PREFIX##_hash_link_ \
  76857. +{ \
  76858. + ITEM_TYPE *_next; \
  76859. +}
  76860. +
  76861. +/* Step 2: Define the object type of the hash: give it field of type
  76862. + PREFIX_hash_link. */
  76863. +
  76864. +/* Step 3: Use TYPE_SAFE_HASH_DEFINE to define the hash table interface using
  76865. + the type and field name used in step 2. The arguments are:
  76866. +
  76867. + ITEM_TYPE The item type being hashed
  76868. + KEY_TYPE The type of key being hashed
  76869. + KEY_NAME The name of the key field within the item
  76870. + LINK_NAME The name of the link field within the item, which you must make type PREFIX_hash_link
  76871. + HASH_FUNC The name of the hash function (or macro, takes const pointer to key)
  76872. + EQ_FUNC The name of the equality function (or macro, takes const pointer to two keys)
  76873. +
  76874. + It implements these functions:
  76875. +
  76876. + prefix_hash_init Initialize the table given its size.
  76877. + prefix_hash_insert Insert an item
  76878. + prefix_hash_insert_index Insert an item w/ precomputed hash_index
  76879. + prefix_hash_find Find an item by key
  76880. + prefix_hash_find_index Find an item w/ precomputed hash_index
  76881. + prefix_hash_remove Remove an item, returns 1 if found, 0 if not found
  76882. + prefix_hash_remove_index Remove an item w/ precomputed hash_index
  76883. +
  76884. + If you'd like something to be done differently, feel free to ask me
  76885. + for modifications. Additional features that could be added but
  76886. + have not been:
  76887. +
  76888. + prefix_hash_remove_key Find and remove an item by key
  76889. + prefix_hash_remove_key_index Find and remove an item by key w/ precomputed hash_index
  76890. +
  76891. + The hash_function currently receives only the key as an argument,
  76892. + meaning it must somehow know the number of buckets. If this is a
  76893. + problem let me know.
  76894. +
  76895. + This hash table uses a single-linked hash chain. This means
  76896. + insertion is fast but deletion requires searching the chain.
  76897. +
  76898. + There is also the doubly-linked hash chain approach, under which
  76899. + deletion requires no search but the code is longer and it takes two
  76900. + pointers per item.
  76901. +
  76902. + The circularly-linked approach has the shortest code but requires
  76903. + two pointers per bucket, doubling the size of the bucket array (in
  76904. + addition to two pointers per item).
  76905. +*/
  76906. +#define TYPE_SAFE_HASH_DEFINE(PREFIX,ITEM_TYPE,KEY_TYPE,KEY_NAME,LINK_NAME,HASH_FUNC,EQ_FUNC) \
  76907. + \
  76908. +static __inline__ void \
  76909. +PREFIX##_check_hash (PREFIX##_hash_table *table UNUSED_ARG, \
  76910. + __u32 hash UNUSED_ARG) \
  76911. +{ \
  76912. + assert("nikita-2780", hash < table->_buckets); \
  76913. +} \
  76914. + \
  76915. +static __inline__ int \
  76916. +PREFIX##_hash_init (PREFIX##_hash_table *hash, \
  76917. + __u32 buckets) \
  76918. +{ \
  76919. + hash->_table = (ITEM_TYPE**) KMALLOC (sizeof (ITEM_TYPE*) * buckets); \
  76920. + hash->_buckets = buckets; \
  76921. + if (hash->_table == NULL) \
  76922. + { \
  76923. + return RETERR(-ENOMEM); \
  76924. + } \
  76925. + memset (hash->_table, 0, sizeof (ITEM_TYPE*) * buckets); \
  76926. + ON_DEBUG(printk(#PREFIX "_hash_table: %i buckets\n", buckets)); \
  76927. + return 0; \
  76928. +} \
  76929. + \
  76930. +static __inline__ void \
  76931. +PREFIX##_hash_done (PREFIX##_hash_table *hash) \
  76932. +{ \
  76933. + if (REISER4_DEBUG && hash->_table != NULL) { \
  76934. + __u32 i; \
  76935. + for (i = 0 ; i < hash->_buckets ; ++ i) \
  76936. + assert("nikita-2905", hash->_table[i] == NULL); \
  76937. + } \
  76938. + if (hash->_table != NULL) \
  76939. + KFREE (hash->_table, sizeof (ITEM_TYPE*) * hash->_buckets); \
  76940. + hash->_table = NULL; \
  76941. +} \
  76942. + \
  76943. +static __inline__ void \
  76944. +PREFIX##_hash_prefetch_next (ITEM_TYPE *item) \
  76945. +{ \
  76946. + prefetch(item->LINK_NAME._next); \
  76947. +} \
  76948. + \
  76949. +static __inline__ void \
  76950. +PREFIX##_hash_prefetch_bucket (PREFIX##_hash_table *hash, \
  76951. + __u32 index) \
  76952. +{ \
  76953. + prefetch(hash->_table[index]); \
  76954. +} \
  76955. + \
  76956. +static __inline__ ITEM_TYPE* \
  76957. +PREFIX##_hash_find_index (PREFIX##_hash_table *hash, \
  76958. + __u32 hash_index, \
  76959. + KEY_TYPE const *find_key) \
  76960. +{ \
  76961. + ITEM_TYPE *item; \
  76962. + \
  76963. + PREFIX##_check_hash(hash, hash_index); \
  76964. + \
  76965. + for (item = hash->_table[hash_index]; \
  76966. + item != NULL; \
  76967. + item = item->LINK_NAME._next) \
  76968. + { \
  76969. + prefetch(item->LINK_NAME._next); \
  76970. + prefetch(item->LINK_NAME._next + offsetof(ITEM_TYPE, KEY_NAME)); \
  76971. + if (EQ_FUNC (& item->KEY_NAME, find_key)) \
  76972. + { \
  76973. + return item; \
  76974. + } \
  76975. + } \
  76976. + \
  76977. + return NULL; \
  76978. +} \
  76979. + \
  76980. +static __inline__ ITEM_TYPE* \
  76981. +PREFIX##_hash_find_index_lru (PREFIX##_hash_table *hash, \
  76982. + __u32 hash_index, \
  76983. + KEY_TYPE const *find_key) \
  76984. +{ \
  76985. + ITEM_TYPE ** item = &hash->_table[hash_index]; \
  76986. + \
  76987. + PREFIX##_check_hash(hash, hash_index); \
  76988. + \
  76989. + while (*item != NULL) { \
  76990. + prefetch(&(*item)->LINK_NAME._next); \
  76991. + if (EQ_FUNC (&(*item)->KEY_NAME, find_key)) { \
  76992. + ITEM_TYPE *found; \
  76993. + \
  76994. + found = *item; \
  76995. + *item = found->LINK_NAME._next; \
  76996. + found->LINK_NAME._next = hash->_table[hash_index]; \
  76997. + hash->_table[hash_index] = found; \
  76998. + return found; \
  76999. + } \
  77000. + item = &(*item)->LINK_NAME._next; \
  77001. + } \
  77002. + return NULL; \
  77003. +} \
  77004. + \
  77005. +static __inline__ int \
  77006. +PREFIX##_hash_remove_index (PREFIX##_hash_table *hash, \
  77007. + __u32 hash_index, \
  77008. + ITEM_TYPE *del_item) \
  77009. +{ \
  77010. + ITEM_TYPE ** hash_item_p = &hash->_table[hash_index]; \
  77011. + \
  77012. + PREFIX##_check_hash(hash, hash_index); \
  77013. + \
  77014. + while (*hash_item_p != NULL) { \
  77015. + prefetch(&(*hash_item_p)->LINK_NAME._next); \
  77016. + if (*hash_item_p == del_item) { \
  77017. + *hash_item_p = (*hash_item_p)->LINK_NAME._next; \
  77018. + return 1; \
  77019. + } \
  77020. + hash_item_p = &(*hash_item_p)->LINK_NAME._next; \
  77021. + } \
  77022. + return 0; \
  77023. +} \
  77024. + \
  77025. +static __inline__ void \
  77026. +PREFIX##_hash_insert_index (PREFIX##_hash_table *hash, \
  77027. + __u32 hash_index, \
  77028. + ITEM_TYPE *ins_item) \
  77029. +{ \
  77030. + PREFIX##_check_hash(hash, hash_index); \
  77031. + \
  77032. + ins_item->LINK_NAME._next = hash->_table[hash_index]; \
  77033. + hash->_table[hash_index] = ins_item; \
  77034. +} \
  77035. + \
  77036. +static __inline__ void \
  77037. +PREFIX##_hash_insert_index_rcu (PREFIX##_hash_table *hash, \
  77038. + __u32 hash_index, \
  77039. + ITEM_TYPE *ins_item) \
  77040. +{ \
  77041. + PREFIX##_check_hash(hash, hash_index); \
  77042. + \
  77043. + ins_item->LINK_NAME._next = hash->_table[hash_index]; \
  77044. + smp_wmb(); \
  77045. + hash->_table[hash_index] = ins_item; \
  77046. +} \
  77047. + \
  77048. +static __inline__ ITEM_TYPE* \
  77049. +PREFIX##_hash_find (PREFIX##_hash_table *hash, \
  77050. + KEY_TYPE const *find_key) \
  77051. +{ \
  77052. + return PREFIX##_hash_find_index (hash, HASH_FUNC(hash, find_key), find_key); \
  77053. +} \
  77054. + \
  77055. +static __inline__ ITEM_TYPE* \
  77056. +PREFIX##_hash_find_lru (PREFIX##_hash_table *hash, \
  77057. + KEY_TYPE const *find_key) \
  77058. +{ \
  77059. + return PREFIX##_hash_find_index_lru (hash, HASH_FUNC(hash, find_key), find_key); \
  77060. +} \
  77061. + \
  77062. +static __inline__ int \
  77063. +PREFIX##_hash_remove (PREFIX##_hash_table *hash, \
  77064. + ITEM_TYPE *del_item) \
  77065. +{ \
  77066. + return PREFIX##_hash_remove_index (hash, \
  77067. + HASH_FUNC(hash, &del_item->KEY_NAME), del_item); \
  77068. +} \
  77069. + \
  77070. +static __inline__ int \
  77071. +PREFIX##_hash_remove_rcu (PREFIX##_hash_table *hash, \
  77072. + ITEM_TYPE *del_item) \
  77073. +{ \
  77074. + return PREFIX##_hash_remove (hash, del_item); \
  77075. +} \
  77076. + \
  77077. +static __inline__ void \
  77078. +PREFIX##_hash_insert (PREFIX##_hash_table *hash, \
  77079. + ITEM_TYPE *ins_item) \
  77080. +{ \
  77081. + return PREFIX##_hash_insert_index (hash, \
  77082. + HASH_FUNC(hash, &ins_item->KEY_NAME), ins_item); \
  77083. +} \
  77084. + \
  77085. +static __inline__ void \
  77086. +PREFIX##_hash_insert_rcu (PREFIX##_hash_table *hash, \
  77087. + ITEM_TYPE *ins_item) \
  77088. +{ \
  77089. + return PREFIX##_hash_insert_index_rcu (hash, HASH_FUNC(hash, &ins_item->KEY_NAME), \
  77090. + ins_item); \
  77091. +} \
  77092. + \
  77093. +static __inline__ ITEM_TYPE * \
  77094. +PREFIX##_hash_first (PREFIX##_hash_table *hash, __u32 ind) \
  77095. +{ \
  77096. + ITEM_TYPE *first; \
  77097. + \
  77098. + for (first = NULL; ind < hash->_buckets; ++ ind) { \
  77099. + first = hash->_table[ind]; \
  77100. + if (first != NULL) \
  77101. + break; \
  77102. + } \
  77103. + return first; \
  77104. +} \
  77105. + \
  77106. +static __inline__ ITEM_TYPE * \
  77107. +PREFIX##_hash_next (PREFIX##_hash_table *hash, \
  77108. + ITEM_TYPE *item) \
  77109. +{ \
  77110. + ITEM_TYPE *next; \
  77111. + \
  77112. + if (item == NULL) \
  77113. + return NULL; \
  77114. + next = item->LINK_NAME._next; \
  77115. + if (next == NULL) \
  77116. + next = PREFIX##_hash_first (hash, HASH_FUNC(hash, &item->KEY_NAME) + 1); \
  77117. + return next; \
  77118. +} \
  77119. + \
  77120. +typedef struct {} PREFIX##_hash_dummy
  77121. +
  77122. +#define for_all_ht_buckets(table, head) \
  77123. +for ((head) = &(table) -> _table[ 0 ] ; \
  77124. + (head) != &(table) -> _table[ (table) -> _buckets ] ; ++ (head))
  77125. +
  77126. +#define for_all_in_bucket(bucket, item, next, field) \
  77127. +for ((item) = *(bucket), (next) = (item) ? (item) -> field._next : NULL ; \
  77128. + (item) != NULL ; \
  77129. + (item) = (next), (next) = (item) ? (item) -> field._next : NULL )
  77130. +
  77131. +#define for_all_in_htable(table, prefix, item, next) \
  77132. +for ((item) = prefix ## _hash_first ((table), 0), \
  77133. + (next) = prefix ## _hash_next ((table), (item)) ; \
  77134. + (item) != NULL ; \
  77135. + (item) = (next), \
  77136. + (next) = prefix ## _hash_next ((table), (item)))
  77137. +
  77138. +/* __REISER4_TYPE_SAFE_HASH_H__ */
  77139. +#endif
  77140. +
  77141. +/* Make Linus happy.
  77142. + Local variables:
  77143. + c-indentation-style: "K&R"
  77144. + mode-name: "LC"
  77145. + c-basic-offset: 8
  77146. + tab-width: 8
  77147. + fill-column: 120
  77148. + End:
  77149. +*/
  77150. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/vfs_ops.c linux-5.16.14/fs/reiser4/vfs_ops.c
  77151. --- linux-5.16.14.orig/fs/reiser4/vfs_ops.c 1970-01-01 01:00:00.000000000 +0100
  77152. +++ linux-5.16.14/fs/reiser4/vfs_ops.c 2022-03-12 13:26:19.691892823 +0100
  77153. @@ -0,0 +1,260 @@
  77154. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  77155. + * reiser4/README */
  77156. +
  77157. +/* Interface to VFS. Reiser4 {super|export|dentry}_operations are defined
  77158. + here. */
  77159. +
  77160. +#include "forward.h"
  77161. +#include "debug.h"
  77162. +#include "dformat.h"
  77163. +#include "coord.h"
  77164. +#include "plugin/item/item.h"
  77165. +#include "plugin/file/file.h"
  77166. +#include "plugin/security/perm.h"
  77167. +#include "plugin/disk_format/disk_format.h"
  77168. +#include "plugin/plugin.h"
  77169. +#include "plugin/plugin_set.h"
  77170. +#include "plugin/object.h"
  77171. +#include "txnmgr.h"
  77172. +#include "jnode.h"
  77173. +#include "znode.h"
  77174. +#include "block_alloc.h"
  77175. +#include "tree.h"
  77176. +#include "vfs_ops.h"
  77177. +#include "inode.h"
  77178. +#include "page_cache.h"
  77179. +#include "ktxnmgrd.h"
  77180. +#include "super.h"
  77181. +#include "reiser4.h"
  77182. +#include "entd.h"
  77183. +#include "status_flags.h"
  77184. +#include "flush.h"
  77185. +#include "dscale.h"
  77186. +
  77187. +#include <linux/profile.h>
  77188. +#include <linux/types.h>
  77189. +#include <linux/mount.h>
  77190. +#include <linux/vfs.h>
  77191. +#include <linux/mm.h>
  77192. +#include <linux/buffer_head.h>
  77193. +#include <linux/dcache.h>
  77194. +#include <linux/list.h>
  77195. +#include <linux/pagemap.h>
  77196. +#include <linux/slab.h>
  77197. +#include <linux/seq_file.h>
  77198. +#include <linux/init.h>
  77199. +#include <linux/module.h>
  77200. +#include <linux/writeback.h>
  77201. +#include <linux/blkdev.h>
  77202. +#include <linux/security.h>
  77203. +#include <linux/reboot.h>
  77204. +#include <linux/rcupdate.h>
  77205. +
  77206. +/* update inode stat-data by calling plugin */
  77207. +int reiser4_update_sd(struct inode *object)
  77208. +{
  77209. + file_plugin *fplug;
  77210. +
  77211. + assert("nikita-2338", object != NULL);
  77212. + /* check for read-only file system. */
  77213. + if (IS_RDONLY(object))
  77214. + return 0;
  77215. +
  77216. + fplug = inode_file_plugin(object);
  77217. + assert("nikita-2339", fplug != NULL);
  77218. + return fplug->write_sd_by_inode(object);
  77219. +}
  77220. +
  77221. +/* helper function: increase inode nlink count and call plugin method to save
  77222. + updated stat-data.
  77223. +
  77224. + Used by link/create and during creation of dot and dotdot in mkdir
  77225. +*/
  77226. +int reiser4_add_nlink(struct inode *object /* object to which link is added */ ,
  77227. + struct inode *parent /* parent where new entry will be */
  77228. + ,
  77229. + int write_sd_p /* true if stat-data has to be
  77230. + * updated */ )
  77231. +{
  77232. + file_plugin *fplug;
  77233. + int result;
  77234. +
  77235. + assert("nikita-1351", object != NULL);
  77236. +
  77237. + fplug = inode_file_plugin(object);
  77238. + assert("nikita-1445", fplug != NULL);
  77239. +
  77240. + /* ask plugin whether it can add yet another link to this
  77241. + object */
  77242. + if (!fplug->can_add_link(object))
  77243. + return RETERR(-EMLINK);
  77244. +
  77245. + assert("nikita-2211", fplug->add_link != NULL);
  77246. + /* call plugin to do actual addition of link */
  77247. + result = fplug->add_link(object, parent);
  77248. +
  77249. + /* optionally update stat data */
  77250. + if (result == 0 && write_sd_p)
  77251. + result = fplug->write_sd_by_inode(object);
  77252. + return result;
  77253. +}
  77254. +
  77255. +/* helper function: decrease inode nlink count and call plugin method to save
  77256. + updated stat-data.
  77257. +
  77258. + Used by unlink/create
  77259. +*/
  77260. +int reiser4_del_nlink(struct inode *object /* object from which link is
  77261. + * removed */ ,
  77262. + struct inode *parent /* parent where entry was */ ,
  77263. + int write_sd_p /* true if stat-data has to be
  77264. + * updated */ )
  77265. +{
  77266. + file_plugin *fplug;
  77267. + int result;
  77268. +
  77269. + assert("nikita-1349", object != NULL);
  77270. +
  77271. + fplug = inode_file_plugin(object);
  77272. + assert("nikita-1350", fplug != NULL);
  77273. + assert("nikita-1446", object->i_nlink > 0);
  77274. + assert("nikita-2210", fplug->rem_link != NULL);
  77275. +
  77276. + /* call plugin to do actual deletion of link */
  77277. + result = fplug->rem_link(object, parent);
  77278. +
  77279. + /* optionally update stat data */
  77280. + if (result == 0 && write_sd_p)
  77281. + result = fplug->write_sd_by_inode(object);
  77282. + return result;
  77283. +}
  77284. +
  77285. +/* Release reiser4 dentry. This is d_op->d_release() method. */
  77286. +static void reiser4_d_release(struct dentry *dentry /* dentry released */ )
  77287. +{
  77288. + reiser4_free_dentry_fsdata(dentry);
  77289. +}
  77290. +
  77291. +/*
  77292. + * Called by reiser4_sync_inodes(), during speculative write-back (through
  77293. + * pdflush, or balance_dirty_pages()).
  77294. + */
  77295. +void reiser4_writeout(struct super_block *sb, struct writeback_control *wbc)
  77296. +{
  77297. + long written = 0;
  77298. + int repeats = 0;
  77299. + int result;
  77300. +
  77301. + /*
  77302. + * Performs early flushing, trying to free some memory. If there
  77303. + * is nothing to flush, commits some atoms.
  77304. + *
  77305. + * Commit all atoms if reiser4_writepages_dispatch() is called
  77306. + * from sys_sync() or sys_fsync()
  77307. + */
  77308. + if (wbc->sync_mode != WB_SYNC_NONE) {
  77309. + txnmgr_force_commit_all(sb, 0);
  77310. + return;
  77311. + }
  77312. +
  77313. + BUG_ON(reiser4_get_super_fake(sb) == NULL);
  77314. + do {
  77315. + long nr_submitted = 0;
  77316. + jnode *node = NULL;
  77317. +
  77318. + /* do not put more requests to overload write queue */
  77319. + if (bdi_write_congested(inode_to_bdi(reiser4_get_super_fake(sb)))) {
  77320. + //blk_flush_plug(current);
  77321. + break;
  77322. + }
  77323. + repeats++;
  77324. + BUG_ON(wbc->nr_to_write <= 0);
  77325. +
  77326. + if (get_current_context()->entd) {
  77327. + entd_context *ent = get_entd_context(sb);
  77328. +
  77329. + if (ent->cur_request->node)
  77330. + /*
  77331. + * this is ent thread and it managed to capture
  77332. + * requested page itself - start flush from
  77333. + * that page
  77334. + */
  77335. + node = ent->cur_request->node;
  77336. + }
  77337. +
  77338. + result = flush_some_atom(node, &nr_submitted, wbc,
  77339. + JNODE_FLUSH_WRITE_BLOCKS);
  77340. + if (result != 0)
  77341. + warning("nikita-31001", "Flush failed: %i", result);
  77342. + if (node)
  77343. + /* drop the reference acquired
  77344. + in find_or_create_extent() */
  77345. + jput(node);
  77346. + if (!nr_submitted)
  77347. + break;
  77348. +
  77349. + wbc->nr_to_write -= nr_submitted;
  77350. + written += nr_submitted;
  77351. + } while (wbc->nr_to_write > 0);
  77352. +}
  77353. +
  77354. +/* tell VM how many pages were dirtied */
  77355. +void reiser4_throttle_write(struct inode *inode)
  77356. +{
  77357. + reiser4_context *ctx;
  77358. +
  77359. + ctx = get_current_context();
  77360. + reiser4_txn_restart(ctx);
  77361. + current->journal_info = NULL;
  77362. + balance_dirty_pages_ratelimited(inode->i_mapping);
  77363. + current->journal_info = ctx;
  77364. +}
  77365. +
  77366. +const int REISER4_MAGIC_OFFSET = 16 * 4096; /* offset to magic string from the
  77367. + * beginning of device */
  77368. +
  77369. +/*
  77370. + * Reiser4 initialization/shutdown.
  77371. + *
  77372. + * Code below performs global reiser4 initialization that is done either as
  77373. + * part of kernel initialization (when reiser4 is statically built-in), or
  77374. + * during reiser4 module load (when compiled as module).
  77375. + */
  77376. +
  77377. +void reiser4_handle_error(void)
  77378. +{
  77379. + struct super_block *sb = reiser4_get_current_sb();
  77380. +
  77381. + if (!sb)
  77382. + return;
  77383. + reiser4_status_write(REISER4_STATUS_DAMAGED, 0,
  77384. + "Filesystem error occured");
  77385. + switch (get_super_private(sb)->onerror) {
  77386. + case 1:
  77387. + reiser4_panic("foobar-42", "Filesystem error occured\n");
  77388. + default:
  77389. + if (sb_rdonly(sb))
  77390. + return;
  77391. + sb->s_flags |= SB_RDONLY;
  77392. + break;
  77393. + }
  77394. +}
  77395. +
  77396. +struct dentry_operations reiser4_dentry_operations = {
  77397. + .d_revalidate = NULL,
  77398. + .d_hash = NULL,
  77399. + .d_compare = NULL,
  77400. + .d_delete = NULL,
  77401. + .d_release = reiser4_d_release,
  77402. + .d_iput = NULL,
  77403. +};
  77404. +
  77405. +/* Make Linus happy.
  77406. + Local variables:
  77407. + c-indentation-style: "K&R"
  77408. + mode-name: "LC"
  77409. + c-basic-offset: 8
  77410. + tab-width: 8
  77411. + fill-column: 120
  77412. + End:
  77413. +*/
  77414. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/vfs_ops.h linux-5.16.14/fs/reiser4/vfs_ops.h
  77415. --- linux-5.16.14.orig/fs/reiser4/vfs_ops.h 1970-01-01 01:00:00.000000000 +0100
  77416. +++ linux-5.16.14/fs/reiser4/vfs_ops.h 2022-03-12 13:26:19.691892823 +0100
  77417. @@ -0,0 +1,60 @@
  77418. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  77419. + * reiser4/README */
  77420. +
  77421. +/* vfs_ops.c's exported symbols */
  77422. +
  77423. +#if !defined( __FS_REISER4_VFS_OPS_H__ )
  77424. +#define __FS_REISER4_VFS_OPS_H__
  77425. +
  77426. +#include "forward.h"
  77427. +#include "coord.h"
  77428. +#include "seal.h"
  77429. +#include "plugin/file/file.h"
  77430. +#include "super.h"
  77431. +#include "readahead.h"
  77432. +
  77433. +#include <linux/types.h> /* for loff_t */
  77434. +#include <linux/fs.h> /* for struct address_space */
  77435. +#include <linux/dcache.h> /* for struct dentry */
  77436. +#include <linux/mm.h>
  77437. +#include <linux/backing-dev.h>
  77438. +
  77439. +/* address space operations */
  77440. +int reiser4_writepage(struct page *, struct writeback_control *);
  77441. +int reiser4_set_page_dirty(struct page *);
  77442. +void reiser4_invalidatepage(struct page *, unsigned int offset, unsigned int length);
  77443. +int reiser4_releasepage(struct page *, gfp_t);
  77444. +
  77445. +#ifdef CONFIG_MIGRATION
  77446. +int reiser4_migratepage(struct address_space *, struct page *,
  77447. + struct page *, enum migrate_mode);
  77448. +#else
  77449. +#define reiser4_migratepage NULL
  77450. +#endif /* CONFIG_MIGRATION */
  77451. +
  77452. +extern int reiser4_update_sd(struct inode *);
  77453. +extern int reiser4_add_nlink(struct inode *, struct inode *, int);
  77454. +extern int reiser4_del_nlink(struct inode *, struct inode *, int);
  77455. +
  77456. +extern int reiser4_start_up_io(struct page *page);
  77457. +extern void reiser4_throttle_write(struct inode *);
  77458. +extern int jnode_is_releasable(jnode *);
  77459. +
  77460. +#define CAPTURE_APAGE_BURST (1024l)
  77461. +void reiser4_writeout(struct super_block *, struct writeback_control *);
  77462. +
  77463. +extern void reiser4_handle_error(void);
  77464. +
  77465. +/* __FS_REISER4_VFS_OPS_H__ */
  77466. +#endif
  77467. +
  77468. +/* Make Linus happy.
  77469. + Local variables:
  77470. + c-indentation-style: "K&R"
  77471. + mode-name: "LC"
  77472. + c-basic-offset: 8
  77473. + tab-width: 8
  77474. + fill-column: 120
  77475. + scroll-step: 1
  77476. + End:
  77477. +*/
  77478. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/wander.c linux-5.16.14/fs/reiser4/wander.c
  77479. --- linux-5.16.14.orig/fs/reiser4/wander.c 1970-01-01 01:00:00.000000000 +0100
  77480. +++ linux-5.16.14/fs/reiser4/wander.c 2022-03-12 13:26:19.692892826 +0100
  77481. @@ -0,0 +1,1757 @@
  77482. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  77483. + * reiser4/README */
  77484. +
  77485. +/* Reiser4 Wandering Log */
  77486. +
  77487. +/* You should read http://www.namesys.com/txn-doc.html
  77488. +
  77489. + That describes how filesystem operations are performed as atomic
  77490. + transactions, and how we try to arrange it so that we can write most of the
  77491. + data only once while performing the operation atomically.
  77492. +
  77493. + For the purposes of this code, it is enough for it to understand that it
  77494. + has been told a given block should be written either once, or twice (if
  77495. + twice then once to the wandered location and once to the real location).
  77496. +
  77497. + This code guarantees that those blocks that are defined to be part of an
  77498. + atom either all take effect or none of them take effect.
  77499. +
  77500. + The "relocate set" of nodes are submitted to write by the jnode_flush()
  77501. + routine, and the "overwrite set" is submitted by reiser4_write_log().
  77502. + This is because with the overwrite set we seek to optimize writes, and
  77503. + with the relocate set we seek to cause disk order to correlate with the
  77504. + "parent first order" (preorder).
  77505. +
  77506. + reiser4_write_log() allocates and writes wandered blocks and maintains
  77507. + additional on-disk structures of the atom as wander records (each wander
  77508. + record occupies one block) for storing of the "wandered map" (a table which
  77509. + contains a relation between wandered and real block numbers) and other
  77510. + information which might be needed at transaction recovery time.
  77511. +
  77512. + The wander records are unidirectionally linked into a circle: each wander
  77513. + record contains a block number of the next wander record, the last wander
  77514. + record points to the first one.
  77515. +
  77516. + One wander record (named "tx head" in this file) has a format which is
  77517. + different from the other wander records. The "tx head" has a reference to the
  77518. + "tx head" block of the previously committed atom. Also, "tx head" contains
  77519. + fs information (the free blocks counter, and the oid allocator state) which
  77520. + is logged in a special way.
  77521. +
  77522. + There are two journal control blocks, named journal header and journal
  77523. + footer which have fixed on-disk locations. The journal header has a
  77524. + reference to the "tx head" block of the last committed atom. The journal
  77525. + footer points to the "tx head" of the last flushed atom. The atom is
  77526. + "played" when all blocks from its overwrite set are written to disk the
  77527. + second time (i.e. written to their real locations).
  77528. +
  77529. + NOTE: People who know reiserfs internals and its journal structure might be
  77530. + confused with these terms journal footer and journal header. There is a table
  77531. + with terms of similar semantics in reiserfs (reiser3) and reiser4:
  77532. +
  77533. + REISER3 TERM | REISER4 TERM | DESCRIPTION
  77534. + --------------------+-----------------------+----------------------------
  77535. + commit record | journal header | atomic write of this record
  77536. + | | ends transaction commit
  77537. + --------------------+-----------------------+----------------------------
  77538. + journal header | journal footer | atomic write of this record
  77539. + | | ends post-commit writes.
  77540. + | | After successful
  77541. + | | writing of this journal
  77542. + | | blocks (in reiser3) or
  77543. + | | wandered blocks/records are
  77544. + | | free for re-use.
  77545. + --------------------+-----------------------+----------------------------
  77546. +
  77547. + The atom commit process is the following:
  77548. +
  77549. + 1. The overwrite set is taken from atom's clean list, and its size is
  77550. + counted.
  77551. +
  77552. + 2. The number of necessary wander records (including tx head) is calculated,
  77553. + and the wander record blocks are allocated.
  77554. +
  77555. + 3. Allocate wandered blocks and populate wander records by wandered map.
  77556. +
  77557. + 4. submit write requests for wander records and wandered blocks.
  77558. +
  77559. + 5. wait until submitted write requests complete.
  77560. +
  77561. + 6. update journal header: change the pointer to the block number of just
  77562. + written tx head, submit an i/o for modified journal header block and wait
  77563. + for i/o completion.
  77564. +
  77565. + NOTE: The special logging for bitmap blocks and some reiser4 super block
  77566. + fields makes processes of atom commit, flush and recovering a bit more
  77567. + complex (see comments in the source code for details).
  77568. +
  77569. + The atom playing process is the following:
  77570. +
  77571. + 1. Write atom's overwrite set in-place.
  77572. +
  77573. + 2. Wait on i/o.
  77574. +
  77575. + 3. Update journal footer: change the pointer to block number of tx head
  77576. + block of the atom we currently flushing, submit an i/o, wait on i/o
  77577. + completion.
  77578. +
  77579. + 4. Free disk space which was used for wandered blocks and wander records.
  77580. +
  77581. + After the freeing of wandered blocks and wander records we have that journal
  77582. + footer points to the on-disk structure which might be overwritten soon.
  77583. + Neither the log writer nor the journal recovery procedure use that pointer
  77584. + for accessing the data. When the journal recovery procedure finds the oldest
  77585. + transaction it compares the journal footer pointer value with the "prev_tx"
  77586. + pointer value in the tx head; if the values are equal, the oldest unflushed
  77587. + transaction is found.
  77588. +
  77589. + NOTE on disk space leakage: the information about what blocks and how many
  77590. + blocks are allocated for wandered blocks, wandered records is not written to
  77591. + the disk because of special logging for bitmaps and some super blocks
  77592. + counters. After a system crash reiser4 does not remember those
  77593. + allocations, so there is no disk space leakage of this kind.
  77594. +*/
  77595. +
  77596. +/* Special logging of reiser4 super block fields. */
  77597. +
  77598. +/* There are some reiser4 super block fields (free block count and OID allocator
  77599. + state (number of files and next free OID) which are logged separately from
  77600. + super block to avoid unnecessary atom fusion.
  77601. +
  77602. + So, the reiser4 super block can remain uncaptured by a transaction that
  77603. + allocates/deallocates disk blocks or creates/deletes file objects. Moreover,
  77604. + the reiser4 on-disk super block is not touched when such a transaction is
  77605. + committed and flushed. Those "counters logged specially" are logged in "tx
  77606. + head" blocks and in the journal footer block.
  77607. +
  77608. + A step-by-step description of special logging:
  77609. +
  77610. + 0. The per-atom information about deleted or created files and allocated or
  77611. + freed blocks is collected during the transaction. The atom's
  77612. + ->nr_objects_created and ->nr_objects_deleted are for object
  77613. + deletion/creation tracking, the numbers of allocated and freed blocks are
  77614. + calculated using atom's delete set and atom's capture list -- all new and
  77615. + relocated nodes should be on atom's clean list and should have JNODE_RELOC
  77616. + bit set.
  77617. +
  77618. + 1. The "logged specially" reiser4 super block fields have their "committed"
  77619. + versions in the reiser4 in-memory super block. They get modified only at
  77620. + atom commit time. The atom's commit thread has an exclusive access to those
  77621. + "committed" fields because the log writer implementation supports only one
  77622. + atom commit at a time (there is a per-fs "commit" mutex). At
  77623. + that time "committed" counters are modified using per-atom information
  77624. + collected during the transaction. These counters are stored on disk as a
  77625. + part of tx head block when atom is committed.
  77626. +
  77627. + 2. When the atom is flushed the value of the free block counter and the OID
  77628. + allocator state get written to the journal footer block. A special journal
  77629. + procedure (journal_recover_sb_data()) takes those values from the journal
  77630. + footer and updates the reiser4 in-memory super block.
  77631. +
  77632. + NOTE: That means free block count and OID allocator state are logged
  77633. + separately from the reiser4 super block regardless of the fact that the
  77634. + reiser4 super block has fields to store both the free block counter and the
  77635. + OID allocator.
  77636. +
  77637. + Writing the whole super block at commit time requires knowing true values of
  77638. + all its fields without changes made by not yet committed transactions. It is
  77639. + possible by having their "committed" version of the super block like the
  77640. + reiser4 bitmap blocks have "committed" and "working" versions. However,
  77641. + another scheme was implemented which stores special logged values in the
  77642. + unused free space inside transaction head block. In my opinion it has an
  77643. + advantage of not writing whole super block when only part of it was
  77644. + modified. */
  77645. +
  77646. +#include "debug.h"
  77647. +#include "dformat.h"
  77648. +#include "txnmgr.h"
  77649. +#include "jnode.h"
  77650. +#include "znode.h"
  77651. +#include "block_alloc.h"
  77652. +#include "page_cache.h"
  77653. +#include "wander.h"
  77654. +#include "reiser4.h"
  77655. +#include "super.h"
  77656. +#include "vfs_ops.h"
  77657. +#include "writeout.h"
  77658. +#include "inode.h"
  77659. +#include "entd.h"
  77660. +
  77661. +#include <linux/types.h>
  77662. +#include <linux/fs.h> /* for struct super_block */
  77663. +#include <linux/mm.h> /* for struct page */
  77664. +#include <linux/pagemap.h>
  77665. +#include <linux/bio.h> /* for struct bio */
  77666. +#include <linux/blkdev.h>
  77667. +
  77668. +static int write_jnodes_to_disk_extent(
  77669. + jnode *, int, const reiser4_block_nr *, flush_queue_t *, int);
  77670. +
  77671. +/* The commit_handle is a container for objects needed at atom commit time */
  77672. +struct commit_handle {
  77673. + /* A pointer to atom's list of OVRWR nodes */
  77674. + struct list_head *overwrite_set;
  77675. + /* atom's overwrite set size */
  77676. + int overwrite_set_size;
  77677. + /* jnodes for wander record blocks */
  77678. + struct list_head tx_list;
  77679. + /* number of wander records */
  77680. + __u32 tx_size;
  77681. + /* 'committed' sb counters are saved here until atom is completely
  77682. + flushed */
  77683. + __u64 free_blocks;
  77684. + __u64 nr_files;
  77685. + __u64 next_oid;
  77686. + /* A pointer to the atom which is being committed */
  77687. + txn_atom *atom;
  77688. + /* A pointer to current super block */
  77689. + struct super_block *super;
  77690. + /* The counter of modified bitmaps */
  77691. + reiser4_block_nr nr_bitmap;
  77692. +};
  77693. +
  77694. +static void init_commit_handle(struct commit_handle *ch, txn_atom *atom)
  77695. +{
  77696. + memset(ch, 0, sizeof(struct commit_handle));
  77697. + INIT_LIST_HEAD(&ch->tx_list);
  77698. +
  77699. + ch->atom = atom;
  77700. + ch->super = reiser4_get_current_sb();
  77701. +}
  77702. +
  77703. +static void done_commit_handle(struct commit_handle *ch)
  77704. +{
  77705. + assert("zam-690", list_empty(&ch->tx_list));
  77706. +}
  77707. +
  77708. +/* fill journal header block data */
  77709. +static void format_journal_header(struct commit_handle *ch)
  77710. +{
  77711. + struct reiser4_super_info_data *sbinfo;
  77712. + struct journal_header *header;
  77713. + jnode *txhead;
  77714. +
  77715. + sbinfo = get_super_private(ch->super);
  77716. + assert("zam-479", sbinfo != NULL);
  77717. + assert("zam-480", sbinfo->journal_header != NULL);
  77718. +
  77719. + txhead = list_entry(ch->tx_list.next, jnode, capture_link);
  77720. +
  77721. + jload(sbinfo->journal_header);
  77722. +
  77723. + header = (struct journal_header *)jdata(sbinfo->journal_header);
  77724. + assert("zam-484", header != NULL);
  77725. +
  77726. + put_unaligned(cpu_to_le64(*jnode_get_block(txhead)),
  77727. + &header->last_committed_tx);
  77728. +
  77729. + jrelse(sbinfo->journal_header);
  77730. +}
  77731. +
  77732. +/* fill journal footer block data */
  77733. +static void format_journal_footer(struct commit_handle *ch)
  77734. +{
  77735. + struct reiser4_super_info_data *sbinfo;
  77736. + struct journal_footer *footer;
  77737. + jnode *tx_head;
  77738. +
  77739. + sbinfo = get_super_private(ch->super);
  77740. +
  77741. + tx_head = list_entry(ch->tx_list.next, jnode, capture_link);
  77742. +
  77743. + assert("zam-493", sbinfo != NULL);
  77744. + assert("zam-494", sbinfo->journal_header != NULL);
  77745. +
  77746. + check_me("zam-691", jload(sbinfo->journal_footer) == 0);
  77747. +
  77748. + footer = (struct journal_footer *)jdata(sbinfo->journal_footer);
  77749. + assert("zam-495", footer != NULL);
  77750. +
  77751. + put_unaligned(cpu_to_le64(*jnode_get_block(tx_head)),
  77752. + &footer->last_flushed_tx);
  77753. + put_unaligned(cpu_to_le64(ch->free_blocks), &footer->free_blocks);
  77754. +
  77755. + put_unaligned(cpu_to_le64(ch->nr_files), &footer->nr_files);
  77756. + put_unaligned(cpu_to_le64(ch->next_oid), &footer->next_oid);
  77757. +
  77758. + jrelse(sbinfo->journal_footer);
  77759. +}
  77760. +
  77761. +/* wander record capacity depends on current block size */
  77762. +static int wander_record_capacity(const struct super_block *super)
  77763. +{
  77764. + return (super->s_blocksize -
  77765. + sizeof(struct wander_record_header)) /
  77766. + sizeof(struct wander_entry);
  77767. +}
  77768. +
/* Fill first wander record (tx head) in accordance with supplied given data */
static void format_tx_head(struct commit_handle *ch)
{
	jnode *tx_head;
	jnode *next;
	struct tx_header *header;

	/* the tx head is the first jnode on the commit handle's tx list */
	tx_head = list_entry(ch->tx_list.next, jnode, capture_link);
	assert("zam-692", &ch->tx_list != &tx_head->capture_link);

	/* next wander record after the head; if the head is the only record,
	   point it at itself so the on-disk record list is circular */
	next = list_entry(tx_head->capture_link.next, jnode, capture_link);
	if (&ch->tx_list == &next->capture_link)
		next = tx_head;

	header = (struct tx_header *)jdata(tx_head);

	assert("zam-460", header != NULL);
	assert("zam-462", ch->super->s_blocksize >= sizeof(struct tx_header));

	/* zero the whole block, then write magic and header fields in
	   little-endian, unaligned-safe form */
	memset(jdata(tx_head), 0, (size_t) ch->super->s_blocksize);
	memcpy(jdata(tx_head), TX_HEADER_MAGIC, TX_HEADER_MAGIC_SIZE);

	put_unaligned(cpu_to_le32(ch->tx_size), &header->total);
	put_unaligned(cpu_to_le64(get_super_private(ch->super)->last_committed_tx),
		      &header->prev_tx);
	put_unaligned(cpu_to_le64(*jnode_get_block(next)), &header->next_block);
	put_unaligned(cpu_to_le64(ch->free_blocks), &header->free_blocks);
	put_unaligned(cpu_to_le64(ch->nr_files), &header->nr_files);
	put_unaligned(cpu_to_le64(ch->next_oid), &header->next_oid);
}
  77799. +
/* prepare ordinary wander record block (fill all service fields) */
static void
format_wander_record(struct commit_handle *ch, jnode *node, __u32 serial)
{
	struct wander_record_header *LRH;
	jnode *next;

	assert("zam-464", node != NULL);

	LRH = (struct wander_record_header *)jdata(node);
	/* successor of @node on the tx list; the last record wraps around to
	   the tx head so the on-disk record list is circular */
	next = list_entry(node->capture_link.next, jnode, capture_link);

	if (&ch->tx_list == &next->capture_link)
		next = list_entry(ch->tx_list.next, jnode, capture_link);

	assert("zam-465", LRH != NULL);
	assert("zam-463",
	       ch->super->s_blocksize > sizeof(struct wander_record_header));

	/* zero the block, then write magic and service fields in
	   little-endian, unaligned-safe form */
	memset(jdata(node), 0, (size_t) ch->super->s_blocksize);
	memcpy(jdata(node), WANDER_RECORD_MAGIC, WANDER_RECORD_MAGIC_SIZE);

	put_unaligned(cpu_to_le32(ch->tx_size), &LRH->total);
	put_unaligned(cpu_to_le32(serial), &LRH->serial);
	put_unaligned(cpu_to_le64(*jnode_get_block(next)), &LRH->next_block);
}
  77826. +
  77827. +/* add one wandered map entry to formatted wander record */
  77828. +static void
  77829. +store_entry(jnode * node, int index, const reiser4_block_nr * a,
  77830. + const reiser4_block_nr * b)
  77831. +{
  77832. + char *data;
  77833. + struct wander_entry *pairs;
  77834. +
  77835. + data = jdata(node);
  77836. + assert("zam-451", data != NULL);
  77837. +
  77838. + pairs =
  77839. + (struct wander_entry *)(data + sizeof(struct wander_record_header));
  77840. +
  77841. + put_unaligned(cpu_to_le64(*a), &pairs[index].original);
  77842. + put_unaligned(cpu_to_le64(*b), &pairs[index].wandered);
  77843. +}
  77844. +
  77845. +/* currently, wander records contains contain only wandered map, which depend on
  77846. + overwrite set size */
  77847. +static void get_tx_size(struct commit_handle *ch)
  77848. +{
  77849. + assert("zam-440", ch->overwrite_set_size != 0);
  77850. + assert("zam-695", ch->tx_size == 0);
  77851. +
  77852. + /* count all ordinary wander records
  77853. + (<overwrite_set_size> - 1) / <wander_record_capacity> + 1 and add one
  77854. + for tx head block */
  77855. + ch->tx_size =
  77856. + (ch->overwrite_set_size - 1) / wander_record_capacity(ch->super) +
  77857. + 2;
  77858. +}
  77859. +
/* A special structure for using in store_wmap_actor() for saving its state
   between calls */
struct store_wmap_params {
	jnode *cur;		/* jnode of current wander record to fill */
	int idx;		/* free element index in wander record */
	int capacity;		/* capacity (entries per wander record) */

#if REISER4_DEBUG
	/* head of the tx list, used only for bounds assertions */
	struct list_head *tx_list;
#endif
};
  77871. +
/* an actor for use in blocknr_set_iterator routine which populates the list
   of pre-formatted wander records by wandered map info */
static int
store_wmap_actor(txn_atom * atom UNUSED_ARG, const reiser4_block_nr * a,
		 const reiser4_block_nr * b, void *data)
{
	struct store_wmap_params *params = data;

	if (params->idx >= params->capacity) {
		/* current record is full: a new wander record should be taken
		   from the tx_list */
		params->cur = list_entry(params->cur->capture_link.next, jnode, capture_link);
		assert("zam-454",
		       params->tx_list != &params->cur->capture_link);

		params->idx = 0;
	}

	/* store the (original, wandered) pair in the next free slot */
	store_entry(params->cur, params->idx, a, b);
	params->idx++;

	return 0;
}
  77894. +
/* This function is called after Relocate set gets written to disk, Overwrite
   set is written to wandered locations and all wander records are written
   also. Updated journal header block contains a pointer (block number) to
   first wander record of the just written transaction */
static int update_journal_header(struct commit_handle *ch)
{
	struct reiser4_super_info_data *sbinfo = get_super_private(ch->super);
	jnode *jh = sbinfo->journal_header;
	jnode *head = list_entry(ch->tx_list.next, jnode, capture_link);
	int ret;

	format_journal_header(ch);

	/* flush/FUA: the header must reach stable storage before the
	   transaction can be considered committed */
	ret = write_jnodes_to_disk_extent(jh, 1, jnode_get_block(jh), NULL,
					  WRITEOUT_FLUSH_FUA);
	if (ret)
		return ret;

	/* blk_run_address_space(sbinfo->fake->i_mapping);
	 * blk_run_queues(); */

	ret = jwait_io(jh, WRITE);

	if (ret)
		return ret;

	/* remember this transaction's tx head block so the next commit can
	   link its prev_tx field to it */
	sbinfo->last_committed_tx = *jnode_get_block(head);

	return 0;
}
  77925. +
/* This function is called after write-back is finished. We update journal
   footer block and free blocks which were occupied by wandered blocks and
   transaction wander records */
static int update_journal_footer(struct commit_handle *ch)
{
	reiser4_super_info_data *sbinfo = get_super_private(ch->super);

	jnode *jf = sbinfo->journal_footer;

	int ret;

	format_journal_footer(ch);

	/* flush/FUA: the footer marks the transaction as fully written in
	   place, so it must hit stable storage before its blocks are reused */
	ret = write_jnodes_to_disk_extent(jf, 1, jnode_get_block(jf), NULL,
					  WRITEOUT_FLUSH_FUA);
	if (ret)
		return ret;

	/* blk_run_address_space(sbinfo->fake->i_mapping);
	 * blk_run_queue(); */

	ret = jwait_io(jf, WRITE);
	if (ret)
		return ret;

	return 0;
}
  77953. +
/* free block numbers of wander records of already written in place transaction */
static void dealloc_tx_list(struct commit_handle *ch)
{
	while (!list_empty(&ch->tx_list)) {
		jnode *cur = list_entry(ch->tx_list.next, jnode, capture_link);

		list_del(&cur->capture_link);
		ON_DEBUG(INIT_LIST_HEAD(&cur->capture_link));
		/* BA_DEFER: the record block may still be needed for replay
		   until the journal footer is updated */
		reiser4_dealloc_block(jnode_get_block(cur), 0,
				      BA_DEFER | BA_FORMATTED);

		/* release the data pinned by alloc_tx() and drop the io head */
		unpin_jnode_data(cur);
		reiser4_drop_io_head(cur);
	}
}
  77968. +
/* An actor for use in block_nr_iterator() routine which frees wandered blocks
   from atom's overwrite set. */
static int
dealloc_wmap_actor(txn_atom * atom UNUSED_ARG,
		   const reiser4_block_nr * a UNUSED_ARG,
		   const reiser4_block_nr * b, void *data UNUSED_ARG)
{
	/* @b is the wandered (temporary) location; the original location @a
	   stays allocated and keeps the data */
	assert("zam-499", b != NULL);
	assert("zam-500", *b != 0);
	assert("zam-501", !reiser4_blocknr_is_fake(b));

	reiser4_dealloc_block(b, 0, BA_DEFER | BA_FORMATTED);
	return 0;
}
  77984. +
/* free wandered block locations of already written in place transaction */
static void dealloc_wmap(struct commit_handle *ch)
{
	assert("zam-696", ch->atom != NULL);

	/* last argument == 1: the wandered map is destroyed as it is walked */
	blocknr_set_iterator(ch->atom, &ch->atom->wandered_map,
			     dealloc_wmap_actor, NULL, 1);
}
  77993. +
/* helper function for alloc wandered blocks, which refill set of block
   numbers needed for wandered blocks */
static int
get_more_wandered_blocks(int count, reiser4_block_nr * start, int *len)
{
	reiser4_blocknr_hint hint;
	int ret;

	reiser4_block_nr wide_len = count;

	/* FIXME-ZAM: A special policy needed for allocation of wandered blocks
	   ZAM-FIXME-HANS: yes, what happened to our discussion of using a fixed
	   reserved allocation area so as to get the best qualities of fixed
	   journals? */
	reiser4_blocknr_hint_init(&hint);
	hint.block_stage = BLOCK_GRABBED;

	/* NOTE(review): unlike alloc_tx(), the hint is never passed to
	 * reiser4_blocknr_hint_done() here -- confirm whether that cleanup
	 * is required for a BLOCK_GRABBED-stage hint */
	ret = reiser4_alloc_blocks(&hint, start, &wide_len,
				   BA_FORMATTED | BA_USE_DEFAULT_SEARCH_START);
	/* the allocator may return fewer than @count blocks; report the
	   actual extent length to the caller */
	*len = (int)wide_len;

	return ret;
}
  78017. +
/*
 * roll back changes made before issuing BIO in the case of IO error.
 */
static void undo_bio(struct bio *bio)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *pg;
		jnode *node;

		pg = bvec->bv_page;
		end_page_writeback(pg);
		node = jprivate(pg);
		/* re-dirty the jnode: it was marked clean and WRITEBACK when
		   the write was prepared, but the data never hit disk */
		spin_lock_jnode(node);
		JF_CLR(node, JNODE_WRITEBACK);
		JF_SET(node, JNODE_DIRTY);
		spin_unlock_jnode(node);
	}
	/* drop the reference taken at bio_alloc() time */
	bio_put(bio);
}
  78040. +
  78041. +/* put overwrite set back to atom's clean list */
  78042. +static void put_overwrite_set(struct commit_handle *ch)
  78043. +{
  78044. + jnode *cur;
  78045. +
  78046. + list_for_each_entry(cur, ch->overwrite_set, capture_link)
  78047. + jrelse_tail(cur);
  78048. +}
  78049. +
  78050. +/* Count overwrite set size, grab disk space for wandered blocks allocation.
  78051. + Since we have a separate list for atom's overwrite set we just scan the list,
  78052. + count bitmap and other not leaf nodes which wandered blocks allocation we
  78053. + have to grab space for. */
  78054. +static int get_overwrite_set(struct commit_handle *ch)
  78055. +{
  78056. + int ret;
  78057. + jnode *cur;
  78058. + __u64 nr_not_leaves = 0;
  78059. +#if REISER4_DEBUG
  78060. + __u64 nr_formatted_leaves = 0;
  78061. + __u64 nr_unformatted_leaves = 0;
  78062. +#endif
  78063. +
  78064. + assert("zam-697", ch->overwrite_set_size == 0);
  78065. +
  78066. + ch->overwrite_set = ATOM_OVRWR_LIST(ch->atom);
  78067. + cur = list_entry(ch->overwrite_set->next, jnode, capture_link);
  78068. +
  78069. + while (ch->overwrite_set != &cur->capture_link) {
  78070. + jnode *next = list_entry(cur->capture_link.next, jnode, capture_link);
  78071. +
  78072. + /* Count bitmap locks for getting correct statistics what number
  78073. + * of blocks were cleared by the transaction commit. */
  78074. + if (jnode_get_type(cur) == JNODE_BITMAP)
  78075. + ch->nr_bitmap++;
  78076. +
  78077. + assert("zam-939", JF_ISSET(cur, JNODE_OVRWR)
  78078. + || jnode_get_type(cur) == JNODE_BITMAP);
  78079. +
  78080. + if (jnode_is_znode(cur) && znode_above_root(JZNODE(cur))) {
  78081. + /* we replace fake znode by another (real)
  78082. + znode which is suggested by disk_layout
  78083. + plugin */
  78084. +
  78085. + /* FIXME: it looks like fake znode should be
  78086. + replaced by jnode supplied by
  78087. + disk_layout. */
  78088. +
  78089. + struct super_block *s = reiser4_get_current_sb();
  78090. + reiser4_super_info_data *sbinfo =
  78091. + get_current_super_private();
  78092. +
  78093. + if (sbinfo->df_plug->log_super) {
  78094. + jnode *sj = sbinfo->df_plug->log_super(s);
  78095. +
  78096. + assert("zam-593", sj != NULL);
  78097. +
  78098. + if (IS_ERR(sj))
  78099. + return PTR_ERR(sj);
  78100. +
  78101. + spin_lock_jnode(sj);
  78102. + JF_SET(sj, JNODE_OVRWR);
  78103. + insert_into_atom_ovrwr_list(ch->atom, sj);
  78104. + spin_unlock_jnode(sj);
  78105. +
  78106. + /* jload it as the rest of overwrite set */
  78107. + jload_gfp(sj, reiser4_ctx_gfp_mask_get(), 0);
  78108. +
  78109. + ch->overwrite_set_size++;
  78110. + }
  78111. + spin_lock_jnode(cur);
  78112. + reiser4_uncapture_block(cur);
  78113. + jput(cur);
  78114. +
  78115. + } else {
  78116. + int ret;
  78117. + ch->overwrite_set_size++;
  78118. + ret = jload_gfp(cur, reiser4_ctx_gfp_mask_get(), 0);
  78119. + if (ret)
  78120. + reiser4_panic("zam-783",
  78121. + "cannot load e-flushed jnode back (ret = %d)\n",
  78122. + ret);
  78123. + }
  78124. +
  78125. + /* Count not leaves here because we have to grab disk space
  78126. + * for wandered blocks. They were not counted as "flush
  78127. + * reserved". Counting should be done _after_ nodes are pinned
  78128. + * into memory by jload(). */
  78129. + if (!jnode_is_leaf(cur))
  78130. + nr_not_leaves++;
  78131. + else {
  78132. +#if REISER4_DEBUG
  78133. + /* at this point @cur either has JNODE_FLUSH_RESERVED
  78134. + * or is eflushed. Locking is not strong enough to
  78135. + * write an assertion checking for this. */
  78136. + if (jnode_is_znode(cur))
  78137. + nr_formatted_leaves++;
  78138. + else
  78139. + nr_unformatted_leaves++;
  78140. +#endif
  78141. + JF_CLR(cur, JNODE_FLUSH_RESERVED);
  78142. + }
  78143. +
  78144. + cur = next;
  78145. + }
  78146. +
  78147. + /* Grab space for writing (wandered blocks) of not leaves found in
  78148. + * overwrite set. */
  78149. + ret = reiser4_grab_space_force(nr_not_leaves, BA_RESERVED);
  78150. + if (ret)
  78151. + return ret;
  78152. +
  78153. + /* Disk space for allocation of wandered blocks of leaf nodes already
  78154. + * reserved as "flush reserved", move it to grabbed space counter. */
  78155. + spin_lock_atom(ch->atom);
  78156. + assert("zam-940",
  78157. + nr_formatted_leaves + nr_unformatted_leaves <=
  78158. + ch->atom->flush_reserved);
  78159. + flush_reserved2grabbed(ch->atom, ch->atom->flush_reserved);
  78160. + spin_unlock_atom(ch->atom);
  78161. +
  78162. + return ch->overwrite_set_size;
  78163. +}
  78164. +
/**
 * write_jnodes_to_disk_extent - submit write request
 * @first: first jnode of the list
 * @nr: number of jnodes on the list
 * @block_p: starting disk block number of the target extent
 * @fq: flush queue, or NULL to wait via jwait_io() instead
 * @flags: used to decide whether page is to get PG_reclaim flag
 *
 * Submits a write request for @nr jnodes beginning from the @first, other
 * jnodes are after the @first on the double-linked "capture" list. All jnodes
 * will be written to the disk region of @nr blocks starting with @block_p block
 * number. If @fq is not NULL it means that waiting for i/o completion will be
 * done more efficiently by using flush_queue_t objects.
 * This function is the one which writes list of jnodes in batch mode. It does
 * all low-level things as bio construction and page states manipulation.
 *
 * ZAM-FIXME-HANS: brief me on why this function exists, and why bios are
 * aggregated in this function instead of being left to the layers below
 *
 * FIXME: ZAM->HANS: What layer are you talking about? Can you point me to that?
 * Why that layer needed? Why BIOs cannot be constructed here?
 */
static int write_jnodes_to_disk_extent(
	jnode *first, int nr, const reiser4_block_nr *block_p,
	flush_queue_t *fq, int flags)
{
	struct super_block *super = reiser4_get_current_sb();
	int op_flags = (flags & WRITEOUT_FLUSH_FUA) ? REQ_PREFLUSH | REQ_FUA : 0;
	jnode *cur = first;
	reiser4_block_nr block;

	assert("zam-571", first != NULL);
	assert("zam-572", block_p != NULL);
	assert("zam-570", nr > 0);

	block = *block_p;

	/* one bio per loop iteration; a bio may cover fewer than @nr blocks
	   if the device's segment limit is reached */
	while (nr > 0) {
		struct bio *bio;
		int nr_blocks = bio_max_segs(nr);
		int i;
		int nr_used;

		bio = bio_alloc(GFP_NOIO, nr_blocks);
		if (!bio)
			return RETERR(-ENOMEM);

		bio_set_dev(bio, super->s_bdev);
		/* convert block number to a 512-byte sector address */
		bio->bi_iter.bi_sector = block * (super->s_blocksize >> 9);
		for (nr_used = 0, i = 0; i < nr_blocks; i++) {
			struct page *pg;

			pg = jnode_page(cur);
			assert("zam-573", pg != NULL);

			get_page(pg);

			lock_and_wait_page_writeback(pg);

			if (!bio_add_page(bio, pg, super->s_blocksize, 0)) {
				/*
				 * underlying device is satiated. Stop adding
				 * pages to the bio.
				 */
				unlock_page(pg);
				put_page(pg);
				break;
			}

			/* mark the jnode as under writeback and clean;
			   undo_bio() reverses this on submission failure */
			spin_lock_jnode(cur);
			assert("nikita-3166",
			       pg->mapping == jnode_get_mapping(cur));
			assert("zam-912", !JF_ISSET(cur, JNODE_WRITEBACK));
#if REISER4_DEBUG
			spin_lock(&cur->load);
			assert("nikita-3165", !jnode_is_releasable(cur));
			spin_unlock(&cur->load);
#endif
			JF_SET(cur, JNODE_WRITEBACK);
			JF_CLR(cur, JNODE_DIRTY);
			ON_DEBUG(cur->written++);

			assert("edward-1647",
			       ergo(jnode_is_znode(cur), JF_ISSET(cur, JNODE_PARSED)));
			spin_unlock_jnode(cur);
			/*
			 * update checksum
			 */
			if (jnode_is_znode(cur)) {
				zload(JZNODE(cur));
				if (node_plugin_by_node(JZNODE(cur))->csum)
					node_plugin_by_node(JZNODE(cur))->csum(JZNODE(cur), 0);
				zrelse(JZNODE(cur));
			}
			ClearPageError(pg);
			set_page_writeback(pg);

			if (get_current_context()->entd) {
				/* this is ent thread */
				entd_context *ent = get_entd_context(super);
				struct wbq *rq, *next;

				spin_lock(&ent->guard);

				if (pg == ent->cur_request->page) {
					/*
					 * entd is called for this page. This
					 * request is not in th etodo list
					 */
					ent->cur_request->written = 1;
				} else {
					/*
					 * if we have written a page for which writepage
					 * is called for - move request to another list.
					 */
					list_for_each_entry_safe(rq, next, &ent->todo_list, link) {
						assert("", rq->magic == WBQ_MAGIC);
						if (pg == rq->page) {
							/*
							 * remove request from
							 * entd's queue, but do
							 * not wake up a thread
							 * which put this
							 * request
							 */
							list_del_init(&rq->link);
							ent->nr_todo_reqs --;
							list_add_tail(&rq->link, &ent->done_list);
							ent->nr_done_reqs ++;
							rq->written = 1;
							break;
						}
					}
				}
				spin_unlock(&ent->guard);
			}

			clear_page_dirty_for_io(pg);

			unlock_page(pg);

			cur = list_entry(cur->capture_link.next, jnode, capture_link);
			nr_used++;
		}
		if (nr_used > 0) {
			assert("nikita-3453",
			       bio->bi_iter.bi_size == super->s_blocksize * nr_used);

			/* Check if we are allowed to write at all */
			if (sb_rdonly(super))
				undo_bio(bio);
			else {
				add_fq_to_bio(fq, bio);
				bio_get(bio);
				bio_set_op_attrs(bio, WRITE, op_flags);
				submit_bio(bio);
				bio_put(bio);
			}

			/* record the last written block as the default
			   search-start hint, then advance past it */
			block += nr_used - 1;
			update_blocknr_hint_default(super, &block);
			block += 1;
		} else {
			/* no page fit into this bio; release it and retry */
			bio_put(bio);
		}
		nr -= nr_used;
	}

	return 0;
}
  78336. +
/* This is a procedure which recovers a contiguous sequences of disk block
   numbers in the given list of j-nodes and submits write requests on this
   per-sequence basis */
int
write_jnode_list(struct list_head *head, flush_queue_t *fq,
		 long *nr_submitted, int flags)
{
	int ret;
	jnode *beg = list_entry(head->next, jnode, capture_link);

	while (head != &beg->capture_link) {
		int nr = 1;
		jnode *cur = list_entry(beg->capture_link.next, jnode, capture_link);

		/* extend the run while block numbers stay consecutive */
		while (head != &cur->capture_link) {
			if (*jnode_get_block(cur) != *jnode_get_block(beg) + nr)
				break;
			++nr;
			cur = list_entry(cur->capture_link.next, jnode, capture_link);
		}

		/* submit the whole contiguous run as one extent */
		ret = write_jnodes_to_disk_extent(
			beg, nr, jnode_get_block(beg), fq, flags);
		if (ret)
			return ret;

		if (nr_submitted)
			*nr_submitted += nr;

		beg = cur;
	}

	return 0;
}
  78371. +
/* add given wandered mapping to atom's wandered map */
static int
add_region_to_wmap(jnode * cur, int len, const reiser4_block_nr * block_p)
{
	int ret;
	blocknr_set_entry *new_bsep = NULL;
	reiser4_block_nr block;

	txn_atom *atom;

	assert("zam-568", block_p != NULL);
	block = *block_p;
	assert("zam-569", len > 0);

	/* map each of the @len jnodes (original location) to its consecutive
	   wandered block starting at @block */
	while ((len--) > 0) {
		do {
			atom = get_current_atom_locked();
			assert("zam-536",
			       !reiser4_blocknr_is_fake(jnode_get_block(cur)));
			/* may drop the atom lock to allocate a new set entry,
			   in which case it returns -E_REPEAT and we retry */
			ret =
			    blocknr_set_add_pair(atom, &atom->wandered_map,
						 &new_bsep,
						 jnode_get_block(cur), &block);
		} while (ret == -E_REPEAT);

		if (ret) {
			/* deallocate blocks which were not added to wandered
			   map */
			reiser4_block_nr wide_len = len;

			reiser4_dealloc_blocks(&block, &wide_len,
					       BLOCK_NOT_COUNTED,
					       BA_FORMATTED
					       /* formatted, without defer */ );

			return ret;
		}

		spin_unlock_atom(atom);

		cur = list_entry(cur->capture_link.next, jnode, capture_link);
		++block;
	}

	return 0;
}
  78418. +
/* Allocate wandered blocks for current atom's OVERWRITE SET and immediately
   submit IO for allocated blocks. We assume that current atom is in a stage
   when any atom fusion is impossible and atom is unlocked and it is safe. */
static int alloc_wandered_blocks(struct commit_handle *ch, flush_queue_t *fq)
{
	reiser4_block_nr block;

	int rest;
	int len;
	int ret;

	jnode *cur;

	assert("zam-534", ch->overwrite_set_size > 0);

	rest = ch->overwrite_set_size;

	cur = list_entry(ch->overwrite_set->next, jnode, capture_link);
	while (ch->overwrite_set != &cur->capture_link) {
		assert("zam-567", JF_ISSET(cur, JNODE_OVRWR));

		/* grab the next contiguous extent of wandered blocks; @len
		   may be smaller than @rest */
		ret = get_more_wandered_blocks(rest, &block, &len);
		if (ret)
			return ret;

		rest -= len;

		/* record original -> wandered mapping in the atom before
		   the data is written to the wandered locations */
		ret = add_region_to_wmap(cur, len, &block);
		if (ret)
			return ret;

		ret = write_jnodes_to_disk_extent(cur, len, &block, fq, 0);
		if (ret)
			return ret;

		/* skip over the jnodes just submitted */
		while ((len--) > 0) {
			assert("zam-604",
			       ch->overwrite_set != &cur->capture_link);
			cur = list_entry(cur->capture_link.next, jnode, capture_link);
		}
	}

	return 0;
}
  78463. +
/* allocate given number of nodes over the journal area and link them into a
   list, return pointer to the first jnode in the list */
static int alloc_tx(struct commit_handle *ch, flush_queue_t * fq)
{
	reiser4_blocknr_hint hint;
	reiser4_block_nr allocated = 0;
	reiser4_block_nr first, len;
	jnode *cur;
	jnode *txhead;
	int ret;
	reiser4_context *ctx;
	reiser4_super_info_data *sbinfo;

	assert("zam-698", ch->tx_size > 0);
	assert("zam-699", list_empty_careful(&ch->tx_list));

	ctx = get_current_context();
	sbinfo = get_super_private(ctx->super);

	/* allocate ch->tx_size blocks, possibly in several extents */
	while (allocated < (unsigned)ch->tx_size) {
		len = (ch->tx_size - allocated);

		reiser4_blocknr_hint_init(&hint);

		hint.block_stage = BLOCK_GRABBED;

		/* FIXME: there should be some block allocation policy for
		   nodes which contain wander records */

		/* We assume that disk space for wandered record blocks can be
		 * taken from reserved area. */
		ret = reiser4_alloc_blocks(&hint, &first, &len,
					   BA_FORMATTED | BA_RESERVED |
					   BA_USE_DEFAULT_SEARCH_START);
		reiser4_blocknr_hint_done(&hint);

		if (ret)
			return ret;

		allocated += len;

		/* create jnodes for all wander records */
		while (len--) {
			cur = reiser4_alloc_io_head(&first);

			if (cur == NULL) {
				ret = RETERR(-ENOMEM);
				goto free_not_assigned;
			}

			ret = jinit_new(cur, reiser4_ctx_gfp_mask_get());

			if (ret != 0) {
				jfree(cur);
				goto free_not_assigned;
			}

			/* keep the record data in memory until it is written
			   and the block is deallocated in dealloc_tx_list() */
			pin_jnode_data(cur);

			list_add_tail(&cur->capture_link, &ch->tx_list);

			first++;
		}
	}

	{ /* format a on-disk linked list of wander records */
		int serial = 1;

		txhead = list_entry(ch->tx_list.next, jnode, capture_link);
		format_tx_head(ch);

		cur = list_entry(txhead->capture_link.next, jnode, capture_link);
		while (&ch->tx_list != &cur->capture_link) {
			format_wander_record(ch, cur, serial++);
			cur = list_entry(cur->capture_link.next, jnode, capture_link);
		}
	}

	{ /* Fill wander records with Wandered Set */
		struct store_wmap_params params;
		txn_atom *atom;

		/* start filling with the first ordinary record (the one
		   right after the tx head) */
		params.cur = list_entry(txhead->capture_link.next, jnode, capture_link);

		params.idx = 0;
		params.capacity =
		    wander_record_capacity(reiser4_get_current_sb());

		atom = get_current_atom_locked();
		blocknr_set_iterator(atom, &atom->wandered_map,
				     &store_wmap_actor, &params, 0);
		spin_unlock_atom(atom);
	}

	{ /* relse all jnodes from tx_list */
		cur = list_entry(ch->tx_list.next, jnode, capture_link);
		while (&ch->tx_list != &cur->capture_link) {
			jrelse(cur);
			cur = list_entry(cur->capture_link.next, jnode, capture_link);
		}
	}

	ret = write_jnode_list(&ch->tx_list, fq, NULL, 0);

	return ret;

      free_not_assigned:
	/* We deallocate blocks not yet assigned to jnodes on tx_list. The
	   caller takes care about invalidating of tx list */
	reiser4_dealloc_blocks(&first, &len, BLOCK_NOT_COUNTED, BA_FORMATTED);

	return ret;
}
  78577. +
/* Commit phase of the wandering log: write the overwrite set to wandered
   locations, write the wander records, then point the journal header at the
   new transaction. */
static int commit_tx(struct commit_handle *ch)
{
	flush_queue_t *fq;
	int ret;

	/* Grab more space for wandered records. */
	ret = reiser4_grab_space_force((__u64) (ch->tx_size), BA_RESERVED);
	if (ret)
		return ret;

	fq = get_fq_for_current_atom();
	if (IS_ERR(fq))
		return PTR_ERR(fq);

	/* get_fq_for_current_atom() returns with the atom locked */
	spin_unlock_atom(fq->atom);
	do {
		ret = alloc_wandered_blocks(ch, fq);
		if (ret)
			break;
		ret = alloc_tx(ch, fq);
		if (ret)
			break;
	} while (0);

	reiser4_fq_put(fq);
	if (ret)
		return ret;
	/* wait for all submitted i/o before updating the journal header,
	   which makes the transaction durable */
	ret = current_atom_finish_all_fq();
	if (ret)
		return ret;
	return update_journal_header(ch);
}
  78610. +
/* Write-back phase: write the overwrite set to its original (real) locations
   and record completion in the journal footer. */
static int write_tx_back(struct commit_handle * ch)
{
	flush_queue_t *fq;
	int ret;

	fq = get_fq_for_current_atom();
	if (IS_ERR(fq))
		return PTR_ERR(fq);
	/* get_fq_for_current_atom() returns with the atom locked */
	spin_unlock_atom(fq->atom);
	ret = write_jnode_list(
		ch->overwrite_set, fq, NULL, WRITEOUT_FOR_PAGE_RECLAIM);
	reiser4_fq_put(fq);
	if (ret)
		return ret;
	/* wait for the in-place writes before the footer declares the
	   transaction fully flushed */
	ret = current_atom_finish_all_fq();
	if (ret)
		return ret;
	return update_journal_footer(ch);
}
  78630. +
  78631. +/* We assume that at this moment all captured blocks are marked as RELOC or
  78632. + WANDER (belong to Relocate o Overwrite set), all nodes from Relocate set
  78633. + are submitted to write.
  78634. +*/
  78635. +
  78636. +int reiser4_write_logs(long *nr_submitted)
  78637. +{
  78638. + txn_atom *atom;
  78639. + struct super_block *super = reiser4_get_current_sb();
  78640. + reiser4_super_info_data *sbinfo = get_super_private(super);
  78641. + struct commit_handle ch;
  78642. + int ret;
  78643. +
  78644. + writeout_mode_enable();
  78645. +
  78646. + /* block allocator may add j-nodes to the clean_list */
  78647. + ret = reiser4_pre_commit_hook();
  78648. + if (ret)
  78649. + return ret;
  78650. +
  78651. + /* No locks are required if we take atom which stage >=
  78652. + * ASTAGE_PRE_COMMIT */
  78653. + atom = get_current_context()->trans->atom;
  78654. + assert("zam-965", atom != NULL);
  78655. +
  78656. + /* relocate set is on the atom->clean_nodes list after
  78657. + * current_atom_complete_writes() finishes. It can be safely
  78658. + * uncaptured after commit_mutex is locked, because any atom that
  78659. + * captures these nodes is guaranteed to commit after current one.
  78660. + *
  78661. + * This can only be done after reiser4_pre_commit_hook(), because it is where
  78662. + * early flushed jnodes with CREATED bit are transferred to the
  78663. + * overwrite list. */
  78664. + reiser4_invalidate_list(ATOM_CLEAN_LIST(atom));
  78665. + spin_lock_atom(atom);
  78666. + /* There might be waiters for the relocate nodes which we have
  78667. + * released, wake them up. */
  78668. + reiser4_atom_send_event(atom);
  78669. + spin_unlock_atom(atom);
  78670. +
  78671. + if (REISER4_DEBUG) {
  78672. + int level;
  78673. +
  78674. + for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; ++level)
  78675. + assert("nikita-3352",
  78676. + list_empty_careful(ATOM_DIRTY_LIST(atom, level)));
  78677. + }
  78678. +
  78679. + sbinfo->nr_files_committed += (unsigned)atom->nr_objects_created;
  78680. + sbinfo->nr_files_committed -= (unsigned)atom->nr_objects_deleted;
  78681. +
  78682. + init_commit_handle(&ch, atom);
  78683. +
  78684. + ch.free_blocks = sbinfo->blocks_free_committed;
  78685. + ch.nr_files = sbinfo->nr_files_committed;
  78686. + /* ZAM-FIXME-HANS: email me what the contention level is for the super
  78687. + * lock. */
  78688. + ch.next_oid = oid_next(super);
  78689. +
  78690. + /* count overwrite set and place it in a separate list */
  78691. + ret = get_overwrite_set(&ch);
  78692. +
  78693. + if (ret <= 0) {
  78694. + /* It is possible that overwrite set is empty here, it means
  78695. + all captured nodes are clean */
  78696. + goto up_and_ret;
  78697. + }
  78698. +
  78699. + /* Inform the caller about what number of dirty pages will be
  78700. + * submitted to disk. */
  78701. + *nr_submitted += ch.overwrite_set_size - ch.nr_bitmap;
  78702. +
  78703. + /* count all records needed for storing of the wandered set */
  78704. + get_tx_size(&ch);
  78705. +
  78706. + ret = commit_tx(&ch);
  78707. + if (ret)
  78708. + goto up_and_ret;
  78709. +
  78710. + spin_lock_atom(atom);
  78711. + reiser4_atom_set_stage(atom, ASTAGE_POST_COMMIT);
  78712. + spin_unlock_atom(atom);
  78713. + reiser4_post_commit_hook();
  78714. +
  78715. + ret = write_tx_back(&ch);
  78716. +
  78717. + up_and_ret:
  78718. + if (ret) {
  78719. + /* there could be fq attached to current atom; the only way to
  78720. + remove them is: */
  78721. + current_atom_finish_all_fq();
  78722. + }
  78723. +
  78724. + /* free blocks of flushed transaction */
  78725. + dealloc_tx_list(&ch);
  78726. + dealloc_wmap(&ch);
  78727. +
  78728. + reiser4_post_write_back_hook();
  78729. +
  78730. + put_overwrite_set(&ch);
  78731. +
  78732. + done_commit_handle(&ch);
  78733. +
  78734. + writeout_mode_disable();
  78735. +
  78736. + return ret;
  78737. +}
  78738. +
  78739. +/* consistency checks for journal data/control blocks: header, footer, log
  78740. + records, transactions head blocks. All functions return zero on success. */
  78741. +
  78742. +static int check_journal_header(const jnode * node UNUSED_ARG)
  78743. +{
  78744. + /* FIXME: journal header has no magic field yet. */
  78745. + return 0;
  78746. +}
  78747. +
  78748. +/* wait for write completion for all jnodes from given list */
  78749. +static int wait_on_jnode_list(struct list_head *head)
  78750. +{
  78751. + jnode *scan;
  78752. + int ret = 0;
  78753. +
  78754. + list_for_each_entry(scan, head, capture_link) {
  78755. + struct page *pg = jnode_page(scan);
  78756. +
  78757. + if (pg) {
  78758. + if (PageWriteback(pg))
  78759. + wait_on_page_writeback(pg);
  78760. +
  78761. + if (PageError(pg))
  78762. + ret++;
  78763. + }
  78764. + }
  78765. +
  78766. + return ret;
  78767. +}
  78768. +
  78769. +static int check_journal_footer(const jnode * node UNUSED_ARG)
  78770. +{
  78771. + /* FIXME: journal footer has no magic field yet. */
  78772. + return 0;
  78773. +}
  78774. +
  78775. +static int check_tx_head(const jnode * node)
  78776. +{
  78777. + struct tx_header *header = (struct tx_header *)jdata(node);
  78778. +
  78779. + if (memcmp(&header->magic, TX_HEADER_MAGIC, TX_HEADER_MAGIC_SIZE) != 0) {
  78780. + warning("zam-627", "tx head at block %s corrupted\n",
  78781. + sprint_address(jnode_get_block(node)));
  78782. + return RETERR(-EIO);
  78783. + }
  78784. +
  78785. + return 0;
  78786. +}
  78787. +
  78788. +static int check_wander_record(const jnode * node)
  78789. +{
  78790. + struct wander_record_header *RH =
  78791. + (struct wander_record_header *)jdata(node);
  78792. +
  78793. + if (memcmp(&RH->magic, WANDER_RECORD_MAGIC, WANDER_RECORD_MAGIC_SIZE) !=
  78794. + 0) {
  78795. + warning("zam-628", "wander record at block %s corrupted\n",
  78796. + sprint_address(jnode_get_block(node)));
  78797. + return RETERR(-EIO);
  78798. + }
  78799. +
  78800. + return 0;
  78801. +}
  78802. +
  78803. +/* fill the commit_handle structure with everything needed for update_journal_footer */
  78804. +static int restore_commit_handle(struct commit_handle *ch, jnode *tx_head)
  78805. +{
  78806. + struct tx_header *TXH;
  78807. + int ret;
  78808. +
  78809. + ret = jload(tx_head);
  78810. + if (ret)
  78811. + return ret;
  78812. +
  78813. + TXH = (struct tx_header *)jdata(tx_head);
  78814. +
  78815. + ch->free_blocks = le64_to_cpu(get_unaligned(&TXH->free_blocks));
  78816. + ch->nr_files = le64_to_cpu(get_unaligned(&TXH->nr_files));
  78817. + ch->next_oid = le64_to_cpu(get_unaligned(&TXH->next_oid));
  78818. +
  78819. + jrelse(tx_head);
  78820. +
  78821. + list_add(&tx_head->capture_link, &ch->tx_list);
  78822. +
  78823. + return 0;
  78824. +}
  78825. +
  78826. +/* replay one transaction: restore and write overwrite set in place */
  78827. +static int replay_transaction(const struct super_block *s,
  78828. + jnode * tx_head,
  78829. + const reiser4_block_nr * log_rec_block_p,
  78830. + const reiser4_block_nr * end_block,
  78831. + unsigned int nr_wander_records)
  78832. +{
  78833. + reiser4_block_nr log_rec_block = *log_rec_block_p;
  78834. + struct commit_handle ch;
  78835. + LIST_HEAD(overwrite_set);
  78836. + jnode *log;
  78837. + int ret;
  78838. +
  78839. + init_commit_handle(&ch, NULL);
  78840. + ch.overwrite_set = &overwrite_set;
  78841. +
  78842. + restore_commit_handle(&ch, tx_head);
  78843. +
  78844. + while (log_rec_block != *end_block) {
  78845. + struct wander_record_header *header;
  78846. + struct wander_entry *entry;
  78847. +
  78848. + int i;
  78849. +
  78850. + if (nr_wander_records == 0) {
  78851. + warning("zam-631",
  78852. + "number of wander records in the linked list"
  78853. + " greater than number stored in tx head.\n");
  78854. + ret = RETERR(-EIO);
  78855. + goto free_ow_set;
  78856. + }
  78857. +
  78858. + log = reiser4_alloc_io_head(&log_rec_block);
  78859. + if (log == NULL)
  78860. + return RETERR(-ENOMEM);
  78861. +
  78862. + ret = jload(log);
  78863. + if (ret < 0) {
  78864. + reiser4_drop_io_head(log);
  78865. + return ret;
  78866. + }
  78867. +
  78868. + ret = check_wander_record(log);
  78869. + if (ret) {
  78870. + jrelse(log);
  78871. + reiser4_drop_io_head(log);
  78872. + return ret;
  78873. + }
  78874. +
  78875. + header = (struct wander_record_header *)jdata(log);
  78876. + log_rec_block = le64_to_cpu(get_unaligned(&header->next_block));
  78877. +
  78878. + entry = (struct wander_entry *)(header + 1);
  78879. +
  78880. + /* restore overwrite set from wander record content */
  78881. + for (i = 0; i < wander_record_capacity(s); i++) {
  78882. + reiser4_block_nr block;
  78883. + jnode *node;
  78884. +
  78885. + block = le64_to_cpu(get_unaligned(&entry->wandered));
  78886. + if (block == 0)
  78887. + break;
  78888. +
  78889. + node = reiser4_alloc_io_head(&block);
  78890. + if (node == NULL) {
  78891. + ret = RETERR(-ENOMEM);
  78892. + /*
  78893. + * FIXME-VS:???
  78894. + */
  78895. + jrelse(log);
  78896. + reiser4_drop_io_head(log);
  78897. + goto free_ow_set;
  78898. + }
  78899. +
  78900. + ret = jload(node);
  78901. +
  78902. + if (ret < 0) {
  78903. + reiser4_drop_io_head(node);
  78904. + /*
  78905. + * FIXME-VS:???
  78906. + */
  78907. + jrelse(log);
  78908. + reiser4_drop_io_head(log);
  78909. + goto free_ow_set;
  78910. + }
  78911. +
  78912. + block = le64_to_cpu(get_unaligned(&entry->original));
  78913. +
  78914. + assert("zam-603", block != 0);
  78915. +
  78916. + jnode_set_block(node, &block);
  78917. +
  78918. + list_add_tail(&node->capture_link, ch.overwrite_set);
  78919. +
  78920. + ++entry;
  78921. + }
  78922. +
  78923. + jrelse(log);
  78924. + reiser4_drop_io_head(log);
  78925. +
  78926. + --nr_wander_records;
  78927. + }
  78928. +
  78929. + if (nr_wander_records != 0) {
  78930. + warning("zam-632", "number of wander records in the linked list"
  78931. + " less than number stored in tx head.\n");
  78932. + ret = RETERR(-EIO);
  78933. + goto free_ow_set;
  78934. + }
  78935. +
  78936. + { /* write wandered set in place */
  78937. + write_jnode_list(ch.overwrite_set, NULL, NULL, 0);
  78938. + ret = wait_on_jnode_list(ch.overwrite_set);
  78939. +
  78940. + if (ret) {
  78941. + ret = RETERR(-EIO);
  78942. + goto free_ow_set;
  78943. + }
  78944. + }
  78945. +
  78946. + ret = update_journal_footer(&ch);
  78947. +
  78948. + free_ow_set:
  78949. +
  78950. + while (!list_empty(ch.overwrite_set)) {
  78951. + jnode *cur = list_entry(ch.overwrite_set->next, jnode, capture_link);
  78952. + list_del_init(&cur->capture_link);
  78953. + jrelse(cur);
  78954. + reiser4_drop_io_head(cur);
  78955. + }
  78956. +
  78957. + list_del_init(&tx_head->capture_link);
  78958. +
  78959. + done_commit_handle(&ch);
  78960. +
  78961. + return ret;
  78962. +}
  78963. +
  78964. +/* find oldest committed and not played transaction and play it. The transaction
  78965. + * was committed and the journal header block was updated, but the steps of
  78966. + * writing the atom's overwrite set in-place and updating the journal
  78967. + * footer block were not completed. This function completes the process by
  78968. + * recovering the atom's overwrite set from their wandered locations, writing
  78969. + * them in-place and updating the journal footer. */
  78970. +static int replay_oldest_transaction(struct super_block *s)
  78971. +{
  78972. + reiser4_super_info_data *sbinfo = get_super_private(s);
  78973. + jnode *jf = sbinfo->journal_footer;
  78974. + unsigned int total;
  78975. + struct journal_footer *F;
  78976. + struct tx_header *T;
  78977. +
  78978. + reiser4_block_nr prev_tx;
  78979. + reiser4_block_nr last_flushed_tx;
  78980. + reiser4_block_nr log_rec_block = 0;
  78981. +
  78982. + jnode *tx_head;
  78983. +
  78984. + int ret;
  78985. +
  78986. + if ((ret = jload(jf)) < 0)
  78987. + return ret;
  78988. +
  78989. + F = (struct journal_footer *)jdata(jf);
  78990. +
  78991. + last_flushed_tx = le64_to_cpu(get_unaligned(&F->last_flushed_tx));
  78992. +
  78993. + jrelse(jf);
  78994. +
  78995. + if (sbinfo->last_committed_tx == last_flushed_tx) {
  78996. + /* all transactions are replayed */
  78997. + return 0;
  78998. + }
  78999. +
  79000. + prev_tx = sbinfo->last_committed_tx;
  79001. +
  79002. + /* searching for oldest not flushed transaction */
  79003. + while (1) {
  79004. + tx_head = reiser4_alloc_io_head(&prev_tx);
  79005. + if (!tx_head)
  79006. + return RETERR(-ENOMEM);
  79007. +
  79008. + ret = jload(tx_head);
  79009. + if (ret < 0) {
  79010. + reiser4_drop_io_head(tx_head);
  79011. + return ret;
  79012. + }
  79013. +
  79014. + ret = check_tx_head(tx_head);
  79015. + if (ret) {
  79016. + jrelse(tx_head);
  79017. + reiser4_drop_io_head(tx_head);
  79018. + return ret;
  79019. + }
  79020. +
  79021. + T = (struct tx_header *)jdata(tx_head);
  79022. +
  79023. + prev_tx = le64_to_cpu(get_unaligned(&T->prev_tx));
  79024. +
  79025. + if (prev_tx == last_flushed_tx)
  79026. + break;
  79027. +
  79028. + jrelse(tx_head);
  79029. + reiser4_drop_io_head(tx_head);
  79030. + }
  79031. +
  79032. + total = le32_to_cpu(get_unaligned(&T->total));
  79033. + log_rec_block = le64_to_cpu(get_unaligned(&T->next_block));
  79034. +
  79035. + pin_jnode_data(tx_head);
  79036. + jrelse(tx_head);
  79037. +
  79038. + ret =
  79039. + replay_transaction(s, tx_head, &log_rec_block,
  79040. + jnode_get_block(tx_head), total - 1);
  79041. +
  79042. + unpin_jnode_data(tx_head);
  79043. + reiser4_drop_io_head(tx_head);
  79044. +
  79045. + if (ret)
  79046. + return ret;
  79047. + return -E_REPEAT;
  79048. +}
  79049. +
  79050. +/* The current reiser4 journal implementation was optimized not to capture the
  79051. + super block if certain super block fields are modified. Currently, the set
  79052. + is (<free block count>, <OID allocator>). These fields are logged by
  79053. + special way which includes storing them in each transaction head block at
  79054. + atom commit time and writing that information to journal footer block at
  79055. + atom flush time. For getting info from journal footer block to the
  79056. + in-memory super block there is a special function
  79057. + reiser4_journal_recover_sb_data() which should be called after disk format
  79058. + plugin re-reads super block after journal replaying.
  79059. +*/
  79060. +
  79061. +/* get the information from the journal footer into the in-memory super block */
  79062. +int reiser4_journal_recover_sb_data(struct super_block *s)
  79063. +{
  79064. + reiser4_super_info_data *sbinfo = get_super_private(s);
  79065. + struct journal_footer *jf;
  79066. + int ret;
  79067. +
  79068. + assert("zam-673", sbinfo->journal_footer != NULL);
  79069. +
  79070. + ret = jload(sbinfo->journal_footer);
  79071. + if (ret != 0)
  79072. + return ret;
  79073. +
  79074. + ret = check_journal_footer(sbinfo->journal_footer);
  79075. + if (ret != 0)
  79076. + goto out;
  79077. +
  79078. + jf = (struct journal_footer *)jdata(sbinfo->journal_footer);
  79079. +
  79080. + /* was there at least one flushed transaction? */
  79081. + if (jf->last_flushed_tx) {
  79082. +
  79083. + /* restore free block counter logged in this transaction */
  79084. + reiser4_set_free_blocks(s, le64_to_cpu(get_unaligned(&jf->free_blocks)));
  79085. +
  79086. + /* restore oid allocator state */
  79087. + oid_init_allocator(s,
  79088. + le64_to_cpu(get_unaligned(&jf->nr_files)),
  79089. + le64_to_cpu(get_unaligned(&jf->next_oid)));
  79090. + }
  79091. + out:
  79092. + jrelse(sbinfo->journal_footer);
  79093. + return ret;
  79094. +}
  79095. +
  79096. +/* reiser4 replay journal procedure */
  79097. +int reiser4_journal_replay(struct super_block *s)
  79098. +{
  79099. + reiser4_super_info_data *sbinfo = get_super_private(s);
  79100. + jnode *jh, *jf;
  79101. + struct journal_header *header;
  79102. + int nr_tx_replayed = 0;
  79103. + int ret;
  79104. +
  79105. + assert("zam-582", sbinfo != NULL);
  79106. +
  79107. + jh = sbinfo->journal_header;
  79108. + jf = sbinfo->journal_footer;
  79109. +
  79110. + if (!jh || !jf) {
  79111. + /* it is possible that disk layout does not support journal
  79112. + structures, we just warn about this */
  79113. + warning("zam-583",
  79114. + "journal control blocks were not loaded by disk layout plugin. "
  79115. + "journal replaying is not possible.\n");
  79116. + return 0;
  79117. + }
  79118. +
  79119. + /* Take free block count from journal footer block. The free block
  79120. + counter value corresponds to the last flushed transaction state */
  79121. + ret = jload(jf);
  79122. + if (ret < 0)
  79123. + return ret;
  79124. +
  79125. + ret = check_journal_footer(jf);
  79126. + if (ret) {
  79127. + jrelse(jf);
  79128. + return ret;
  79129. + }
  79130. +
  79131. + jrelse(jf);
  79132. +
  79133. + /* store last committed transaction info in reiser4 in-memory super
  79134. + block */
  79135. + ret = jload(jh);
  79136. + if (ret < 0)
  79137. + return ret;
  79138. +
  79139. + ret = check_journal_header(jh);
  79140. + if (ret) {
  79141. + jrelse(jh);
  79142. + return ret;
  79143. + }
  79144. +
  79145. + header = (struct journal_header *)jdata(jh);
  79146. + sbinfo->last_committed_tx = le64_to_cpu(get_unaligned(&header->last_committed_tx));
  79147. +
  79148. + jrelse(jh);
  79149. +
  79150. + /* replay committed transactions */
  79151. + while ((ret = replay_oldest_transaction(s)) == -E_REPEAT)
  79152. + nr_tx_replayed++;
  79153. +
  79154. + return ret;
  79155. +}
  79156. +
  79157. +/* load journal control block (either journal header or journal footer block) */
  79158. +static int
  79159. +load_journal_control_block(jnode ** node, const reiser4_block_nr * block)
  79160. +{
  79161. + int ret;
  79162. +
  79163. + *node = reiser4_alloc_io_head(block);
  79164. + if (!(*node))
  79165. + return RETERR(-ENOMEM);
  79166. +
  79167. + ret = jload(*node);
  79168. +
  79169. + if (ret) {
  79170. + reiser4_drop_io_head(*node);
  79171. + *node = NULL;
  79172. + return ret;
  79173. + }
  79174. +
  79175. + pin_jnode_data(*node);
  79176. + jrelse(*node);
  79177. +
  79178. + return 0;
  79179. +}
  79180. +
  79181. +/* unload journal header or footer and free jnode */
  79182. +static void unload_journal_control_block(jnode ** node)
  79183. +{
  79184. + if (*node) {
  79185. + unpin_jnode_data(*node);
  79186. + reiser4_drop_io_head(*node);
  79187. + *node = NULL;
  79188. + }
  79189. +}
  79190. +
  79191. +/* release journal control blocks */
  79192. +void reiser4_done_journal_info(struct super_block *s)
  79193. +{
  79194. + reiser4_super_info_data *sbinfo = get_super_private(s);
  79195. +
  79196. + assert("zam-476", sbinfo != NULL);
  79197. +
  79198. + unload_journal_control_block(&sbinfo->journal_header);
  79199. + unload_journal_control_block(&sbinfo->journal_footer);
  79200. + rcu_barrier();
  79201. +}
  79202. +
  79203. +/* load journal control blocks */
  79204. +int reiser4_init_journal_info(struct super_block *s)
  79205. +{
  79206. + reiser4_super_info_data *sbinfo = get_super_private(s);
  79207. + journal_location *loc;
  79208. + int ret;
  79209. +
  79210. + loc = &sbinfo->jloc;
  79211. +
  79212. + assert("zam-651", loc != NULL);
  79213. + assert("zam-652", loc->header != 0);
  79214. + assert("zam-653", loc->footer != 0);
  79215. +
  79216. + ret = load_journal_control_block(&sbinfo->journal_header, &loc->header);
  79217. +
  79218. + if (ret)
  79219. + return ret;
  79220. +
  79221. + ret = load_journal_control_block(&sbinfo->journal_footer, &loc->footer);
  79222. +
  79223. + if (ret) {
  79224. + unload_journal_control_block(&sbinfo->journal_header);
  79225. + }
  79226. +
  79227. + return ret;
  79228. +}
  79229. +
  79230. +/* Make Linus happy.
  79231. + Local variables:
  79232. + c-indentation-style: "K&R"
  79233. + mode-name: "LC"
  79234. + c-basic-offset: 8
  79235. + tab-width: 8
  79236. + fill-column: 80
  79237. + End:
  79238. +*/
  79239. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/wander.h linux-5.16.14/fs/reiser4/wander.h
  79240. --- linux-5.16.14.orig/fs/reiser4/wander.h 1970-01-01 01:00:00.000000000 +0100
  79241. +++ linux-5.16.14/fs/reiser4/wander.h 2022-03-12 13:26:19.692892826 +0100
  79242. @@ -0,0 +1,135 @@
  79243. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  79244. +
  79245. +#if !defined (__FS_REISER4_WANDER_H__)
  79246. +#define __FS_REISER4_WANDER_H__
  79247. +
  79248. +#include "dformat.h"
  79249. +
  79250. +#include <linux/fs.h> /* for struct super_block */
  79251. +
  79252. +/* REISER4 JOURNAL ON-DISK DATA STRUCTURES */
  79253. +
  79254. +#define TX_HEADER_MAGIC "TxMagic4"
  79255. +#define WANDER_RECORD_MAGIC "LogMagc4"
  79256. +
  79257. +#define TX_HEADER_MAGIC_SIZE (8)
  79258. +#define WANDER_RECORD_MAGIC_SIZE (8)
  79259. +
  79260. +/* journal header block format */
  79261. +struct journal_header {
  79262. + /* last written transaction head location */
  79263. + d64 last_committed_tx;
  79264. +};
  79265. +
  79266. +typedef struct journal_location {
  79267. + reiser4_block_nr footer;
  79268. + reiser4_block_nr header;
  79269. +} journal_location;
  79270. +
  79271. +/* The wander.c head comment describes usage and semantic of all these structures */
  79272. +/* journal footer block format */
  79273. +struct journal_footer {
  79274. + /* last flushed transaction location. */
  79275. + /* This block number is no more valid after the transaction it points
  79276. + to gets flushed, this number is used only at journal replaying time
  79277. + for detection of the end of on-disk list of committed transactions
  79278. + which were not flushed completely */
  79279. + d64 last_flushed_tx;
  79280. +
  79281. + /* free block counter is written in journal footer at transaction
  79282. + flushing, not in the super block, because the free blocks counter is logged
  79283. + by another way than super block fields (root pointer, for
  79284. + example). */
  79285. + d64 free_blocks;
  79286. +
  79287. + /* number of used OIDs and maximal used OID are logged separately from
  79288. + super block */
  79289. + d64 nr_files;
  79290. + d64 next_oid;
  79291. +};
  79292. +
  79293. +/* Each wander record (except the first one) has unified format with wander
  79294. + record header followed by an array of log entries */
  79295. +struct wander_record_header {
  79296. + /* when there is no predefined location for wander records, this magic
  79297. + string should help reiser4fsck. */
  79298. + char magic[WANDER_RECORD_MAGIC_SIZE];
  79299. +
  79300. + /* transaction id */
  79301. + d64 id;
  79302. +
  79303. + /* total number of wander records in current transaction */
  79304. + d32 total;
  79305. +
  79306. + /* this block number in transaction */
  79307. + d32 serial;
  79308. +
  79309. + /* block number of the next wander record in the transaction */
  79310. + d64 next_block;
  79311. +};
  79312. +
  79313. +/* The first wander record (transaction head) of written transaction has the
  79314. + special format */
  79315. +struct tx_header {
  79316. + /* magic string makes first block in transaction different from other
  79317. + logged blocks, it should help fsck. */
  79318. + char magic[TX_HEADER_MAGIC_SIZE];
  79319. +
  79320. + /* transaction id */
  79321. + d64 id;
  79322. +
  79323. + /* total number of records (including this first tx head) in the
  79324. + transaction */
  79325. + d32 total;
  79326. +
  79327. + /* align next field to 8-byte boundary; this field always is zero */
  79328. + d32 padding;
  79329. +
  79330. + /* block number of previous transaction head */
  79331. + d64 prev_tx;
  79332. +
  79333. + /* next wander record location */
  79334. + d64 next_block;
  79335. +
  79336. + /* committed versions of free blocks counter */
  79337. + d64 free_blocks;
  79338. +
  79339. + /* number of used OIDs (nr_files) and maximal used OID are logged
  79340. + separately from super block */
  79341. + d64 nr_files;
  79342. + d64 next_oid;
  79343. +};
  79344. +
  79345. +/* A transaction gets written to disk as a set of wander records (each wander
  79346. + record size is fs block) */
  79347. +
  79348. +/* The rest of a wander record is filled with these log entries; any unused
  79349. + space is filled with zeroes */
  79350. +struct wander_entry {
  79351. + d64 original; /* block original location */
  79352. + d64 wandered; /* block wandered location */
  79353. +};
  79354. +
  79355. +/* REISER4 JOURNAL WRITER FUNCTIONS */
  79356. +
  79357. +extern int reiser4_write_logs(long *);
  79358. +extern int reiser4_journal_replay(struct super_block *);
  79359. +extern int reiser4_journal_recover_sb_data(struct super_block *);
  79360. +
  79361. +extern int reiser4_init_journal_info(struct super_block *);
  79362. +extern void reiser4_done_journal_info(struct super_block *);
  79363. +
  79364. +extern int write_jnode_list(struct list_head *, flush_queue_t *, long *, int);
  79365. +
  79366. +#endif /* __FS_REISER4_WANDER_H__ */
  79367. +
  79368. +/* Make Linus happy.
  79369. + Local variables:
  79370. + c-indentation-style: "K&R"
  79371. + mode-name: "LC"
  79372. + c-basic-offset: 8
  79373. + tab-width: 8
  79374. + fill-column: 80
  79375. + scroll-step: 1
  79376. + End:
  79377. +*/
  79378. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/writeout.h linux-5.16.14/fs/reiser4/writeout.h
  79379. --- linux-5.16.14.orig/fs/reiser4/writeout.h 1970-01-01 01:00:00.000000000 +0100
  79380. +++ linux-5.16.14/fs/reiser4/writeout.h 2022-03-12 13:26:19.692892826 +0100
  79381. @@ -0,0 +1,21 @@
  79382. +/* Copyright 2002, 2003, 2004 by Hans Reiser, licensing governed by reiser4/README */
  79383. +
  79384. +#if !defined (__FS_REISER4_WRITEOUT_H__)
  79385. +#define __FS_REISER4_WRITEOUT_H__
  79386. +#define WRITEOUT_SINGLE_STREAM (0x1)
  79387. +#define WRITEOUT_FOR_PAGE_RECLAIM (0x2)
  79388. +#define WRITEOUT_FLUSH_FUA (0x4)
  79389. +
  79390. +extern int reiser4_get_writeout_flags(void);
  79391. +
  79392. +#endif /* __FS_REISER4_WRITEOUT_H__ */
  79393. +
  79394. +/* Make Linus happy.
  79395. + Local variables:
  79396. + c-indentation-style: "K&R"
  79397. + mode-name: "LC"
  79398. + c-basic-offset: 8
  79399. + tab-width: 8
  79400. + fill-column: 80
  79401. + End:
  79402. +*/
  79403. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/znode.c linux-5.16.14/fs/reiser4/znode.c
  79404. --- linux-5.16.14.orig/fs/reiser4/znode.c 1970-01-01 01:00:00.000000000 +0100
  79405. +++ linux-5.16.14/fs/reiser4/znode.c 2022-03-12 13:26:19.692892826 +0100
  79406. @@ -0,0 +1,1027 @@
  79407. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  79408. + * reiser4/README */
  79409. +/* Znode manipulation functions. */
  79410. +/* Znode is the in-memory header for a tree node. It is stored
  79411. + separately from the node itself so that it does not get written to
  79412. + disk. In this respect znode is like buffer head or page head. We
  79413. + also use znodes for additional reiser4 specific purposes:
  79414. +
  79415. + . they are organized into tree structure which is a part of whole
  79416. + reiser4 tree.
  79417. + . they are used to implement node grained locking
  79418. + . they are used to keep additional state associated with a
  79419. + node
  79420. + . they contain links to lists used by the transaction manager
  79421. +
  79422. + Znode is attached to some variable "block number" which is instance of
  79423. + fs/reiser4/tree.h:reiser4_block_nr type. Znode can exist without
  79424. + appropriate node being actually loaded in memory. Existence of znode itself
  79425. + is regulated by reference count (->x_count) in it. Each time thread
  79426. + acquires reference to znode through call to zget(), ->x_count is
  79427. + incremented and decremented on call to zput(). Data (content of node) are
  79428. + brought in memory through call to zload(), which also increments ->d_count
  79429. + reference counter. zload can block waiting on IO. Call to zrelse()
  79430. + decreases this counter. Also, ->c_count keeps track of number of child
  79431. + znodes and prevents parent znode from being recycled until all of its
  79432. + children are. ->c_count is decremented whenever child goes out of existence
  79433. + (being actually recycled in zdestroy()) which can be some time after last
  79434. + reference to this child dies if we support some form of LRU cache for
  79435. + znodes.
  79436. +
  79437. +*/
  79438. +/* EVERY ZNODE'S STORY
  79439. +
  79440. + 1. His infancy.
  79441. +
  79442. + Once upon a time, the znode was born deep inside of zget() by call to
  79443. + zalloc(). At the return from zget() znode had:
  79444. +
  79445. + . reference counter (x_count) of 1
  79446. + . assigned block number, marked as used in bitmap
  79447. + . pointer to parent znode. Root znode parent pointer points
  79448. + to its father: "fake" znode. This, in turn, has NULL parent pointer.
  79449. + . hash table linkage
  79450. + . no data loaded from disk
  79451. + . no node plugin
  79452. + . no sibling linkage
  79453. +
  79454. + 2. His childhood
  79455. +
  79456. + Each node is either brought into memory as a result of tree traversal, or
  79457. + created afresh, creation of the root being a special case of the latter. In
  79458. + either case it's inserted into sibling list. This will typically require
  79459. + some ancillary tree traversing, but ultimately both sibling pointers will
  79460. + exist and JNODE_LEFT_CONNECTED and JNODE_RIGHT_CONNECTED will be true in
  79461. + zjnode.state.
  79462. +
  79463. + 3. His youth.
  79464. +
  79465. + If znode is bound to already existing node in a tree, its content is read
  79466. + from the disk by call to zload(). At that moment, JNODE_LOADED bit is set
  79467. + in zjnode.state and zdata() function starts to return non null for this
  79468. + znode. zload() further calls zparse() that determines which node layout
  79469. + this node is rendered in, and sets ->nplug on success.
  79470. +
  79471. + If znode is for new node just created, memory for it is allocated and
  79472. + zinit_new() function is called to initialise data, according to selected
  79473. + node layout.
  79474. +
  79475. + 4. His maturity.
  79476. +
  79477. + After this point, znode lingers in memory for some time. Threads can
  79478. + acquire references to znode either by blocknr through call to zget(), or by
  79479. + following a pointer to unallocated znode from internal item. Each time
  79480. + reference to znode is obtained, x_count is increased. Thread can read/write
  79481. + lock znode. Znode data can be loaded through calls to zload(), d_count will
  79482. + be increased appropriately. If all references to znode are released
  79483. + (x_count drops to 0), znode is not recycled immediately. Rather, it is
  79484. + still cached in the hash table in the hope that it will be accessed
  79485. + shortly.
  79486. +
  79487. + There are two ways in which znode existence can be terminated:
  79488. +
  79489. + . sudden death: node bound to this znode is removed from the tree
  79490. + . overpopulation: znode is purged out of memory due to memory pressure
  79491. +
  79492. + 5. His death.
  79493. +
  79494. + Death is complex process.
  79495. +
  79496. + When we irrevocably commit ourselves to decision to remove node from the
  79497. + tree, JNODE_HEARD_BANSHEE bit is set in zjnode.state of corresponding
  79498. + znode. This is done either in ->kill_hook() of internal item or in
  79499. + reiser4_kill_root() function when tree root is removed.
  79500. +
  79501. + At this moment znode still has:
  79502. +
  79503. + . locks held on it, necessary write ones
  79504. + . references to it
  79505. + . disk block assigned to it
  79506. + . data loaded from the disk
  79507. + . pending requests for lock
  79508. +
  79509. + But once JNODE_HEARD_BANSHEE bit set, last call to unlock_znode() does node
  79510. + deletion. Node deletion includes two phases. First all ways to get
  79511. + references to that znode (sibling and parent links and hash lookup using
  79512. + block number stored in parent node) should be deleted -- it is done through
  79513. + sibling_list_remove(), also we assume that nobody uses down link from
  79514. + parent node due to its nonexistence or proper parent node locking and
  79515. + nobody uses parent pointers from children due to absence of them. Second we
  79516. + invalidate all pending lock requests which still are on znode's lock
  79517. + request queue, this is done by reiser4_invalidate_lock(). Another
  79518. + JNODE_IS_DYING znode status bit is used to invalidate pending lock requests.
  79519. + Once it set all requesters are forced to return -EINVAL from
  79520. + longterm_lock_znode(). Future locking attempts are not possible because all
  79521. + ways to get references to that znode are removed already. Last, node is
  79522. + uncaptured from transaction.
  79523. +
  79524. + When last reference to the dying znode is just about to be released,
  79525. + block number for this lock is released and znode is removed from the
  79526. + hash table.
  79527. +
  79528. + Now znode can be recycled.
  79529. +
  79530. + [it's possible to free bitmap block and remove znode from the hash
  79531. + table when last lock is released. This will result in having
  79532. + referenced but completely orphaned znode]
  79533. +
  79534. + 6. Limbo
  79535. +
  79536. + As have been mentioned above znodes with reference counter 0 are
  79537. + still cached in a hash table. Once memory pressure increases they are
  79538. + purged out of there [this requires something like LRU list for
  79539. + efficient implementation. LRU list would also greatly simplify
  79540. + implementation of coord cache that would in this case morph to just
  79541. + scanning some initial segment of LRU list]. Data loaded into
  79542. + unreferenced znode are flushed back to the durable storage if
  79543. + necessary and memory is freed. Znodes themselves can be recycled at
  79544. + this point too.
  79545. +
  79546. +*/
  79547. +
  79548. +#include "debug.h"
  79549. +#include "dformat.h"
  79550. +#include "key.h"
  79551. +#include "coord.h"
  79552. +#include "plugin/plugin_header.h"
  79553. +#include "plugin/node/node.h"
  79554. +#include "plugin/plugin.h"
  79555. +#include "txnmgr.h"
  79556. +#include "jnode.h"
  79557. +#include "znode.h"
  79558. +#include "block_alloc.h"
  79559. +#include "tree.h"
  79560. +#include "tree_walk.h"
  79561. +#include "super.h"
  79562. +#include "reiser4.h"
  79563. +
  79564. +#include <linux/pagemap.h>
  79565. +#include <linux/spinlock.h>
  79566. +#include <linux/slab.h>
  79567. +#include <linux/err.h>
  79568. +
  79569. +static z_hash_table *get_htable(reiser4_tree *,
  79570. + const reiser4_block_nr * const blocknr);
  79571. +static z_hash_table *znode_get_htable(const znode *);
  79572. +static void zdrop(znode *);
  79573. +
  79574. +/* hash table support */
  79575. +
  79576. +/* compare two block numbers for equality. Used by hash-table macros */
  79577. +static inline int
  79578. +blknreq(const reiser4_block_nr * b1, const reiser4_block_nr * b2)
  79579. +{
  79580. + assert("nikita-534", b1 != NULL);
  79581. + assert("nikita-535", b2 != NULL);
  79582. +
  79583. + return *b1 == *b2;
  79584. +}
  79585. +
/* Hash znode by block number. Used by hash-table macros */
/* Audited by: umka (2002.06.11) */
/* @table is unused here but is part of the hash-table macro interface.
   The table size is a power of two, so masking the block number yields a
   bucket index in [0, REISER4_ZNODE_HASH_TABLE_SIZE). */
static inline __u32
blknrhashfn(z_hash_table * table, const reiser4_block_nr * b)
{
	assert("nikita-536", b != NULL);

	/* low bits of the block number select the bucket */
	return *b & (REISER4_ZNODE_HASH_TABLE_SIZE - 1);
}
  79595. +
  79596. +/* The hash table definition */
  79597. +#define KMALLOC(size) reiser4_vmalloc(size)
  79598. +#define KFREE(ptr, size) vfree(ptr)
  79599. +TYPE_SAFE_HASH_DEFINE(z, znode, reiser4_block_nr, zjnode.key.z, zjnode.link.z,
  79600. + blknrhashfn, blknreq);
  79601. +#undef KFREE
  79602. +#undef KMALLOC
  79603. +
  79604. +/* slab for znodes */
  79605. +static struct kmem_cache *znode_cache;
  79606. +
  79607. +int znode_shift_order;
  79608. +
  79609. +/**
  79610. + * init_znodes - create znode cache
  79611. + *
  79612. + * Initializes slab cache of znodes. It is part of reiser4 module initialization.
  79613. + */
  79614. +int init_znodes(void)
  79615. +{
  79616. + znode_cache = kmem_cache_create("znode", sizeof(znode), 0,
  79617. + SLAB_HWCACHE_ALIGN |
  79618. + SLAB_RECLAIM_ACCOUNT, NULL);
  79619. + if (znode_cache == NULL)
  79620. + return RETERR(-ENOMEM);
  79621. +
  79622. + for (znode_shift_order = 0; (1 << znode_shift_order) < sizeof(znode);
  79623. + ++znode_shift_order);
  79624. + --znode_shift_order;
  79625. + return 0;
  79626. +}
  79627. +
  79628. +/**
  79629. + * done_znodes - delete znode cache
  79630. + *
  79631. + * This is called on reiser4 module unloading or system shutdown.
  79632. + */
  79633. +void done_znodes(void)
  79634. +{
  79635. + destroy_reiser4_cache(&znode_cache);
  79636. +}
  79637. +
  79638. +/* call this to initialise tree of znodes */
  79639. +int znodes_tree_init(reiser4_tree * tree /* tree to initialise znodes for */ )
  79640. +{
  79641. + int result;
  79642. + assert("umka-050", tree != NULL);
  79643. +
  79644. + rwlock_init(&tree->dk_lock);
  79645. +
  79646. + result = z_hash_init(&tree->zhash_table, REISER4_ZNODE_HASH_TABLE_SIZE);
  79647. + if (result != 0)
  79648. + return result;
  79649. + result = z_hash_init(&tree->zfake_table, REISER4_ZNODE_HASH_TABLE_SIZE);
  79650. + return result;
  79651. +}
  79652. +
/* free this znode */
/* Precondition (spelled out by the asserts): the znode is completely
   disconnected -- no attached page, no lock owners or requestors, not
   captured by a transaction, and no sibling/parent links remain. */
void zfree(znode * node /* znode to free */ )
{
	assert("nikita-465", node != NULL);
	assert("nikita-2120", znode_page(node) == NULL);
	assert("nikita-2301", list_empty_careful(&node->lock.owners));
	assert("nikita-2302", list_empty_careful(&node->lock.requestors));
	assert("nikita-2663", (list_empty_careful(&ZJNODE(node)->capture_link) &&
			       NODE_LIST(ZJNODE(node)) == NOT_CAPTURED));
	assert("nikita-3220", list_empty(&ZJNODE(node)->jnodes));
	assert("nikita-3293", !znode_is_right_connected(node));
	assert("nikita-3294", !znode_is_left_connected(node));
	assert("nikita-3295", node->left == NULL);
	assert("nikita-3296", node->right == NULL);

	/* not yet phash_jnode_destroy(ZJNODE(node)); */

	/* return the object to the slab cache it came from */
	kmem_cache_free(znode_cache, node);
}
  79672. +
  79673. +/* call this to free tree of znodes */
  79674. +void znodes_tree_done(reiser4_tree * tree /* tree to finish with znodes of */ )
  79675. +{
  79676. + znode *node;
  79677. + znode *next;
  79678. + z_hash_table *ztable;
  79679. +
  79680. + /* scan znode hash-tables and kill all znodes, then free hash tables
  79681. + * themselves. */
  79682. +
  79683. + assert("nikita-795", tree != NULL);
  79684. +
  79685. + ztable = &tree->zhash_table;
  79686. +
  79687. + if (ztable->_table != NULL) {
  79688. + for_all_in_htable(ztable, z, node, next) {
  79689. + node->c_count = 0;
  79690. + node->in_parent.node = NULL;
  79691. + assert("nikita-2179", atomic_read(&ZJNODE(node)->x_count) == 0);
  79692. + zdrop(node);
  79693. + }
  79694. +
  79695. + z_hash_done(&tree->zhash_table);
  79696. + }
  79697. +
  79698. + ztable = &tree->zfake_table;
  79699. +
  79700. + if (ztable->_table != NULL) {
  79701. + for_all_in_htable(ztable, z, node, next) {
  79702. + node->c_count = 0;
  79703. + node->in_parent.node = NULL;
  79704. + assert("nikita-2179", atomic_read(&ZJNODE(node)->x_count) == 0);
  79705. + zdrop(node);
  79706. + }
  79707. +
  79708. + z_hash_done(&tree->zfake_table);
  79709. + }
  79710. +}
  79711. +
  79712. +/* ZNODE STRUCTURES */
  79713. +
  79714. +/* allocate fresh znode */
  79715. +znode *zalloc(gfp_t gfp_flag /* allocation flag */ )
  79716. +{
  79717. + znode *node;
  79718. +
  79719. + node = kmem_cache_alloc(znode_cache, gfp_flag);
  79720. + return node;
  79721. +}
  79722. +
/* Initialize fields of znode
   @node: znode to initialize;
   @parent: parent znode;
   @tree: tree we are in. */
void zinit(znode * node, const znode * parent, reiser4_tree * tree)
{
	assert("nikita-466", node != NULL);
	assert("umka-268", current_tree != NULL);

	/* start from an all-zero state; anything not set below stays 0 */
	memset(node, 0, sizeof *node);

	assert("umka-051", tree != NULL);

	jnode_init(&node->zjnode, tree, JNODE_FORMATTED_BLOCK);
	reiser4_init_lock(&node->lock);
	init_parent_coord(&node->in_parent, parent);
}
  79740. +
  79741. +/*
  79742. + * remove znode from indices. This is called jput() when last reference on
  79743. + * znode is released.
  79744. + */
  79745. +void znode_remove(znode * node /* znode to remove */ , reiser4_tree * tree)
  79746. +{
  79747. + assert("nikita-2108", node != NULL);
  79748. + assert("nikita-470", node->c_count == 0);
  79749. + assert_rw_write_locked(&(tree->tree_lock));
  79750. +
  79751. + /* remove reference to this znode from cbk cache */
  79752. + cbk_cache_invalidate(node, tree);
  79753. +
  79754. + /* update c_count of parent */
  79755. + if (znode_parent(node) != NULL) {
  79756. + assert("nikita-472", znode_parent(node)->c_count > 0);
  79757. + /* father, onto your hands I forward my spirit... */
  79758. + znode_parent(node)->c_count--;
  79759. + node->in_parent.node = NULL;
  79760. + } else {
  79761. + /* orphaned znode?! Root? */
  79762. + }
  79763. +
  79764. + /* remove znode from hash-table */
  79765. + z_hash_remove_rcu(znode_get_htable(node), node);
  79766. +}
  79767. +
/* zdrop() -- Remove znode from the tree.

   This is called when znode is removed from the memory.
   Thin wrapper: delegates to jdrop() on the embedded jnode. */
static void zdrop(znode * node /* znode to finish with */ )
{
	jdrop(ZJNODE(node));
}
  79775. +
  79776. +/*
  79777. + * put znode into right place in the hash table. This is called by relocate
  79778. + * code.
  79779. + */
  79780. +int znode_rehash(znode * node /* node to rehash */ ,
  79781. + const reiser4_block_nr * new_block_nr /* new block number */ )
  79782. +{
  79783. + z_hash_table *oldtable;
  79784. + z_hash_table *newtable;
  79785. + reiser4_tree *tree;
  79786. +
  79787. + assert("nikita-2018", node != NULL);
  79788. +
  79789. + tree = znode_get_tree(node);
  79790. + oldtable = znode_get_htable(node);
  79791. + newtable = get_htable(tree, new_block_nr);
  79792. +
  79793. + write_lock_tree(tree);
  79794. + /* remove znode from hash-table */
  79795. + z_hash_remove_rcu(oldtable, node);
  79796. +
  79797. + /* assertion no longer valid due to RCU */
  79798. + /* assert("nikita-2019", z_hash_find(newtable, new_block_nr) == NULL); */
  79799. +
  79800. + /* update blocknr */
  79801. + znode_set_block(node, new_block_nr);
  79802. + node->zjnode.key.z = *new_block_nr;
  79803. +
  79804. + /* insert it into hash */
  79805. + z_hash_insert_rcu(newtable, node);
  79806. + write_unlock_tree(tree);
  79807. + return 0;
  79808. +}
  79809. +
  79810. +/* ZNODE LOOKUP, GET, PUT */
  79811. +
/* zlook() - get znode with given block_nr in a hash table or return NULL

   If result is non-NULL then the znode's x_count is incremented. The hash
   table is walked under rcu_read_lock(); znode_rip_check() filters out a
   znode that is concurrently being destroyed. (An earlier comment here
   referred to a "tree->hash_lock" and an internal pre-computed-hash
   version; neither exists in this code.)
*/
znode *zlook(reiser4_tree * tree, const reiser4_block_nr * const blocknr)
{
	znode *result;
	__u32 hash;
	z_hash_table *htable;

	assert("jmacd-506", tree != NULL);
	assert("jmacd-507", blocknr != NULL);

	htable = get_htable(tree, blocknr);
	hash = blknrhashfn(htable, blocknr);

	rcu_read_lock();
	result = z_hash_find_index(htable, hash, blocknr);

	if (result != NULL) {
		/* pin the znode before leaving the RCU read section */
		add_x_ref(ZJNODE(result));
		result = znode_rip_check(tree, result);
	}
	rcu_read_unlock();

	return result;
}
  79841. +
  79842. +/* return hash table where znode with block @blocknr is (or should be)
  79843. + * stored */
  79844. +static z_hash_table *get_htable(reiser4_tree * tree,
  79845. + const reiser4_block_nr * const blocknr)
  79846. +{
  79847. + z_hash_table *table;
  79848. + if (is_disk_addr_unallocated(blocknr))
  79849. + table = &tree->zfake_table;
  79850. + else
  79851. + table = &tree->zhash_table;
  79852. + return table;
  79853. +}
  79854. +
/* return hash table where znode @node is (or should be) stored */
/* selection is keyed off the znode's current block number */
static z_hash_table *znode_get_htable(const znode * node)
{
	return get_htable(znode_get_tree(node), znode_get_block(node));
}
  79860. +
/* zget() - get znode from hash table, allocating it if necessary.

   First a call to zlook, locating a x-referenced znode if one
   exists. If znode is not found, allocate new one and return. Result
   is returned with x_count reference increased.

   LOCKS TAKEN: TREE_LOCK, ZNODE_LOCK
   LOCK ORDERING: NONE
*/
znode *zget(reiser4_tree * tree,
	    const reiser4_block_nr * const blocknr,
	    znode * parent, tree_level level, gfp_t gfp_flag)
{
	znode *result;
	__u32 hashi;

	z_hash_table *zth;

	assert("jmacd-512", tree != NULL);
	assert("jmacd-513", blocknr != NULL);
	assert("jmacd-514", level < REISER4_MAX_ZTREE_HEIGHT);

	zth = get_htable(tree, blocknr);
	hashi = blknrhashfn(zth, blocknr);

	/* NOTE-NIKITA address-as-unallocated-blocknr still is not
	   implemented. */

	z_hash_prefetch_bucket(zth, hashi);

	rcu_read_lock();
	/* Find a matching BLOCKNR in the hash table. If the znode is found,
	   we obtain a reference (x_count) but the znode remains unlocked.
	   Have to worry about race conditions later. */
	result = z_hash_find_index(zth, hashi, blocknr);
	/* According to the current design, the hash table lock protects new
	   znode references. */
	if (result != NULL) {
		add_x_ref(ZJNODE(result));
		/* NOTE-NIKITA it should be so, but special case during
		   creation of new root makes such assertion highly
		   complicated. */
		assert("nikita-2131", 1 || znode_parent(result) == parent ||
		       (ZF_ISSET(result, JNODE_ORPHAN)
			&& (znode_parent(result) == NULL)));
		result = znode_rip_check(tree, result);
	}

	rcu_read_unlock();

	if (!result) {
		znode *shadow;

		result = zalloc(gfp_flag);
		if (!result) {
			return ERR_PTR(RETERR(-ENOMEM));
		}

		zinit(result, parent, tree);
		ZJNODE(result)->blocknr = *blocknr;
		ZJNODE(result)->key.z = *blocknr;
		result->level = level;

		write_lock_tree(tree);

		/* re-check under the tree lock: another thread may have
		   inserted a znode for this block while we were allocating */
		shadow = z_hash_find_index(zth, hashi, blocknr);
		if (unlikely(shadow != NULL && !ZF_ISSET(shadow, JNODE_RIP))) {
			/* lost the race: discard our copy, use the winner's */
			jnode_list_remove(ZJNODE(result));
			zfree(result);
			result = shadow;
		} else {
			result->version = znode_build_version(tree);
			z_hash_insert_index_rcu(zth, hashi, result);

			if (parent != NULL)
				++parent->c_count;
		}

		add_x_ref(ZJNODE(result));

		write_unlock_tree(tree);
	}

	assert("intelfx-6",
	       ergo(!reiser4_blocknr_is_fake(blocknr) && *blocknr != 0,
		    reiser4_check_block(blocknr, 1)));

	/* Check for invalid tree level, return -EIO */
	if (unlikely(znode_get_level(result) != level)) {
		warning("jmacd-504",
			"Wrong level for cached block %llu: %i expecting %i",
			(unsigned long long)(*blocknr), znode_get_level(result),
			level);
		zput(result);
		return ERR_PTR(RETERR(-EIO));
	}

	assert("nikita-1227", znode_invariant(result));

	return result;
}
  79962. +
  79963. +/* ZNODE PLUGINS/DATA */
  79964. +
/* "guess" plugin for node loaded from the disk. Plugin id of node plugin is
   stored at the fixed offset from the beginning of the node. */
static node_plugin *znode_guess_plugin(const znode * node /* znode to guess
							   * plugin of */ )
{
	reiser4_tree *tree;

	assert("nikita-1053", node != NULL);
	assert("nikita-1055", zdata(node) != NULL);

	tree = znode_get_tree(node);
	assert("umka-053", tree != NULL);

	if (reiser4_is_set(tree->super, REISER4_ONE_NODE_PLUGIN)) {
		/* single node plugin enforced file-system wide */
		return tree->nplug;
	} else {
		return node_plugin_by_disk_id
		    (tree, &((common_node_header *) zdata(node))->plugin_id);
#ifdef GUESS_EXISTS
		/* NOTE(review): everything below follows an unconditional
		 * return and is unreachable even with GUESS_EXISTS defined */
		reiser4_plugin *plugin;

		/* NOTE-NIKITA add locking here when dynamic plugins will be
		 * implemented */
		for_all_plugins(REISER4_NODE_PLUGIN_TYPE, plugin) {
			if ((plugin->u.node.guess != NULL)
			    && plugin->u.node.guess(node))
				return plugin;
		}
		warning("nikita-1057", "Cannot guess node plugin");
		print_znode("node", node);
		return NULL;
#endif
	}
}
  79999. +
  80000. +/* parse node header and install ->node_plugin */
  80001. +int zparse(znode * node /* znode to parse */ )
  80002. +{
  80003. + int result;
  80004. +
  80005. + assert("nikita-1233", node != NULL);
  80006. + assert("nikita-2370", zdata(node) != NULL);
  80007. +
  80008. + if (node->nplug == NULL) {
  80009. + node_plugin *nplug;
  80010. +
  80011. + nplug = znode_guess_plugin(node);
  80012. + if (likely(nplug != NULL)) {
  80013. + result = nplug->parse(node);
  80014. + if (likely(result == 0))
  80015. + node->nplug = nplug;
  80016. + } else {
  80017. + result = RETERR(-EIO);
  80018. + }
  80019. + } else
  80020. + result = 0;
  80021. + return result;
  80022. +}
  80023. +
/* zload with readahead */
/* Issue optional formatted-node readahead described by @info, then load
   @node's data via jload(). Returns jload()'s result (0 on success).
   Caller must already hold an x_count reference (asserted). */
int zload_ra(znode * node /* znode to load */ , ra_info_t * info)
{
	int result;

	assert("nikita-484", node != NULL);
	assert("nikita-1377", znode_invariant(node));
	assert("jmacd-7771", !znode_above_root(node));
	assert("nikita-2125", atomic_read(&ZJNODE(node)->x_count) > 0);
	assert("nikita-3016", reiser4_schedulable());

	if (info)
		formatted_readahead(node, info);

	result = jload(ZJNODE(node));
	assert("nikita-1378", znode_invariant(node));
	return result;
}
  80042. +
/* load content of node into memory */
/* equivalent to zload_ra() with no readahead hint; pair with zrelse() */
int zload(znode *node)
{
	return zload_ra(node, NULL);
}
  80048. +
/* call node plugin to initialise newly allocated node. */
/* thin wrapper: defers to the jnode layer's jinit_new() */
int zinit_new(znode * node /* znode to initialise */ , gfp_t gfp_flags)
{
	return jinit_new(ZJNODE(node), gfp_flags);
}
  80054. +
/* drop reference to node data. When last reference is dropped, data are
   unloaded. Pairs with zload()/zload_ra(). */
void zrelse(znode * node /* znode to release references to */ )
{
	assert("nikita-1381", znode_invariant(node));
	jrelse(ZJNODE(node));
}
  80062. +
/* returns free space in node */
/* delegates to the node plugin's ->free_space() method */
unsigned znode_free_space(znode * node /* znode to query */ )
{
	assert("nikita-852", node != NULL);
	return node_plugin_by_node(node)->free_space(node);
}
  80069. +
/* right delimiting key of znode; caller must hold the dk lock.
   (The previous comment here said "left delimiting key" — it was swapped
   with the comment on znode_get_ld_key(); this function returns ->rd_key.) */
reiser4_key *znode_get_rd_key(znode * node /* znode to query */ )
{
	assert("nikita-958", node != NULL);
	assert_rw_locked(&(znode_get_tree(node)->dk_lock));
	assert("nikita-3067", LOCK_CNT_GTZ(rw_locked_dk));
	assert("nikita-30671", node->rd_key_version != 0);
	return &node->rd_key;
}
  80079. +
/* left delimiting key of znode; caller must hold the dk lock.
   (The previous comment here said "right delimiting key" — it was swapped
   with the comment on znode_get_rd_key(); this function returns ->ld_key.) */
reiser4_key *znode_get_ld_key(znode * node /* znode to query */ )
{
	assert("nikita-974", node != NULL);
	assert_rw_locked(&(znode_get_tree(node)->dk_lock));
	assert("nikita-3068", LOCK_CNT_GTZ(rw_locked_dk));
	assert("nikita-30681", node->ld_key_version != 0);
	return &node->ld_key;
}
  80089. +
  80090. +ON_DEBUG(atomic_t delim_key_version = ATOMIC_INIT(0);
  80091. + )
  80092. +
/* update right-delimiting key of @node; caller must hold the dk lock for
   writing (asserted). Returns a pointer to the stored key. */
reiser4_key *znode_set_rd_key(znode * node, const reiser4_key * key)
{
	assert("nikita-2937", node != NULL);
	assert("nikita-2939", key != NULL);
	assert_rw_write_locked(&(znode_get_tree(node)->dk_lock));
	assert("nikita-3069", LOCK_CNT_GTZ(write_locked_dk));
	assert("nikita-2944",
	       znode_is_any_locked(node) ||
	       znode_get_level(node) != LEAF_LEVEL ||
	       keyge(key, &node->rd_key) ||
	       keyeq(&node->rd_key, reiser4_min_key()) ||
	       ZF_ISSET(node, JNODE_HEARD_BANSHEE));

	node->rd_key = *key;
	/* bump the debug-only version stamp checked by the key getters */
	ON_DEBUG(node->rd_key_version = atomic_inc_return(&delim_key_version));
	return &node->rd_key;
}
  80111. +
/* update left-delimiting key of @node; caller must hold the dk lock for
   writing (asserted). Returns a pointer to the stored key. */
reiser4_key *znode_set_ld_key(znode * node, const reiser4_key * key)
{
	assert("nikita-2940", node != NULL);
	assert("nikita-2941", key != NULL);
	assert_rw_write_locked(&(znode_get_tree(node)->dk_lock));
	assert("nikita-3070", LOCK_CNT_GTZ(write_locked_dk));
	assert("nikita-2943",
	       znode_is_any_locked(node) || keyeq(&node->ld_key,
						  reiser4_min_key()));

	node->ld_key = *key;
	/* bump the debug-only version stamp checked by the key getters */
	ON_DEBUG(node->ld_key_version = atomic_inc_return(&delim_key_version));
	return &node->ld_key;
}
  80127. +
/* true if @key is inside key range for @node */
/* caller must hold the dk lock: the delimiting-key getters assert it */
int znode_contains_key(znode * node /* znode to look in */ ,
		       const reiser4_key * key /* key to look for */ )
{
	assert("nikita-1237", node != NULL);
	assert("nikita-1238", key != NULL);

	/* left_delimiting_key <= key <= right_delimiting_key */
	return keyle(znode_get_ld_key(node), key)
	    && keyle(key, znode_get_rd_key(node));
}
  80139. +
  80140. +/* same as znode_contains_key(), but lock dk lock */
  80141. +int znode_contains_key_lock(znode * node /* znode to look in */ ,
  80142. + const reiser4_key * key /* key to look for */ )
  80143. +{
  80144. + int result;
  80145. +
  80146. + assert("umka-056", node != NULL);
  80147. + assert("umka-057", key != NULL);
  80148. +
  80149. + read_lock_dk(znode_get_tree(node));
  80150. + result = znode_contains_key(node, key);
  80151. + read_unlock_dk(znode_get_tree(node));
  80152. + return result;
  80153. +}
  80154. +
/* get parent pointer, assuming tree is not locked */
/* no tree-lock requirement; the returned pointer may race with reparenting */
znode *znode_parent_nolock(const znode * node /* child znode */ )
{
	assert("nikita-1444", node != NULL);
	return node->in_parent.node;
}
  80161. +
/* get parent pointer of znode */
/* caller must hold the tree lock (checked by the LOCK_CNT assert) */
znode *znode_parent(const znode * node /* child znode */ )
{
	assert("nikita-1226", node != NULL);
	assert("nikita-1406", LOCK_CNT_GTZ(rw_locked_tree));
	return znode_parent_nolock(node);
}
  80169. +
/* detect uber znode used to protect in-superblock tree root pointer */
/* the uber znode is identified by the reserved UBER_TREE_ADDR block number */
int znode_above_root(const znode * node /* znode to query */ )
{
	assert("umka-059", node != NULL);

	return disk_addr_eq(&ZJNODE(node)->blocknr, &UBER_TREE_ADDR);
}
  80177. +
  80178. +/* check that @node is root---that its block number is recorder in the tree as
  80179. + that of root node */
  80180. +#if REISER4_DEBUG
/* debug-only: true iff @node's block number matches the tree's recorded
   root block */
static int znode_is_true_root(const znode * node /* znode to query */ )
{
	assert("umka-060", node != NULL);
	assert("umka-061", current_tree != NULL);

	return disk_addr_eq(znode_get_block(node),
			    &znode_get_tree(node)->root_block);
}
  80189. +#endif
  80190. +
/* check that @node is root */
/* the root node is the one whose level equals the tree height */
int znode_is_root(const znode * node /* znode to query */ )
{
	return znode_get_level(node) == znode_get_tree(node)->height;
}
  80196. +
/* Returns true is @node was just created by zget() and wasn't ever loaded
   into memory. */
/* NIKITA-HANS: yes */
/* "never loaded" is detected by the absence of an attached page */
int znode_just_created(const znode * node)
{
	assert("nikita-2188", node != NULL);
	return (znode_page(node) == NULL);
}
  80205. +
/* obtain updated ->znode_epoch. See seal.c for description. */
/* the epoch counter is incremented under tree->epoch_lock, so each call
   returns a unique, monotonically increasing value */
__u64 znode_build_version(reiser4_tree * tree)
{
	__u64 result;

	spin_lock(&tree->epoch_lock);
	result = ++tree->znode_epoch;
	spin_unlock(&tree->epoch_lock);
	return result;
}
  80216. +
/* reset @dh to the empty state: no node, zero data references */
void init_load_count(load_count * dh)
{
	assert("nikita-2105", dh != NULL);
	memset(dh, 0, sizeof *dh);
}
  80222. +
  80223. +void done_load_count(load_count * dh)
  80224. +{
  80225. + assert("nikita-2106", dh != NULL);
  80226. + if (dh->node != NULL) {
  80227. + for (; dh->d_ref > 0; --dh->d_ref)
  80228. + zrelse(dh->node);
  80229. + dh->node = NULL;
  80230. + }
  80231. +}
  80232. +
/* take one more data reference on dh->node; d_ref is bumped only when
   zload() succeeds. Returns zload()'s result. */
static int incr_load_count(load_count * dh)
{
	int result;

	assert("nikita-2110", dh != NULL);
	assert("nikita-2111", dh->node != NULL);

	result = zload(dh->node);
	if (result == 0)
		++dh->d_ref;
	return result;
}
  80245. +
/* bind @dh to @node (must match any existing binding unless d_ref is 0)
   and take one data reference on it */
int incr_load_count_znode(load_count * dh, znode * node)
{
	assert("nikita-2107", dh != NULL);
	assert("nikita-2158", node != NULL);
	assert("nikita-2109",
	       ergo(dh->node != NULL, (dh->node == node) || (dh->d_ref == 0)));

	dh->node = node;
	return incr_load_count(dh);
}
  80256. +
  80257. +int incr_load_count_jnode(load_count * dh, jnode * node)
  80258. +{
  80259. + if (jnode_is_znode(node)) {
  80260. + return incr_load_count_znode(dh, JZNODE(node));
  80261. + }
  80262. + return 0;
  80263. +}
  80264. +
/* make @new hold the same node and the same number of data references as
   @old, acquiring fresh references one by one. Failure of an individual
   zload() stops the loop; it is only reported via the assert, so with
   asserts disabled a partial copy is possible (and @ret is then unused). */
void copy_load_count(load_count * new, load_count * old)
{
	int ret = 0;
	done_load_count(new);
	new->node = old->node;
	new->d_ref = 0;

	while ((new->d_ref < old->d_ref) && (ret = incr_load_count(new)) == 0) {
	}

	assert("jmacd-87589", ret == 0);
}
  80277. +
/* transfer @old's node and data references to @new without acquiring or
   releasing any references; @old is left empty */
void move_load_count(load_count * new, load_count * old)
{
	done_load_count(new);
	new->node = old->node;
	new->d_ref = old->d_ref;
	old->node = NULL;
	old->d_ref = 0;
}
  80286. +
/* convert parent pointer into coord */
/* expands the compact (node, item_pos) pair into a full coord positioned
   AT_UNIT on the first unit of that item */
void parent_coord_to_coord(const parent_coord_t * pcoord, coord_t * coord)
{
	assert("nikita-3204", pcoord != NULL);
	assert("nikita-3205", coord != NULL);

	coord_init_first_unit_nocheck(coord, pcoord->node);
	coord_set_item_pos(coord, pcoord->item_pos);
	coord->between = AT_UNIT;
}
  80297. +
/* pack coord into parent_coord_t */
/* only the node and item position survive; unit position and "between"
   state are dropped */
void coord_to_parent_coord(const coord_t * coord, parent_coord_t * pcoord)
{
	assert("nikita-3206", pcoord != NULL);
	assert("nikita-3207", coord != NULL);

	pcoord->node = coord->node;
	pcoord->item_pos = coord->item_pos;
}
  80307. +
/* Initialize a parent hint pointer. (parent hint pointer is a field in znode,
   look for comments there) */
/* item_pos is set to ~0 as a deliberately invalid "unknown position" marker */
void init_parent_coord(parent_coord_t * pcoord, const znode * node)
{
	pcoord->node = (znode *) node;
	pcoord->item_pos = (unsigned short)~0;
}
  80315. +
  80316. +#if REISER4_DEBUG
  80317. +
/* debugging aid: znode invariant */
/* Returns true iff @node satisfies the znode consistency checks below; on
   failure *msg is left pointing at a string naming the violated condition.
   Called by znode_invariant() with the znode spin lock and tree lock held. */
static int znode_invariant_f(const znode * node /* znode to check */ ,
			     char const **msg /* where to store error
					      * message, if any */ )
{
#define _ergo(ant, con) \
	((*msg) = "{" #ant "} ergo {" #con "}", ergo((ant), (con)))

#define _equi(e1, e2) \
	((*msg) = "{" #e1 "} <=> {" #e2 "}", equi((e1), (e2)))

#define _check(exp) ((*msg) = #exp, (exp))

	return jnode_invariant_f(ZJNODE(node), msg) &&
	    /* [znode-fake] invariant */
	    /* fake znode doesn't have a parent, and */
	    _ergo(znode_get_level(node) == 0, znode_parent(node) == NULL) &&
	    /* there is another way to express this very check, and */
	    _ergo(znode_above_root(node), znode_parent(node) == NULL) &&
	    /* it has special block number, and */
	    _ergo(znode_get_level(node) == 0,
		  disk_addr_eq(znode_get_block(node), &UBER_TREE_ADDR)) &&
	    /* it is the only znode with such block number, and */
	    _ergo(!znode_above_root(node) && znode_is_loaded(node),
		  !disk_addr_eq(znode_get_block(node), &UBER_TREE_ADDR)) &&
	    /* it is parent of the tree root node */
	    _ergo(znode_is_true_root(node),
		  znode_above_root(znode_parent(node))) &&
	    /* [znode-level] invariant */
	    /* level of parent znode is one larger than that of child,
	       except for the fake znode, and */
	    _ergo(znode_parent(node) && !znode_above_root(znode_parent(node)),
		  znode_get_level(znode_parent(node)) ==
		  znode_get_level(node) + 1) &&
	    /* left neighbor is at the same level, and */
	    _ergo(znode_is_left_connected(node) && node->left != NULL,
		  znode_get_level(node) == znode_get_level(node->left)) &&
	    /* right neighbor is at the same level */
	    _ergo(znode_is_right_connected(node) && node->right != NULL,
		  znode_get_level(node) == znode_get_level(node->right)) &&
	    /* [znode-connected] invariant */
	    _ergo(node->left != NULL, znode_is_left_connected(node)) &&
	    _ergo(node->right != NULL, znode_is_right_connected(node)) &&
	    _ergo(!znode_is_root(node) && node->left != NULL,
		  znode_is_right_connected(node->left) &&
		  node->left->right == node) &&
	    _ergo(!znode_is_root(node) && node->right != NULL,
		  znode_is_left_connected(node->right) &&
		  node->right->left == node) &&
	    /* [znode-c_count] invariant */
	    /* for any znode, c_count of its parent is greater than 0 */
	    _ergo(znode_parent(node) != NULL &&
		  !znode_above_root(znode_parent(node)),
		  znode_parent(node)->c_count > 0) &&
	    /* leaves don't have children */
	    _ergo(znode_get_level(node) == LEAF_LEVEL,
		  node->c_count == 0) &&
	    _check(node->zjnode.jnodes.prev != NULL) &&
	    _check(node->zjnode.jnodes.next != NULL) &&
	    /* orphan doesn't have a parent */
	    _ergo(ZF_ISSET(node, JNODE_ORPHAN), znode_parent(node) == 0) &&
	    /* [znode-modify] invariant */
	    /* if znode is not write-locked, its checksum remains
	     * invariant */
	    /* unfortunately, zlock is unordered w.r.t. jnode_lock, so we
	     * cannot check this. */
	    /* [znode-refs] invariant */
	    /* only referenced znode can be long-term locked */
	    _ergo(znode_is_locked(node),
		  atomic_read(&ZJNODE(node)->x_count) != 0);
}
  80389. +
/* debugging aid: check znode invariant and panic if it doesn't hold */
/* takes the znode spin lock and the tree read lock so the invariant is
   evaluated against a stable snapshot; emits a warning naming the failed
   condition and returns 0 on violation */
int znode_invariant(znode * node /* znode to check */ )
{
	char const *failed_msg;
	int result;

	assert("umka-063", node != NULL);
	assert("umka-064", current_tree != NULL);

	spin_lock_znode(node);
	read_lock_tree(znode_get_tree(node));
	result = znode_invariant_f(node, &failed_msg);
	if (!result) {
		/* print_znode("corrupted node", node); */
		warning("jmacd-555", "Condition %s failed", failed_msg);
	}
	read_unlock_tree(znode_get_tree(node));
	spin_unlock_znode(node);
	return result;
}
  80410. +
  80411. +/* return non-0 iff data are loaded into znode */
  80412. +int znode_is_loaded(const znode * node /* znode to query */ )
  80413. +{
  80414. + assert("nikita-497", node != NULL);
  80415. + return jnode_is_loaded(ZJNODE(node));
  80416. +}
  80417. +
  80418. +unsigned long znode_times_locked(const znode * z)
  80419. +{
  80420. + return z->times_locked;
  80421. +}
  80422. +
  80423. +#endif /* REISER4_DEBUG */
  80424. +
  80425. +/* Make Linus happy.
  80426. + Local variables:
  80427. + c-indentation-style: "K&R"
  80428. + mode-name: "LC"
  80429. + c-basic-offset: 8
  80430. + tab-width: 8
  80431. + fill-column: 120
  80432. + End:
  80433. +*/
  80434. diff -urN --no-dereference linux-5.16.14.orig/fs/reiser4/znode.h linux-5.16.14/fs/reiser4/znode.h
  80435. --- linux-5.16.14.orig/fs/reiser4/znode.h 1970-01-01 01:00:00.000000000 +0100
  80436. +++ linux-5.16.14/fs/reiser4/znode.h 2022-03-12 13:26:19.692892826 +0100
  80437. @@ -0,0 +1,435 @@
  80438. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  80439. + * reiser4/README */
  80440. +
  80441. +/* Declaration of znode (Zam's node). See znode.c for more details. */
  80442. +
  80443. +#ifndef __ZNODE_H__
  80444. +#define __ZNODE_H__
  80445. +
  80446. +#include "forward.h"
  80447. +#include "debug.h"
  80448. +#include "dformat.h"
  80449. +#include "key.h"
  80450. +#include "coord.h"
  80451. +#include "plugin/node/node.h"
  80452. +#include "jnode.h"
  80453. +#include "lock.h"
  80454. +#include "readahead.h"
  80455. +
  80456. +#include <linux/types.h>
  80457. +#include <linux/spinlock.h>
  80458. +#include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */
  80459. +#include <asm/atomic.h>
  80460. +
  80461. +/* znode tracks its position within parent (internal item in a parent node,
  80462. + * that contains znode's block number). */
  80463. +typedef struct parent_coord {
  80464. + znode *node;
  80465. + pos_in_node_t item_pos;
  80466. +} parent_coord_t;
  80467. +
  80468. +/* &znode - node in a reiser4 tree.
  80469. +
  80470. + NOTE-NIKITA fields in this struct have to be rearranged (later) to reduce
  80471. + cacheline pressure.
  80472. +
  80473. + Locking:
  80474. +
  80475. + Long term: data in a disk node attached to this znode are protected
  80476. + by long term, deadlock aware lock ->lock;
  80477. +
  80478. + Spin lock: the following fields are protected by the spin lock:
  80479. +
  80480. + ->lock
  80481. +
  80482. + Following fields are protected by the global tree lock:
  80483. +
  80484. + ->left
  80485. + ->right
  80486. + ->in_parent
  80487. + ->c_count
  80488. +
  80489. + Following fields are protected by the global delimiting key lock (dk_lock):
  80490. +
  80491. + ->ld_key (to update ->ld_key long-term lock on the node is also required)
  80492. + ->rd_key
  80493. +
  80494. + Following fields are protected by the long term lock:
  80495. +
  80496. + ->nr_items
  80497. +
  80498. + ->node_plugin is never changed once set. This means that after code made
  80499. + itself sure that field is valid it can be accessed without any additional
  80500. + locking.
  80501. +
  80502. + ->level is immutable.
  80503. +
  80504. + Invariants involving this data-type:
  80505. +
  80506. + [znode-fake]
  80507. + [znode-level]
  80508. + [znode-connected]
  80509. + [znode-c_count]
  80510. + [znode-refs]
  80511. + [jnode-refs]
  80512. + [jnode-queued]
  80513. + [znode-modify]
  80514. +
  80515. + For this to be made into a clustering or NUMA filesystem, we would want to eliminate all of the global locks.
  80516. + Suggestions for how to do that are desired.*/
  80517. +struct znode {
  80518. + /* Embedded jnode. */
  80519. + jnode zjnode;
  80520. +
  80521. + /* contains three subfields, node, pos_in_node, and pos_in_unit.
  80522. +
  80523. + pos_in_node and pos_in_unit are only hints that are cached to
  80524. + speed up lookups during balancing. They are not required to be up to
  80525. + date. Synched in find_child_ptr().
  80526. +
  80527. + This value allows us to avoid expensive binary searches.
  80528. +
  80529. + in_parent->node points to the parent of this node, and is NOT a
  80530. + hint.
  80531. + */
  80532. + parent_coord_t in_parent;
  80533. +
  80534. + /*
  80535. + * sibling list pointers
  80536. + */
  80537. +
  80538. + /* left-neighbor */
  80539. + znode *left;
  80540. + /* right-neighbor */
  80541. + znode *right;
  80542. +
  80543. + /* long term lock on node content. This lock supports deadlock
  80544. + detection. See lock.c
  80545. + */
  80546. + zlock lock;
  80547. +
  80548. + /* You cannot remove from memory a node that has children in
  80549. + memory. This is because we rely on the fact that parent of given
  80550. + node can always be reached without blocking for io. When reading a
  80551. + node into memory you must increase the c_count of its parent, when
  80552. + removing it from memory you must decrease the c_count. This makes
  80553. + the code simpler, and the cases where it is suboptimal are truly
  80554. + obscure.
  80555. + */
  80556. + int c_count;
  80557. +
  80558. + /* plugin of node attached to this znode. NULL if znode is not
  80559. + loaded. */
  80560. + node_plugin *nplug;
  80561. +
  80562. + /* version of znode data. This is increased on each modification. This
  80563. + * is necessary to implement seals (see seal.[ch]) efficiently. */
  80564. + __u64 version;
  80565. +
  80566. + /* left delimiting key. Necessary to efficiently perform
  80567. + balancing with node-level locking. Kept in memory only. */
  80568. + reiser4_key ld_key;
  80569. + /* right delimiting key. */
  80570. + reiser4_key rd_key;
  80571. +
  80572. + /* znode's tree level */
  80573. + __u16 level;
  80574. + /* number of items in this node. This field is modified by node
  80575. + * plugin. */
  80576. + __u16 nr_items;
  80577. +
  80578. +#if REISER4_DEBUG
  80579. + void *creator;
  80580. + reiser4_key first_key;
  80581. + unsigned long times_locked;
  80582. + int left_version; /* when node->left was updated */
  80583. + int right_version; /* when node->right was updated */
  80584. + int ld_key_version; /* when node->ld_key was updated */
  80585. + int rd_key_version; /* when node->rd_key was updated */
  80586. +#endif
  80587. +
  80588. +} __attribute__ ((aligned(16)));
  80589. +
  80590. +ON_DEBUG(extern atomic_t delim_key_version;
  80591. + )
  80592. +
  80593. +/* In general I think these macros should not be exposed. */
  80594. +#define znode_is_locked(node) (lock_is_locked(&node->lock))
  80595. +#define znode_is_rlocked(node) (lock_is_rlocked(&node->lock))
  80596. +#define znode_is_wlocked(node) (lock_is_wlocked(&node->lock))
  80597. +#define znode_is_wlocked_once(node) (lock_is_wlocked_once(&node->lock))
  80598. +#define znode_can_be_rlocked(node) (lock_can_be_rlocked(&node->lock))
  80599. +#define is_lock_compatible(node, mode) (lock_mode_compatible(&node->lock, mode))
  80600. +/* Macros for accessing the znode state. */
  80601. +#define ZF_CLR(p,f) JF_CLR (ZJNODE(p), (f))
  80602. +#define ZF_ISSET(p,f) JF_ISSET(ZJNODE(p), (f))
  80603. +#define ZF_SET(p,f) JF_SET (ZJNODE(p), (f))
  80604. +extern znode *zget(reiser4_tree * tree, const reiser4_block_nr * const block,
  80605. + znode * parent, tree_level level, gfp_t gfp_flag);
  80606. +extern znode *zlook(reiser4_tree * tree, const reiser4_block_nr * const block);
  80607. +extern int zload(znode * node);
  80608. +extern int zload_ra(znode * node, ra_info_t * info);
  80609. +extern int zinit_new(znode * node, gfp_t gfp_flags);
  80610. +extern void zrelse(znode * node);
  80611. +extern void znode_change_parent(znode * new_parent, reiser4_block_nr * block);
  80612. +extern void znode_update_csum(znode *node);
  80613. +
  80614. +/* size of data in znode */
  80615. +static inline unsigned
  80616. +znode_size(const znode * node UNUSED_ARG /* znode to query */ )
  80617. +{
  80618. + assert("nikita-1416", node != NULL);
  80619. + return PAGE_SIZE;
  80620. +}
  80621. +
  80622. +extern void parent_coord_to_coord(const parent_coord_t * pcoord,
  80623. + coord_t * coord);
  80624. +extern void coord_to_parent_coord(const coord_t * coord,
  80625. + parent_coord_t * pcoord);
  80626. +extern void init_parent_coord(parent_coord_t * pcoord, const znode * node);
  80627. +
  80628. +extern unsigned znode_free_space(znode * node);
  80629. +
  80630. +extern reiser4_key *znode_get_rd_key(znode * node);
  80631. +extern reiser4_key *znode_get_ld_key(znode * node);
  80632. +
  80633. +extern reiser4_key *znode_set_rd_key(znode * node, const reiser4_key * key);
  80634. +extern reiser4_key *znode_set_ld_key(znode * node, const reiser4_key * key);
  80635. +
  80636. +/* `connected' state checks */
  80637. +static inline int znode_is_right_connected(const znode * node)
  80638. +{
  80639. + return ZF_ISSET(node, JNODE_RIGHT_CONNECTED);
  80640. +}
  80641. +
  80642. +static inline int znode_is_left_connected(const znode * node)
  80643. +{
  80644. + return ZF_ISSET(node, JNODE_LEFT_CONNECTED);
  80645. +}
  80646. +
  80647. +static inline int znode_is_connected(const znode * node)
  80648. +{
  80649. + return znode_is_right_connected(node) && znode_is_left_connected(node);
  80650. +}
  80651. +
  80652. +extern int znode_shift_order;
  80653. +extern int znode_rehash(znode * node, const reiser4_block_nr * new_block_nr);
  80654. +extern void znode_remove(znode *, reiser4_tree *);
  80655. +extern znode *znode_parent(const znode * node);
  80656. +extern znode *znode_parent_nolock(const znode * node);
  80657. +extern int znode_above_root(const znode * node);
  80658. +extern int init_znode(jnode *node);
  80659. +extern int init_znodes(void);
  80660. +extern void done_znodes(void);
  80661. +extern int znodes_tree_init(reiser4_tree * ztree);
  80662. +extern void znodes_tree_done(reiser4_tree * ztree);
  80663. +extern int znode_contains_key(znode * node, const reiser4_key * key);
  80664. +extern int znode_contains_key_lock(znode * node, const reiser4_key * key);
  80665. +extern unsigned znode_save_free_space(znode * node);
  80666. +extern unsigned znode_recover_free_space(znode * node);
  80667. +extern znode *zalloc(gfp_t gfp_flag);
  80668. +extern void zinit(znode *, const znode * parent, reiser4_tree *);
  80669. +extern int zparse(znode * node);
  80670. +
  80671. +extern int znode_just_created(const znode * node);
  80672. +
  80673. +extern void zfree(znode * node);
  80674. +
  80675. +#if REISER4_DEBUG
  80676. +extern void print_znode(const char *prefix, const znode * node);
  80677. +#else
  80678. +#define print_znode( p, n ) noop
  80679. +#endif
  80680. +
  80681. +/* Make it look like various znode functions exist instead of treating znodes as
  80682. + jnodes in znode-specific code. */
  80683. +#define znode_page(x) jnode_page ( ZJNODE(x) )
  80684. +#define zdata(x) jdata ( ZJNODE(x) )
  80685. +#define znode_get_block(x) jnode_get_block ( ZJNODE(x) )
  80686. +#define znode_created(x) jnode_created ( ZJNODE(x) )
  80687. +#define znode_set_created(x) jnode_set_created ( ZJNODE(x) )
  80688. +#define znode_convertible(x) jnode_convertible (ZJNODE(x))
  80689. +#define znode_set_convertible(x) jnode_set_convertible (ZJNODE(x))
  80690. +
  80691. +#define znode_is_dirty(x) jnode_is_dirty ( ZJNODE(x) )
  80692. +#define znode_check_dirty(x) jnode_check_dirty ( ZJNODE(x) )
  80693. +#define znode_make_clean(x) jnode_make_clean ( ZJNODE(x) )
  80694. +#define znode_set_block(x, b) jnode_set_block ( ZJNODE(x), (b) )
  80695. +
  80696. +#define spin_lock_znode(x) spin_lock_jnode ( ZJNODE(x) )
  80697. +#define spin_unlock_znode(x) spin_unlock_jnode ( ZJNODE(x) )
  80698. +#define spin_trylock_znode(x) spin_trylock_jnode ( ZJNODE(x) )
  80699. +#define spin_znode_is_locked(x) spin_jnode_is_locked ( ZJNODE(x) )
  80700. +#define spin_znode_is_not_locked(x) spin_jnode_is_not_locked ( ZJNODE(x) )
  80701. +
  80702. +#if REISER4_DEBUG
  80703. +extern int znode_x_count_is_protected(const znode * node);
  80704. +extern int znode_invariant(znode * node);
  80705. +#endif
  80706. +
  80707. +/* acquire reference to @node */
  80708. +static inline znode *zref(znode * node)
  80709. +{
  80710. + /* change of x_count from 0 to 1 is protected by tree spin-lock */
  80711. + return JZNODE(jref(ZJNODE(node)));
  80712. +}
  80713. +
  80714. +/* release reference to @node */
  80715. +static inline void zput(znode * node)
  80716. +{
  80717. + assert("nikita-3564", znode_invariant(node));
  80718. + jput(ZJNODE(node));
  80719. +}
  80720. +
  80721. +/* get the level field for a znode */
  80722. +static inline tree_level znode_get_level(const znode * node)
  80723. +{
  80724. + return node->level;
  80725. +}
  80726. +
  80727. +/* get the level field for a jnode */
  80728. +static inline tree_level jnode_get_level(const jnode * node)
  80729. +{
  80730. + if (jnode_is_znode(node))
  80731. + return znode_get_level(JZNODE(node));
  80732. + else
  80733. + /* unformatted nodes are all at the LEAF_LEVEL and for
  80734. + "semi-formatted" nodes like bitmaps, level doesn't matter. */
  80735. + return LEAF_LEVEL;
  80736. +}
  80737. +
  80738. +/* true if jnode is on leaf level */
  80739. +static inline int jnode_is_leaf(const jnode * node)
  80740. +{
  80741. + if (jnode_is_znode(node))
  80742. + return (znode_get_level(JZNODE(node)) == LEAF_LEVEL);
  80743. + if (jnode_get_type(node) == JNODE_UNFORMATTED_BLOCK)
  80744. + return 1;
  80745. + return 0;
  80746. +}
  80747. +
  80748. +/* return znode's tree */
  80749. +static inline reiser4_tree *znode_get_tree(const znode * node)
  80750. +{
  80751. + assert("nikita-2692", node != NULL);
  80752. + return jnode_get_tree(ZJNODE(node));
  80753. +}
  80754. +
  80755. +/* resolve race with zput */
  80756. +static inline znode *znode_rip_check(reiser4_tree * tree, znode * node)
  80757. +{
  80758. + jnode *j;
  80759. +
  80760. + j = jnode_rip_sync(tree, ZJNODE(node));
  80761. + if (likely(j != NULL))
  80762. + node = JZNODE(j);
  80763. + else
  80764. + node = NULL;
  80765. + return node;
  80766. +}
  80767. +
  80768. +#if defined(REISER4_DEBUG)
  80769. +int znode_is_loaded(const znode * node /* znode to query */ );
  80770. +#endif
  80771. +
  80772. +extern __u64 znode_build_version(reiser4_tree * tree);
  80773. +
  80774. +/* Data-handles. A data handle object manages pairing calls to zload() and zrelse(). We
  80775. + must load the data for a node in many places. We could do this by simply calling
  80776. + zload() everywhere, the difficulty arises when we must release the loaded data by
  80777. + calling zrelse. In a function with many possible error/return paths, it requires extra
  80778. + work to figure out which exit paths must call zrelse and those which do not. The data
  80779. + handle automatically calls zrelse for every zload that it is responsible for. In that
  80780. + sense, it acts much like a lock_handle.
  80781. +*/
  80782. +typedef struct load_count {
  80783. + znode *node;
  80784. + int d_ref;
  80785. +} load_count;
  80786. +
  80787. +extern void init_load_count(load_count * lc); /* Initialize a load_count set the current node to NULL. */
  80788. +extern void done_load_count(load_count * dh); /* Finalize a load_count: call zrelse() if necessary */
  80789. +extern int incr_load_count_znode(load_count * dh, znode * node); /* Set the argument znode to the current node, call zload(). */
  80790. +extern int incr_load_count_jnode(load_count * dh, jnode * node); /* If the argument jnode is formatted, do the same as
  80791. + * incr_load_count_znode, otherwise do nothing (unformatted nodes
  80792. + * don't require zload/zrelse treatment). */
  80793. +extern void move_load_count(load_count * new, load_count * old); /* Move the contents of a load_count. Old handle is released. */
  80794. +extern void copy_load_count(load_count * new, load_count * old); /* Copy the contents of a load_count. Old handle remains held. */
  80795. +
  80796. +/* Variable initializers for load_count. */
  80797. +#define INIT_LOAD_COUNT ( load_count * ){ .node = NULL, .d_ref = 0 }
  80798. +#define INIT_LOAD_COUNT_NODE( n ) ( load_count ){ .node = ( n ), .d_ref = 0 }
  80799. +/* A convenience macro for use in assertions or debug-only code, where loaded
  80800. + data is only required to perform the debugging check. This macro
  80801. + encapsulates an expression inside a pair of calls to zload()/zrelse(). */
  80802. +#define WITH_DATA( node, exp ) \
  80803. +({ \
  80804. + long __with_dh_result; \
  80805. + znode *__with_dh_node; \
  80806. + \
  80807. + __with_dh_node = ( node ); \
  80808. + __with_dh_result = zload( __with_dh_node ); \
  80809. + if( __with_dh_result == 0 ) { \
  80810. + __with_dh_result = ( long )( exp ); \
  80811. + zrelse( __with_dh_node ); \
  80812. + } \
  80813. + __with_dh_result; \
  80814. +})
  80815. +
  80816. +/* Same as above, but accepts a return value in case zload fails. */
  80817. +#define WITH_DATA_RET( node, ret, exp ) \
  80818. +({ \
  80819. + int __with_dh_result; \
  80820. + znode *__with_dh_node; \
  80821. + \
  80822. + __with_dh_node = ( node ); \
  80823. + __with_dh_result = zload( __with_dh_node ); \
  80824. + if( __with_dh_result == 0 ) { \
  80825. + __with_dh_result = ( int )( exp ); \
  80826. + zrelse( __with_dh_node ); \
  80827. + } else \
  80828. + __with_dh_result = ( ret ); \
  80829. + __with_dh_result; \
  80830. +})
  80831. +
  80832. +#define WITH_COORD(coord, exp) \
  80833. +({ \
  80834. + coord_t *__coord; \
  80835. + \
  80836. + __coord = (coord); \
  80837. + coord_clear_iplug(__coord); \
  80838. + WITH_DATA(__coord->node, exp); \
  80839. +})
  80840. +
  80841. +#if REISER4_DEBUG
  80842. +#define STORE_COUNTERS \
  80843. + reiser4_lock_cnt_info __entry_counters = \
  80844. + *reiser4_lock_counters()
  80845. +#define CHECK_COUNTERS \
  80846. +ON_DEBUG_CONTEXT( \
  80847. +({ \
  80848. + __entry_counters.x_refs = reiser4_lock_counters() -> x_refs; \
  80849. + __entry_counters.t_refs = reiser4_lock_counters() -> t_refs; \
  80850. + __entry_counters.d_refs = reiser4_lock_counters() -> d_refs; \
  80851. + assert("nikita-2159", \
  80852. + !memcmp(&__entry_counters, reiser4_lock_counters(), \
  80853. + sizeof __entry_counters)); \
  80854. +}) )
  80855. +
  80856. +#else
  80857. +#define STORE_COUNTERS
  80858. +#define CHECK_COUNTERS noop
  80859. +#endif
  80860. +
  80861. +/* __ZNODE_H__ */
  80862. +#endif
  80863. +
  80864. +/* Make Linus happy.
  80865. + Local variables:
  80866. + c-indentation-style: "K&R"
  80867. + mode-name: "LC"
  80868. + c-basic-offset: 8
  80869. + tab-width: 8
  80870. + fill-column: 120
  80871. + End:
  80872. +*/
  80873. diff -urN --no-dereference linux-5.16.14.orig/include/linux/fs.h linux-5.16.14/include/linux/fs.h
  80874. --- linux-5.16.14.orig/include/linux/fs.h 2022-03-11 12:42:10.000000000 +0100
  80875. +++ linux-5.16.14/include/linux/fs.h 2022-03-12 13:25:39.546794721 +0100
  80876. @@ -249,7 +249,7 @@
  80877. */
  80878. #define FILESYSTEM_MAX_STACK_DEPTH 2
  80879. -/**
  80880. +/**
  80881. * enum positive_aop_returns - aop return codes with specific semantics
  80882. *
  80883. * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
  80884. @@ -259,7 +259,7 @@
  80885. * be a candidate for writeback again in the near
  80886. * future. Other callers must be careful to unlock
  80887. * the page if they get this return. Returned by
  80888. - * writepage();
  80889. + * writepage();
  80890. *
  80891. * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
  80892. * unlocked it and the page might have been truncated.
  80893. @@ -292,6 +292,8 @@
  80894. struct address_space;
  80895. struct writeback_control;
  80896. struct readahead_control;
  80897. +struct wb_writeback_work;
  80898. +struct bdi_writeback;
  80899. /*
  80900. * Write life time hint values.
  80901. @@ -415,6 +417,7 @@
  80902. int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
  80903. sector_t *span);
  80904. void (*swap_deactivate)(struct file *file);
  80905. + int batch_lock_tabu;
  80906. };
  80907. extern const struct address_space_operations empty_aops;
  80908. @@ -2210,6 +2213,14 @@
  80909. int (*remount_fs) (struct super_block *, int *, char *);
  80910. void (*umount_begin) (struct super_block *);
  80911. + long (*writeback_inodes)(struct super_block *sb,
  80912. + struct bdi_writeback *wb,
  80913. + struct writeback_control *wbc,
  80914. + struct wb_writeback_work *work,
  80915. + bool flush_all);
  80916. + void (*sync_inodes) (struct super_block *sb,
  80917. + struct writeback_control *wbc);
  80918. +
  80919. int (*show_options)(struct seq_file *, struct dentry *);
  80920. int (*show_devname)(struct seq_file *, struct dentry *);
  80921. int (*show_path)(struct seq_file *, struct dentry *);
  80922. @@ -2832,6 +2843,13 @@
  80923. extern int invalidate_inode_pages2_range(struct address_space *mapping,
  80924. pgoff_t start, pgoff_t end);
  80925. extern int write_inode_now(struct inode *, int);
  80926. +extern void writeback_skip_sb_inodes(struct super_block *sb,
  80927. + struct bdi_writeback *wb);
  80928. +extern long generic_writeback_sb_inodes(struct super_block *sb,
  80929. + struct bdi_writeback *wb,
  80930. + struct writeback_control *wbc,
  80931. + struct wb_writeback_work *work,
  80932. + bool flush_all);
  80933. extern int filemap_fdatawrite(struct address_space *);
  80934. extern int filemap_flush(struct address_space *);
  80935. extern int filemap_fdatawait_keep_errors(struct address_space *mapping);
  80936. @@ -3093,7 +3111,7 @@
  80937. extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *);
  80938. extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *);
  80939. extern struct file * open_exec(const char *);
  80940. -
  80941. +
  80942. /* fs/dcache.c -- generic fs support functions */
  80943. extern bool is_subdir(struct dentry *, struct dentry *);
  80944. extern bool path_is_under(const struct path *, const struct path *);
  80945. @@ -3103,6 +3121,8 @@
  80946. #include <linux/err.h>
  80947. /* needed for stackable file system support */
  80948. +extern loff_t default_llseek_unlocked(struct file *file, loff_t offset,
  80949. + int whence);
  80950. extern loff_t default_llseek(struct file *file, loff_t offset, int whence);
  80951. extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
  80952. @@ -3211,6 +3231,8 @@
  80953. extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
  80954. extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
  80955. +ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len,
  80956. + loff_t *ppos);
  80957. ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
  80958. rwf_t flags);
  80959. ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
  80960. diff -urN --no-dereference linux-5.16.14.orig/include/linux/miscdevice.h linux-5.16.14/include/linux/miscdevice.h
  80961. --- linux-5.16.14.orig/include/linux/miscdevice.h 2022-03-11 12:42:10.000000000 +0100
  80962. +++ linux-5.16.14/include/linux/miscdevice.h 2022-03-12 13:25:39.547794724 +0100
  80963. @@ -71,6 +71,7 @@
  80964. #define USERIO_MINOR 240
  80965. #define VHOST_VSOCK_MINOR 241
  80966. #define RFKILL_MINOR 242
  80967. +#define REISER4_MINOR 243
  80968. #define MISC_DYNAMIC_MINOR 255
  80969. struct device;
  80970. diff -urN --no-dereference linux-5.16.14.orig/include/linux/pagemap.h linux-5.16.14/include/linux/pagemap.h
  80971. --- linux-5.16.14.orig/include/linux/pagemap.h 2022-03-11 12:42:10.000000000 +0100
  80972. +++ linux-5.16.14/include/linux/pagemap.h 2022-03-12 13:25:56.406835996 +0100
  80973. @@ -908,6 +908,7 @@
  80974. }
  80975. int __set_page_dirty_nobuffers(struct page *page);
  80976. +int set_page_dirty_notag(struct page *page);
  80977. int __set_page_dirty_no_writeback(struct page *page);
  80978. void page_endio(struct page *page, bool is_write, int err);
  80979. diff -urN --no-dereference linux-5.16.14.orig/include/linux/writeback.h linux-5.16.14/include/linux/writeback.h
  80980. --- linux-5.16.14.orig/include/linux/writeback.h 2022-03-11 12:42:10.000000000 +0100
  80981. +++ linux-5.16.14/include/linux/writeback.h 2022-03-12 13:25:56.406835996 +0100
  80982. @@ -181,8 +181,27 @@
  80983. }
  80984. /*
  80985. + * Passed into wb_writeback(), essentially a subset of writeback_control
  80986. + */
  80987. +struct wb_writeback_work {
  80988. + long nr_pages;
  80989. + struct super_block *sb;
  80990. + enum writeback_sync_modes sync_mode;
  80991. + unsigned int tagged_writepages:1;
  80992. + unsigned int for_kupdate:1;
  80993. + unsigned int range_cyclic:1;
  80994. + unsigned int for_background:1;
  80995. + unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
  80996. + unsigned int auto_free:1; /* free on completion */
  80997. + enum wb_reason reason; /* why was writeback initiated? */
  80998. +
  80999. + struct list_head list; /* pending work list */
  81000. + struct wb_completion *done; /* set if the caller waits */
  81001. +};
  81002. +
  81003. +/*
  81004. * fs/fs-writeback.c
  81005. - */
  81006. + */
  81007. struct bdi_writeback;
  81008. void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
  81009. void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
  81010. @@ -390,6 +409,7 @@
  81011. pgoff_t start, pgoff_t end);
  81012. bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio);
  81013. +bool filemap_dirty_folio_notag(struct address_space *mapping, struct folio *folio);
  81014. void folio_account_redirty(struct folio *folio);
  81015. static inline void account_page_redirty(struct page *page)
  81016. {
  81017. diff -urN --no-dereference linux-5.16.14.orig/mm/filemap.c linux-5.16.14/mm/filemap.c
  81018. --- linux-5.16.14.orig/mm/filemap.c 2022-03-11 12:42:10.000000000 +0100
  81019. +++ linux-5.16.14/mm/filemap.c 2022-03-12 13:25:39.547794724 +0100
  81020. @@ -2116,6 +2116,13 @@
  81021. indices[pvec->nr] = xas.xa_index;
  81022. if (!pagevec_add(pvec, page))
  81023. break;
  81024. + if (mapping->a_ops->batch_lock_tabu)
  81025. + /*
  81026. + * the file system doesn't allow to hold
  81027. + * many pages locked, while calling
  81028. + * ->invalidatepage() for one of them
  81029. + */
  81030. + break;
  81031. goto next;
  81032. unlock:
  81033. unlock_page(page);
  81034. @@ -2196,6 +2203,7 @@
  81035. return ret;
  81036. }
  81037. +EXPORT_SYMBOL(find_get_pages_range);
  81038. /**
  81039. * find_get_pages_contig - gang contiguous pagecache lookup
  81040. diff -urN --no-dereference linux-5.16.14.orig/mm/folio-compat.c linux-5.16.14/mm/folio-compat.c
  81041. --- linux-5.16.14.orig/mm/folio-compat.c 2022-03-11 12:42:10.000000000 +0100
  81042. +++ linux-5.16.14/mm/folio-compat.c 2022-03-12 13:25:56.406835996 +0100
  81043. @@ -90,6 +90,13 @@
  81044. }
  81045. EXPORT_SYMBOL(__set_page_dirty_nobuffers);
  81046. +int set_page_dirty_notag(struct page *page)
  81047. +{
  81048. + return filemap_dirty_folio_notag(page_mapping(page),
  81049. + page_folio(page));
  81050. +}
  81051. +EXPORT_SYMBOL(set_page_dirty_notag);
  81052. +
  81053. bool clear_page_dirty_for_io(struct page *page)
  81054. {
  81055. return folio_clear_dirty_for_io(page_folio(page));
  81056. diff -urN --no-dereference linux-5.16.14.orig/mm/page-writeback.c linux-5.16.14/mm/page-writeback.c
  81057. --- linux-5.16.14.orig/mm/page-writeback.c 2022-03-11 12:42:10.000000000 +0100
  81058. +++ linux-5.16.14/mm/page-writeback.c 2022-03-12 13:25:56.406835996 +0100
  81059. @@ -2513,6 +2513,20 @@
  81060. xa_unlock_irqrestore(&mapping->i_pages, flags);
  81061. }
  81062. +void __folio_mark_dirty_notag(struct folio *folio,
  81063. + struct address_space *mapping,
  81064. + int warn)
  81065. +{
  81066. + unsigned long flags;
  81067. +
  81068. + xa_lock_irqsave(&mapping->i_pages, flags);
  81069. + if (folio->mapping) { /* Race with truncate? */
  81070. + WARN_ON_ONCE(warn && !folio_test_uptodate(folio));
  81071. + folio_account_dirtied(folio, mapping);
  81072. + }
  81073. + xa_unlock_irqrestore(&mapping->i_pages, flags);
  81074. +}
  81075. +
  81076. /**
  81077. * filemap_dirty_folio - Mark a folio dirty for filesystems which do not use buffer_heads.
  81078. * @mapping: Address space this folio belongs to.
  81079. @@ -2551,6 +2565,26 @@
  81080. }
  81081. EXPORT_SYMBOL(filemap_dirty_folio);
  81082. +bool filemap_dirty_folio_notag(struct address_space *mapping,
  81083. + struct folio *folio)
  81084. +{
  81085. + folio_memcg_lock(folio);
  81086. + if (folio_test_set_dirty(folio)) {
  81087. + folio_memcg_unlock(folio);
  81088. + return false;
  81089. + }
  81090. +
  81091. + __folio_mark_dirty_notag(folio, mapping, !folio_test_private(folio));
  81092. + folio_memcg_unlock(folio);
  81093. +
  81094. + if (mapping->host) {
  81095. + /* !PageAnon && !swapper_space */
  81096. + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
  81097. + }
  81098. + return true;
  81099. +}
  81100. +EXPORT_SYMBOL(filemap_dirty_folio_notag);
  81101. +
  81102. /**
  81103. * folio_account_redirty - Manually account for redirtying a page.
  81104. * @folio: The folio which is being redirtied.
  81105. diff -urN --no-dereference linux-5.16.14.orig/mm/vmscan.c linux-5.16.14/mm/vmscan.c
  81106. --- linux-5.16.14.orig/mm/vmscan.c 2022-03-11 12:42:10.000000000 +0100
  81107. +++ linux-5.16.14/mm/vmscan.c 2022-03-12 13:25:39.547794724 +0100
  81108. @@ -3577,7 +3577,11 @@
  81109. pg_data_t *last_pgdat;
  81110. struct zoneref *z;
  81111. struct zone *zone;
  81112. + void *saved;
  81113. retry:
  81114. + saved = current->journal_info; /* save journal info */
  81115. + current->journal_info = NULL;
  81116. +
  81117. delayacct_freepages_start();
  81118. if (!cgroup_reclaim(sc))
  81119. @@ -3622,6 +3626,8 @@
  81120. }
  81121. delayacct_freepages_end();
  81122. + /* restore journal info */
  81123. + current->journal_info = saved;
  81124. if (sc->nr_reclaimed)
  81125. return sc->nr_reclaimed;